From f3eedaf45d1b6218b8c7c5346853c6ae64500b71 Mon Sep 17 00:00:00 2001
From: yangjie01
Date: Wed, 12 Jan 2022 18:11:40 -0600
Subject: [PATCH 001/513] [SPARK-37805][TESTS] Refactor `TestUtils#configTestLog4j` method to use log4j2 api
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What changes were proposed in this pull request?

SPARK-37795 added a scalastyle rule to ban `org.apache.log4j` imports, but one place in `o.a.spark.TestUtils` still retains the `org.apache.log4j` imports. This PR refactors the `configTestLog4j` method in `o.a.spark.TestUtils` to use the log4j2 API, so that the logging behavior under log4j 2.x matches what it used to be under log4j 1.x.

In fact, the `configTestLog4j` method did nothing before this PR, because the `PropertyConfigurator.configure` method in `org.apache.logging.log4j:log4j-1.2-api` is an empty method:

https://github.com/apache/logging-log4j2/blob/491a0b3787975b6fc95b6a8cb3da76dc7517c65f/log4j-1.2-api/src/main/java/org/apache/log4j/PropertyConfigurator.java#L39-L47

Another change in this PR is renaming the method from `configTestLog4j` to `configTestLog4j2`.

### Why are the changes needed?

Clean up the `org.apache.log4j` imports left in Spark internals and keep the behavior of `configTestLog4j` consistent between log4j 1.x and log4j 2.x.

### Does this PR introduce _any_ user-facing change?

The `configTestLog4j` method in `TestUtils` is renamed to `configTestLog4j2`.

### How was this patch tested?

- Pass the Jenkins or GitHub Actions builds
- Manual test

Run the test cases that use the `configTestLog4j` method in the following 3 scenarios:

1. without this PR, to test log4j 2.x
2. with this PR, to test log4j 2.x
3. after `git reset --hard 19227983e91a54b5f27ade6412dad1088dfcba9e`, to test log4j 1.x

For example, for `WholeStageCodegenSparkSubmitSuite`, run

```
mvn clean install -DskipTests -pl sql/core -am
mvn test -pl sql/core -Dtest=none -DwildcardSuites=org.apache.spark.sql.execution.WholeStageCodegenSparkSubmitSuite
```

Scenario 1 does not print any logs to the console; scenarios 2 and 3 print similar logs to the console.

Closes #35095 from LuciferYang/refactor-configTestLog4j.
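For reference, below is a minimal standalone sketch of the log4j2 builder-based programmatic configuration that the new `configTestLog4j2` relies on. The builder calls mirror the patch itself; the wrapping object name is illustrative only, since the real method lives in `o.a.spark.TestUtils`.

```scala
import org.apache.logging.log4j.LogManager
import org.apache.logging.log4j.core.LoggerContext
import org.apache.logging.log4j.core.appender.ConsoleAppender
import org.apache.logging.log4j.core.config.builder.api.ConfigurationBuilderFactory

// Illustrative wrapper only; in Spark the method is defined in o.a.spark.TestUtils.
object Log4j2ConfigSketch {
  def configTestLog4j2(level: String): Unit = {
    val builder = ConfigurationBuilderFactory.newConfigurationBuilder()
    // Console appender writing to System.err, as the old log4j 1.x properties did.
    val appenderBuilder = builder.newAppender("console", "CONSOLE")
      .addAttribute("target", ConsoleAppender.Target.SYSTEM_ERR)
    appenderBuilder.add(builder.newLayout("PatternLayout")
      .addAttribute("pattern", "%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n"))
    builder.add(appenderBuilder)
    // Root logger at the requested level, routed to the console appender.
    builder.add(builder.newRootLogger(level).add(builder.newAppenderRef("console")))
    // Build the configuration and apply it to the active LoggerContext.
    LogManager.getContext(false).asInstanceOf[LoggerContext].reconfigure(builder.build())
  }
}
```

Test driver objects then simply call this at the start of `main`, just as the suites touched below call `TestUtils.configTestLog4j2("INFO")`.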
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../scala/org/apache/spark/TestUtils.scala | 30 ++++++++++--------- .../scala/org/apache/spark/DriverSuite.scala | 2 +- .../spark/deploy/SparkSubmitSuite.scala | 4 +-- .../WholeStageCodegenSparkSubmitSuite.scala | 2 +- .../spark/sql/hive/HiveSparkSubmitSuite.scala | 20 ++++++------- 5 files changed, 30 insertions(+), 28 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index 9bc6ccbd0df65..20159afc51a6c 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -24,7 +24,7 @@ import java.nio.file.{Files => JavaFiles, Paths} import java.nio.file.attribute.PosixFilePermission.{OWNER_EXECUTE, OWNER_READ, OWNER_WRITE} import java.security.SecureRandom import java.security.cert.X509Certificate -import java.util.{Arrays, EnumSet, Locale, Properties} +import java.util.{Arrays, EnumSet, Locale} import java.util.concurrent.{TimeoutException, TimeUnit} import java.util.jar.{JarEntry, JarOutputStream, Manifest} import java.util.regex.Pattern @@ -41,9 +41,10 @@ import scala.util.Try import com.google.common.io.{ByteStreams, Files} import org.apache.commons.lang3.StringUtils -// scalastyle:off -import org.apache.log4j.PropertyConfigurator -// scalastyle:on +import org.apache.logging.log4j.LogManager +import org.apache.logging.log4j.core.LoggerContext +import org.apache.logging.log4j.core.appender.ConsoleAppender +import org.apache.logging.log4j.core.config.builder.api.ConfigurationBuilderFactory import org.eclipse.jetty.server.Handler import org.eclipse.jetty.server.Server import org.eclipse.jetty.server.handler.DefaultHandler @@ -418,17 +419,18 @@ private[spark] object TestUtils { } /** - * config a log4j properties used for testsuite + * config a log4j2 properties used for testsuite */ - def configTestLog4j(level: String): Unit = { - val pro = new Properties() - pro.put("log4j.rootLogger", s"$level, console") - pro.put("log4j.appender.console", "org.apache.log4j.ConsoleAppender") - pro.put("log4j.appender.console.target", "System.err") - pro.put("log4j.appender.console.layout", "org.apache.log4j.PatternLayout") - pro.put("log4j.appender.console.layout.ConversionPattern", - "%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n") - PropertyConfigurator.configure(pro) + def configTestLog4j2(level: String): Unit = { + val builder = ConfigurationBuilderFactory.newConfigurationBuilder() + val appenderBuilder = builder.newAppender("console", "CONSOLE") + .addAttribute("target", ConsoleAppender.Target.SYSTEM_ERR) + appenderBuilder.add(builder.newLayout("PatternLayout") + .addAttribute("pattern", "%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n")) + builder.add(appenderBuilder) + builder.add(builder.newRootLogger(level).add(builder.newAppenderRef("console"))) + val configuration = builder.build() + LogManager.getContext(false).asInstanceOf[LoggerContext].reconfigure(configuration) } /** diff --git a/core/src/test/scala/org/apache/spark/DriverSuite.scala b/core/src/test/scala/org/apache/spark/DriverSuite.scala index f58777584d0ae..124a138ccf10f 100644 --- a/core/src/test/scala/org/apache/spark/DriverSuite.scala +++ b/core/src/test/scala/org/apache/spark/DriverSuite.scala @@ -51,7 +51,7 @@ class DriverSuite extends SparkFunSuite with TimeLimits { */ object DriverWithoutCleanup { def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") val conf = new SparkConf val sc = new 
SparkContext(args(0), "DriverWithoutCleanup", conf) sc.parallelize(1 to 100, 4).count() diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 19e4875512a65..aead72ea0fdb7 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -1520,7 +1520,7 @@ class SparkSubmitSuite object JarCreationTest extends Logging { def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") val conf = new SparkConf() val sc = new SparkContext(conf) val result = sc.makeRDD(1 to 100, 10).mapPartitions { x => @@ -1544,7 +1544,7 @@ object JarCreationTest extends Logging { object SimpleApplicationTest { def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") val conf = new SparkConf() val sc = new SparkContext(conf) val configs = Seq("spark.master", "spark.app.name") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala index ffbdc3f64195f..5e0318d97ff94 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala @@ -71,7 +71,7 @@ object WholeStageCodegenSparkSubmitSuite extends Assertions with Logging { var spark: SparkSession = _ def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") spark = SparkSession.builder().getOrCreate() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 90752e70e1b57..170cf4898f314 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -371,7 +371,7 @@ class HiveSparkSubmitSuite object SetMetastoreURLTest extends Logging { def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") val sparkConf = new SparkConf(loadDefaults = true) val builder = SparkSession.builder() @@ -409,7 +409,7 @@ object SetMetastoreURLTest extends Logging { object SetWarehouseLocationTest extends Logging { def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") val sparkConf = new SparkConf(loadDefaults = true).set(UI_ENABLED, false) val providedExpectedWarehouseLocation = @@ -489,7 +489,7 @@ object SetWarehouseLocationTest extends Logging { // can load the jar defined with the function. object TemporaryHiveUDFTest extends Logging { def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") val conf = new SparkConf() conf.set(UI_ENABLED, false) val sc = new SparkContext(conf) @@ -527,7 +527,7 @@ object TemporaryHiveUDFTest extends Logging { // can load the jar defined with the function. 
object PermanentHiveUDFTest1 extends Logging { def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") val conf = new SparkConf() conf.set(UI_ENABLED, false) val sc = new SparkContext(conf) @@ -565,7 +565,7 @@ object PermanentHiveUDFTest1 extends Logging { // can load the jar defined with the function. object PermanentHiveUDFTest2 extends Logging { def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") val conf = new SparkConf() conf.set(UI_ENABLED, false) val sc = new SparkContext(conf) @@ -600,7 +600,7 @@ object PermanentHiveUDFTest2 extends Logging { // We test if we can load user jars in both driver and executors when HiveContext is used. object SparkSubmitClassLoaderTest extends Logging { def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") val conf = new SparkConf() val hiveWarehouseLocation = Utils.createTempDir() conf.set(UI_ENABLED, false) @@ -670,7 +670,7 @@ object SparkSubmitClassLoaderTest extends Logging { // We test if we can correctly set spark sql configurations when HiveContext is used. object SparkSQLConfTest extends Logging { def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") // We override the SparkConf to add spark.sql.hive.metastore.version and // spark.sql.hive.metastore.jars to the beginning of the conf entry array. // So, if metadataHive get initialized after we set spark.sql.hive.metastore.version but @@ -711,7 +711,7 @@ object SPARK_9757 extends QueryTest { protected var spark: SparkSession = _ def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") val hiveWarehouseLocation = Utils.createTempDir() val sparkContext = new SparkContext( @@ -760,7 +760,7 @@ object SPARK_11009 extends QueryTest { protected var spark: SparkSession = _ def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") val sparkContext = new SparkContext( new SparkConf() @@ -791,7 +791,7 @@ object SPARK_14244 extends QueryTest { protected var spark: SparkSession = _ def main(args: Array[String]): Unit = { - TestUtils.configTestLog4j("INFO") + TestUtils.configTestLog4j2("INFO") val sparkContext = new SparkContext( new SparkConf() From e7cc032c3e99f632ca87e5d60ef10e2e06df26e1 Mon Sep 17 00:00:00 2001 From: PengLei Date: Thu, 13 Jan 2022 11:09:10 +0800 Subject: [PATCH 002/513] [SPARK-37381][SQL] Unify v1 and v2 SHOW CREATE TABLE tests ### What changes were proposed in this pull request? unify test case: 1. Move the testcase from `DDLParserSuite.scala` to `ShowCreateTableParserSuite.scala`. 2. Move the testcase from `DataSourceV2SQLSuite` to `v2.ShowCreateTableSuite`. If the test case also work with V1/Hive, then move to ShowCreateTableSuiteBase 3. Move the testcase from `sql/ShowCreateTableSuite.scala` to `v1.ShowCreateTableSuite` If the test case also work with Hive, then move to `v1.ShowCreateTableSuiteBase` If the test case also work with V2/Hive, then move to `ShowCreateTableSuiteBase` 4. Move the testcase from `HiveShowCreateTableSuite` to `hive.ShowCreateTableSuite` If the test case also work with V1/V2, then move to `ShowCreateTableSuiteBase` If the test case also work with V1, then move to `v1.ShowCreateTableSuiteBase` 5. Use `getShowCreateDDL` instead of `checkCreateTable` to check the result. fix diff behavior: 1. 
Add one space after `create table xxx` for the command `SHOW CREATE TABLE AS SERDE` to unify the output. 2. Add one space after `OPTIONS` for v2 command `SHOW CREATE TABLE` to unify the output. The changes follow the approach of [#30287](https://github.com/apache/spark/pull/30287) [#34305](https://github.com/apache/spark/pull/34305) ### Why are the changes needed? 1. The unification will allow to run common `SHOW CREATE TABLE` tests for both DSv1/Hive DSv1 and DSv2 2. We can detect missing features and differences between DSv1 and DSv2 implementations. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowCreateTableSuite" Closes #34719 from Peng-Lei/SPARK-37381. Authored-by: PengLei Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/DDLParserSuite.scala | 13 - .../spark/sql/execution/command/tables.scala | 2 +- .../datasources/v2/ShowCreateTableExec.scala | 2 +- .../results/show-create-table.sql.out | 6 +- .../spark/sql/ShowCreateTableSuite.scala | 267 ------------------ .../sql/connector/DataSourceV2SQLSuite.scala | 107 ------- .../sql/execution/SQLViewTestSuite.scala | 64 ++++- .../command/ShowCreateTableParserSuite.scala | 37 +++ .../command/ShowCreateTableSuiteBase.scala | 194 +++++++++++++ .../command/v1/ShowCreateTableSuite.scala | 141 +++++++++ .../command/v2/ShowCreateTableSuite.scala | 139 +++++++++ .../sql/hive/execution/HiveSQLViewSuite.scala | 41 ++- .../command/ShowCreateTableSuite.scala} | 249 ++++++---------- 13 files changed, 708 insertions(+), 554 deletions(-) delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateTableParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateTableSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala rename sql/hive/src/test/scala/org/apache/spark/sql/hive/{HiveShowCreateTableSuite.scala => execution/command/ShowCreateTableSuite.scala} (63%) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 04309297bb4fc..56fdffdd82bc9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1839,19 +1839,6 @@ class DDLParserSuite extends AnalysisTest { Some(Map("ds" -> "2017-06-10")))) } - test("SHOW CREATE table") { - comparePlans( - parsePlan("SHOW CREATE TABLE a.b.c"), - ShowCreateTable( - UnresolvedTableOrView(Seq("a", "b", "c"), "SHOW CREATE TABLE", allowTempView = false))) - - comparePlans( - parsePlan("SHOW CREATE TABLE a.b.c AS SERDE"), - ShowCreateTable( - UnresolvedTableOrView(Seq("a", "b", "c"), "SHOW CREATE TABLE", allowTempView = false), - asSerde = true)) - } - test("CACHE TABLE") { comparePlans( parsePlan("CACHE TABLE a.b.c"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 761a0d508e877..b989224d4e0f6 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -1247,7 +1247,7 @@ case class ShowCreateTableAsSerdeCommand( s"Unknown table type is found at showCreateHiveTable: $t") } - builder ++= s"CREATE$tableTypeString ${table.quotedString}" + builder ++= s"CREATE$tableTypeString ${table.quotedString} " if (metadata.tableType == VIEW) { showCreateView(metadata, builder) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala index 5eaa16961886b..f21b9a5095a3b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala @@ -74,7 +74,7 @@ case class ShowCreateTableExec( val props = tableOptions.toSeq.sortBy(_._1).map { case (key, value) => s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" } - builder ++= "OPTIONS" + builder ++= "OPTIONS " builder ++= concatByMultiLines(props) } } diff --git a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out index e7399e45c3579..49c27a2229c5b 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out @@ -295,7 +295,7 @@ SHOW CREATE TABLE view_SPARK_30302 AS SERDE -- !query schema struct -- !query output -CREATE VIEW `default`.`view_SPARK_30302`( +CREATE VIEW `default`.`view_SPARK_30302` ( `aaa`, `bbb`) AS SELECT a, b FROM tbl @@ -335,7 +335,7 @@ SHOW CREATE TABLE view_SPARK_30302 AS SERDE -- !query schema struct -- !query output -CREATE VIEW `default`.`view_SPARK_30302`( +CREATE VIEW `default`.`view_SPARK_30302` ( `aaa` COMMENT 'comment with \'quoted text\' for aaa', `bbb`) COMMENT 'This is a comment with \'quoted text\' for view' @@ -377,7 +377,7 @@ SHOW CREATE TABLE view_SPARK_30302 AS SERDE -- !query schema struct -- !query output -CREATE VIEW `default`.`view_SPARK_30302`( +CREATE VIEW `default`.`view_SPARK_30302` ( `aaa`, `bbb`) TBLPROPERTIES ( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala deleted file mode 100644 index 13983120955fb..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql - -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.CatalogTable -import org.apache.spark.sql.sources.SimpleInsertSource -import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} -import org.apache.spark.util.Utils - -class SimpleShowCreateTableSuite extends ShowCreateTableSuite with SharedSparkSession - -abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { - import testImplicits._ - - test("data source table with user specified schema") { - withTable("ddl_test") { - val jsonFilePath = Utils.getSparkClassLoader.getResource("sample.json").getFile - - sql( - s"""CREATE TABLE ddl_test ( - | a STRING, - | b STRING, - | `extra col` ARRAY, - | `` STRUCT> - |) - |USING json - |OPTIONS ( - | PATH '$jsonFilePath' - |) - """.stripMargin - ) - - checkCreateTable("ddl_test") - } - } - - test("data source table CTAS") { - withTable("ddl_test") { - sql( - s"""CREATE TABLE ddl_test - |USING json - |AS SELECT 1 AS a, "foo" AS b - """.stripMargin - ) - - checkCreateTable("ddl_test") - } - } - - test("partitioned data source table") { - withTable("ddl_test") { - sql( - s"""CREATE TABLE ddl_test - |USING json - |PARTITIONED BY (b) - |AS SELECT 1 AS a, "foo" AS b - """.stripMargin - ) - - checkCreateTable("ddl_test") - } - } - - test("bucketed data source table") { - withTable("ddl_test") { - sql( - s"""CREATE TABLE ddl_test - |USING json - |CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS - |AS SELECT 1 AS a, "foo" AS b - """.stripMargin - ) - - checkCreateTable("ddl_test") - } - } - - test("partitioned bucketed data source table") { - withTable("ddl_test") { - sql( - s"""CREATE TABLE ddl_test - |USING json - |PARTITIONED BY (c) - |CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS - |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c - """.stripMargin - ) - - checkCreateTable("ddl_test") - } - } - - test("data source table with a comment") { - withTable("ddl_test") { - sql( - s"""CREATE TABLE ddl_test - |USING json - |COMMENT 'This is a comment' - |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c - """.stripMargin - ) - - checkCreateTable("ddl_test") - } - } - - test("data source table with table properties") { - withTable("ddl_test") { - sql( - s"""CREATE TABLE ddl_test - |USING json - |TBLPROPERTIES ('a' = '1') - |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c - """.stripMargin - ) - - checkCreateTable("ddl_test") - } - } - - test("data source table using Dataset API") { - withTable("ddl_test") { - spark - .range(3) - .select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd, 'id as 'e) - .write - .mode("overwrite") - .partitionBy("a", "b") - .bucketBy(2, "c", "d") - .saveAsTable("ddl_test") - - checkCreateTable("ddl_test") - } - } - - test("temp view") { - val viewName = "spark_28383" - withTempView(viewName) { - sql(s"CREATE TEMPORARY VIEW $viewName AS SELECT 1 AS a") - val ex = intercept[AnalysisException] { - sql(s"SHOW CREATE TABLE $viewName") - } - assert(ex.getMessage.contains( - s"$viewName is a temp view. 'SHOW CREATE TABLE' expects a table or permanent view.")) - } - - withGlobalTempView(viewName) { - sql(s"CREATE GLOBAL TEMPORARY VIEW $viewName AS SELECT 1 AS a") - val globalTempViewDb = spark.sessionState.catalog.globalTempViewManager.database - val ex = intercept[AnalysisException] { - sql(s"SHOW CREATE TABLE $globalTempViewDb.$viewName") - } - assert(ex.getMessage.contains( - s"$globalTempViewDb.$viewName is a temp view. 
" + - "'SHOW CREATE TABLE' expects a table or permanent view.")) - } - } - - test("SPARK-24911: keep quotes for nested fields") { - withTable("t1") { - val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>)" - sql(s"$createTable USING json") - val shownDDL = getShowDDL("SHOW CREATE TABLE t1") - assert(shownDDL == "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>) USING json") - - checkCreateTable("t1") - } - } - - test("SPARK-36012: Add NULL flag when SHOW CREATE TABLE") { - val t = "SPARK_36012" - withTable(t) { - sql( - s""" - |CREATE TABLE $t ( - | a bigint NOT NULL, - | b bigint - |) - |USING ${classOf[SimpleInsertSource].getName} - """.stripMargin) - val showDDL = getShowDDL(s"SHOW CREATE TABLE $t") - assert(showDDL == s"CREATE TABLE `default`.`$t` ( `a` BIGINT NOT NULL," + - s" `b` BIGINT) USING ${classOf[SimpleInsertSource].getName}") - } - } - - test("SPARK-37494: Unify v1 and v2 option output") { - withTable("ddl_test") { - sql( - s"""CREATE TABLE ddl_test ( - | a STRING - |) - |USING json - |TBLPROPERTIES ( - | 'b' = '1', - | 'a' = '2') - |OPTIONS ( - | k4 'v4', - | `k3` 'v3', - | 'k5' 'v5', - | 'k1' = 'v1', - | k2 = 'v2' - |) - """.stripMargin - ) - val expected = "CREATE TABLE `default`.`ddl_test` ( `a` STRING) USING json" + - " OPTIONS ( 'k1' = 'v1', 'k2' = 'v2', 'k3' = 'v3', 'k4' = 'v4', 'k5' = 'v5')" + - " TBLPROPERTIES ( 'a' = '2', 'b' = '1')" - assert(getShowDDL("SHOW CREATE TABLE ddl_test") == expected) - } - } - - protected def getShowDDL(showCreateTableSql: String): String = { - sql(showCreateTableSql).head().getString(0).split("\n").map(_.trim).mkString(" ") - } - - protected def checkCreateTable(table: String, serde: Boolean = false): Unit = { - checkCreateTableOrView(TableIdentifier(table, Some("default")), "TABLE", serde) - } - - protected def checkCreateView(table: String, serde: Boolean = false): Unit = { - checkCreateTableOrView(TableIdentifier(table, Some("default")), "VIEW", serde) - } - - protected def checkCreateTableOrView( - table: TableIdentifier, - checkType: String, - serde: Boolean): Unit = { - val db = table.database.getOrElse("default") - val expected = spark.sharedState.externalCatalog.getTable(db, table.table) - val shownDDL = if (serde) { - sql(s"SHOW CREATE TABLE ${table.quotedString} AS SERDE").head().getString(0) - } else { - sql(s"SHOW CREATE TABLE ${table.quotedString}").head().getString(0) - } - - sql(s"DROP $checkType ${table.quotedString}") - - try { - sql(shownDDL) - val actual = spark.sharedState.externalCatalog.getTable(db, table.table) - checkCatalogTables(expected, actual) - } finally { - sql(s"DROP $checkType IF EXISTS ${table.table}") - } - } - - protected def checkCatalogTables(expected: CatalogTable, actual: CatalogTable): Unit = { - assert(CatalogTable.normalize(actual) == CatalogTable.normalize(expected)) - } -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 3667a10f132ad..2ed7f6163be79 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1854,109 +1854,6 @@ class DataSourceV2SQLSuite } } - test("SPARK-33898: SHOW CREATE TABLE AS SERDE") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") - val e = intercept[AnalysisException] { - sql(s"SHOW CREATE TABLE $t AS SERDE") - } - 
assert(e.message.contains(s"SHOW CREATE TABLE AS SERDE is not supported for v2 tables.")) - } - } - - test("SPARK-33898: SHOW CREATE TABLE") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - sql( - s""" - |CREATE TABLE $t ( - | a bigint NOT NULL, - | b bigint, - | c bigint, - | `extra col` ARRAY, - | `` STRUCT> - |) - |USING foo - |OPTIONS ( - | from = 0, - | to = 1, - | via = 2) - |COMMENT 'This is a comment' - |TBLPROPERTIES ('prop1' = '1', 'prop2' = '2', 'prop3' = 3, 'prop4' = 4) - |PARTITIONED BY (a) - |LOCATION 'file:/tmp' - """.stripMargin) - val showDDL = getShowCreateDDL(s"SHOW CREATE TABLE $t") - assert(showDDL === Array( - "CREATE TABLE testcat.ns1.ns2.tbl (", - "`a` BIGINT NOT NULL,", - "`b` BIGINT,", - "`c` BIGINT,", - "`extra col` ARRAY,", - "`` STRUCT<`x`: INT, `y`: ARRAY>)", - "USING foo", - "OPTIONS(", - "'from' = '0',", - "'to' = '1',", - "'via' = '2')", - "PARTITIONED BY (a)", - "COMMENT 'This is a comment'", - "LOCATION 'file:/tmp'", - "TBLPROPERTIES (", - "'prop1' = '1',", - "'prop2' = '2',", - "'prop3' = '3',", - "'prop4' = '4')" - )) - } - } - - test("SPARK-33898: SHOW CREATE TABLE WITH AS SELECT") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - sql( - s""" - |CREATE TABLE $t - |USING foo - |AS SELECT 1 AS a, "foo" AS b - """.stripMargin) - val showDDL = getShowCreateDDL(s"SHOW CREATE TABLE $t") - assert(showDDL === Array( - "CREATE TABLE testcat.ns1.ns2.tbl (", - "`a` INT,", - "`b` STRING)", - "USING foo" - )) - } - } - - test("SPARK-33898: SHOW CREATE TABLE PARTITIONED BY Transforms") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - sql( - s""" - |CREATE TABLE $t (a INT, b STRING, ts TIMESTAMP) USING foo - |PARTITIONED BY ( - | a, - | bucket(16, b), - | years(ts), - | months(ts), - | days(ts), - | hours(ts)) - """.stripMargin) - val showDDL = getShowCreateDDL(s"SHOW CREATE TABLE $t") - assert(showDDL === Array( - "CREATE TABLE testcat.ns1.ns2.tbl (", - "`a` INT,", - "`b` STRING,", - "`ts` TIMESTAMP)", - "USING foo", - "PARTITIONED BY (a, bucket(16, b), years(ts), months(ts), days(ts), hours(ts))" - )) - } - } - test("CACHE/UNCACHE TABLE") { val t = "testcat.ns1.ns2.tbl" withTable(t) { @@ -2901,10 +2798,6 @@ class DataSourceV2SQLSuite assert(ex.getErrorClass == expectedErrorClass) assert(ex.messageParameters.sameElements(expectedErrorMessageParameters)) } - - private def getShowCreateDDL(showCreateTableSql: String): Array[String] = { - sql(showCreateTableSql).head().getString(0).split("\n").map(_.trim) - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index 730299c2f2c9b..433264951acef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -45,10 +45,12 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { viewName: String, sqlText: String, columnNames: Seq[String] = Seq.empty, + others: Seq[String] = Seq.empty, replace: Boolean = false): String = { val replaceString = if (replace) "OR REPLACE" else "" val columnString = if (columnNames.nonEmpty) columnNames.mkString("(", ",", ")") else "" - sql(s"CREATE $replaceString $viewTypeString $viewName $columnString AS $sqlText") + val othersString = if (others.nonEmpty) others.mkString(" ") else "" + sql(s"CREATE $replaceString $viewTypeString $viewName $columnString $othersString AS $sqlText") formattedViewName(viewName) } @@ -421,6 +423,18 @@ 
abstract class TempViewTestSuite extends SQLViewTestSuite { } } } + + test("show create table does not support temp view") { + val viewName = "spark_28383" + withView(viewName) { + createView(viewName, "SELECT 1 AS a") + val ex = intercept[AnalysisException] { + sql(s"SHOW CREATE TABLE ${formattedViewName(viewName)}") + } + assert(ex.getMessage.contains( + s"$viewName is a temp view. 'SHOW CREATE TABLE' expects a table or permanent view.")) + } + } } class LocalTempViewTestSuite extends TempViewTestSuite with SharedSparkSession { @@ -591,4 +605,52 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { s" The view ${table.qualifiedName} may have been tampered with")) } } + + test("show create table for persisted simple view") { + val viewName = "v1" + Seq(true, false).foreach { serde => + withView(viewName) { + createView(viewName, "SELECT 1 AS a") + val expected = "CREATE VIEW `default`.`v1` ( `a`) AS SELECT 1 AS a" + assert(getShowCreateDDL(formattedViewName(viewName), serde) == expected) + } + } + } + + test("show create table for persisted view with output columns") { + val viewName = "v1" + Seq(true, false).foreach { serde => + withView(viewName) { + createView(viewName, "SELECT 1 AS a, 2 AS b", Seq("a", "b COMMENT 'b column'")) + val expected = "CREATE VIEW `default`.`v1` ( `a`, `b` COMMENT 'b column')" + + " AS SELECT 1 AS a, 2 AS b" + assert(getShowCreateDDL(formattedViewName(viewName), serde) == expected) + } + } + } + + test("show create table for persisted simple view with table comment and properties") { + val viewName = "v1" + Seq(true, false).foreach { serde => + withView(viewName) { + createView(viewName, "SELECT 1 AS c1, '2' AS c2", Seq("c1 COMMENT 'bla'", "c2"), + Seq("COMMENT 'table comment'", "TBLPROPERTIES ( 'prop1' = 'value1', 'prop2' = 'value2')")) + + val expected = "CREATE VIEW `default`.`v1` ( `c1` COMMENT 'bla', `c2`)" + + " COMMENT 'table comment'" + + " TBLPROPERTIES ( 'prop1' = 'value1', 'prop2' = 'value2')" + + " AS SELECT 1 AS c1, '2' AS c2" + assert(getShowCreateDDL(formattedViewName(viewName), serde) == expected) + } + } + } + + def getShowCreateDDL(view: String, serde: Boolean = false): String = { + val result = if (serde) { + sql(s"SHOW CREATE TABLE $view AS SERDE") + } else { + sql(s"SHOW CREATE TABLE $view") + } + result.head().getString(0).split("\n").map(_.trim).mkString(" ") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateTableParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateTableParserSuite.scala new file mode 100644 index 0000000000000..ab7c6e4dec568 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateTableParserSuite.scala @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedTableOrView} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.plans.logical.ShowCreateTable + +class ShowCreateTableParserSuite extends AnalysisTest { + test("show create table") { + comparePlans( + parsePlan("SHOW CREATE TABLE a.b.c"), + ShowCreateTable( + UnresolvedTableOrView(Seq("a", "b", "c"), "SHOW CREATE TABLE", allowTempView = false))) + + comparePlans( + parsePlan("SHOW CREATE TABLE a.b.c AS SERDE"), + ShowCreateTable( + UnresolvedTableOrView(Seq("a", "b", "c"), "SHOW CREATE TABLE", allowTempView = false), + asSerde = true)) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateTableSuiteBase.scala new file mode 100644 index 0000000000000..53cdec0d2b6c0 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateTableSuiteBase.scala @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.sources.SimpleInsertSource +import org.apache.spark.util.Utils + +/** + * This base suite contains unified tests for the `SHOW CREATE TABLE` command that check V1 and V2 + * table catalogs. 
The tests that cannot run for all supported catalogs are located in more + * specific test suites: + * + * - V2 table catalog tests: `org.apache.spark.sql.execution.command.v2.ShowCreateTableSuite` + * - V1 table catalog tests: + * `org.apache.spark.sql.execution.command.v1.ShowCreateTableSuiteBase` + * - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.ShowCreateTableSuite` + * - V1 Hive External catalog: +* `org.apache.spark.sql.hive.execution.command.ShowCreateTableSuite` + */ +trait ShowCreateTableSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "SHOW CREATE TABLE" + protected def ns: String = "ns1" + protected def table: String = "tbl" + protected def fullName: String + + test("SPARK-36012: add null flag when show create table") { + withNamespaceAndTable(ns, table) { t => + sql( + s""" + |CREATE TABLE $t ( + | a bigint NOT NULL, + | b bigint + |) + |USING ${classOf[SimpleInsertSource].getName} + """.stripMargin) + val showDDL = getShowCreateDDL(t) + assert(showDDL(0) == s"CREATE TABLE $fullName (") + assert(showDDL(1) == "`a` BIGINT NOT NULL,") + assert(showDDL(2) == "`b` BIGINT)") + assert(showDDL(3) == s"USING ${classOf[SimpleInsertSource].getName}") + } + } + + test("data source table with user specified schema") { + withNamespaceAndTable(ns, table) { t => + val jsonFilePath = Utils.getSparkClassLoader.getResource("sample.json").getFile + sql( + s"""CREATE TABLE $t ( + | a STRING, + | b STRING, + | `extra col` ARRAY, + | `` STRUCT> + |) + |USING json + |OPTIONS ( + | PATH '$jsonFilePath' + |) + """.stripMargin + ) + val showDDL = getShowCreateDDL(t) + assert(showDDL(0) == s"CREATE TABLE $fullName (") + assert(showDDL(1) == "`a` STRING,") + assert(showDDL(2) == "`b` STRING,") + assert(showDDL(3) == "`extra col` ARRAY,") + assert(showDDL(4) == "`` STRUCT<`x`: INT, `y`: ARRAY>)") + assert(showDDL(5) == "USING json") + assert(showDDL(6).startsWith("LOCATION 'file:") && showDDL(6).endsWith("sample.json'")) + } + } + + test("SPARK-24911: keep quotes for nested fields") { + withNamespaceAndTable(ns, table) { t => + sql( + s""" + |CREATE TABLE $t ( + | `a` STRUCT<`b`: STRING> + |) + |USING json + """.stripMargin) + val showDDL = getShowCreateDDL(t) + assert(showDDL(0) == s"CREATE TABLE $fullName (") + assert(showDDL(1) == "`a` STRUCT<`b`: STRING>)") + assert(showDDL(2) == "USING json") + } + } + + test("SPARK-37494: Unify v1 and v2 option output") { + withNamespaceAndTable(ns, table) { t => + sql( + s"""CREATE TABLE $t ( + | a STRING + |) + |USING json + |TBLPROPERTIES ( + | 'b' = '1', + | 'a' = '2') + |OPTIONS ( + | k4 'v4', + | `k3` 'v3', + | 'k5' 'v5', + | 'k1' = 'v1', + | k2 = 'v2' + |) + """.stripMargin + ) + val expected = s"CREATE TABLE $fullName ( `a` STRING) USING json" + + " OPTIONS ( 'k1' = 'v1', 'k2' = 'v2', 'k3' = 'v3', 'k4' = 'v4', 'k5' = 'v5')" + + " TBLPROPERTIES ( 'a' = '2', 'b' = '1')" + assert(getShowCreateDDL(t).mkString(" ") == expected) + } + } + + test("data source table CTAS") { + withNamespaceAndTable(ns, table) { t => + sql( + s"""CREATE TABLE $t + |USING json + |AS SELECT 1 AS a, "foo" AS b + """.stripMargin + ) + val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING) USING json" + assert(getShowCreateDDL(t).mkString(" ") == expected) + } + } + + test("partitioned data source table") { + withNamespaceAndTable(ns, table) { t => + sql( + s"""CREATE TABLE $t + |USING json + |PARTITIONED BY (b) + |AS SELECT 1 AS a, "foo" AS b + """.stripMargin + ) + val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING) 
USING json PARTITIONED BY (b)" + assert(getShowCreateDDL(t).mkString(" ") == expected) + } + } + + test("data source table with a comment") { + withNamespaceAndTable(ns, table) { t => + sql( + s"""CREATE TABLE $t + |USING json + |COMMENT 'This is a comment' + |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c + """.stripMargin + ) + val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING, `c` DECIMAL(2,1)) USING json" + + s" COMMENT 'This is a comment'" + assert(getShowCreateDDL(t).mkString(" ") == expected) + } + } + + test("data source table with table properties") { + withNamespaceAndTable(ns, table) { t => + sql( + s"""CREATE TABLE $t + |USING json + |TBLPROPERTIES ('a' = '1') + |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c + """.stripMargin + ) + val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING, `c` DECIMAL(2,1)) USING json" + + s" TBLPROPERTIES ( 'a' = '1')" + assert(getShowCreateDDL(t).mkString(" ") == expected) + } + } + + def getShowCreateDDL(table: String, serde: Boolean = false): Array[String] = { + val result = if (serde) { + sql(s"SHOW CREATE TABLE $table AS SERDE") + } else { + sql(s"SHOW CREATE TABLE $table") + } + result.head().getString(0).split("\n").map(_.trim) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala new file mode 100644 index 0000000000000..208ed4c08afc8 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.command + +/** + * This base suite contains unified tests for the `SHOW CREATE TABLE` command that checks V1 + * table catalogs. 
The tests that cannot run for all V1 catalogs are located in more + * specific test suites: + * + * - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.ShowCreateTableSuite` + * - V1 Hive External catalog: + * `org.apache.spark.sql.hive.execution.command.ShowCreateTableSuite` + */ +trait ShowCreateTableSuiteBase extends command.ShowCreateTableSuiteBase + with command.TestsV1AndV2Commands { + override def fullName: String = s"`$ns`.`$table`" + + test("show create table[simple]") { + // todo After SPARK-37517 unify the testcase both v1 and v2 + withNamespaceAndTable(ns, table) { t => + sql( + s""" + |CREATE TABLE $t ( + | a bigint NOT NULL, + | b bigint, + | c bigint, + | `extraCol` ARRAY, + | `` STRUCT> + |) + |using parquet + |OPTIONS ( + | from = 0, + | to = 1, + | via = 2) + |COMMENT 'This is a comment' + |TBLPROPERTIES ('prop1' = '1', 'prop2' = '2', 'prop3' = 3, 'prop4' = 4) + |PARTITIONED BY (a) + |LOCATION '/tmp' + """.stripMargin) + val showDDL = getShowCreateDDL(t) + assert(showDDL === Array( + s"CREATE TABLE $fullName (", + "`b` BIGINT,", + "`c` BIGINT,", + "`extraCol` ARRAY,", + "`` STRUCT<`x`: INT, `y`: ARRAY>,", + "`a` BIGINT NOT NULL)", + "USING parquet", + "OPTIONS (", + "'from' = '0',", + "'to' = '1',", + "'via' = '2')", + "PARTITIONED BY (a)", + "COMMENT 'This is a comment'", + "LOCATION 'file:/tmp'", + "TBLPROPERTIES (", + "'prop1' = '1',", + "'prop2' = '2',", + "'prop3' = '3',", + "'prop4' = '4')" + )) + } + } + + test("bucketed data source table") { + withNamespaceAndTable(ns, table) { t => + sql( + s"""CREATE TABLE $t + |USING json + |CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS + |AS SELECT 1 AS a, "foo" AS b + """.stripMargin + ) + val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING) USING json" + + s" CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS" + assert(getShowCreateDDL(t).mkString(" ") == expected) + } + } + + test("partitioned bucketed data source table") { + withNamespaceAndTable(ns, table) { t => + sql( + s"""CREATE TABLE $t + |USING json + |PARTITIONED BY (c) + |CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS + |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c + """.stripMargin + ) + val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING, `c` DECIMAL(2,1)) USING json" + + s" PARTITIONED BY (c) CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS" + assert(getShowCreateDDL(t).mkString(" ") == expected) + } + } + + test("show create table as serde can't work on data source table") { + withNamespaceAndTable(ns, table) { t => + sql( + s""" + |CREATE TABLE $t ( + | c1 STRING COMMENT 'bla', + | c2 STRING + |) + |USING orc + """.stripMargin + ) + + val cause = intercept[AnalysisException] { + getShowCreateDDL(t, true) + } + + assert(cause.getMessage.contains("Use `SHOW CREATE TABLE` without `AS SERDE` instead")) + } + } +} + +/** + * The class contains tests for the `SHOW CREATE TABLE` command to check V1 In-Memory + * table catalog. 
+ */ +class ShowCreateTableSuite extends ShowCreateTableSuiteBase with CommandSuiteBase { + override def commandVersion: String = super[ShowCreateTableSuiteBase].commandVersion +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala new file mode 100644 index 0000000000000..35b196fe0d8bb --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.command + +/** + * The class contains tests for the `SHOW CREATE TABLE` command to check V2 table catalogs. + */ +class ShowCreateTableSuite extends command.ShowCreateTableSuiteBase with CommandSuiteBase { + override def fullName: String = s"$catalog.$ns.$table" + + test("SPARK-33898: show create table as serde") { + withNamespaceAndTable(ns, table) { t => + spark.sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") + val e = intercept[AnalysisException] { + sql(s"SHOW CREATE TABLE $t AS SERDE") + } + assert(e.message.contains(s"SHOW CREATE TABLE AS SERDE is not supported for v2 tables.")) + } + } + + test("SPARK-33898: show create table[CTAS]") { + // does not work with hive, also different order between v2 with v1/hive + withNamespaceAndTable(ns, table) { t => + sql( + s"""CREATE TABLE $t + |$defaultUsing + |PARTITIONED BY (a) + |COMMENT 'This is a comment' + |TBLPROPERTIES ('a' = '1') + |AS SELECT 1 AS a, "foo" AS b + """.stripMargin + ) + val showDDL = getShowCreateDDL(t, false) + assert(showDDL === Array( + s"CREATE TABLE $t (", + "`a` INT,", + "`b` STRING)", + defaultUsing, + "PARTITIONED BY (a)", + "COMMENT 'This is a comment'", + "TBLPROPERTIES (", + "'a' = '1')" + )) + } + } + + test("SPARK-33898: show create table[simple]") { + // TODO: After SPARK-37517, we can move the test case to base to test for v2/v1/hive + val db = "ns1" + val table = "tbl" + withNamespaceAndTable(db, table) { t => + sql( + s""" + |CREATE TABLE $t ( + | a bigint NOT NULL, + | b bigint, + | c bigint, + | `extraCol` ARRAY, + | `` STRUCT> + |) + |$defaultUsing + |OPTIONS ( + | from = 0, + | to = 1, + | via = 2) + |COMMENT 'This is a comment' + |TBLPROPERTIES ('prop1' = '1', 'prop2' = '2', 'prop3' = 3, 'prop4' = 4) + |PARTITIONED BY (a) + |LOCATION '/tmp' + """.stripMargin) + val showDDL = getShowCreateDDL(t, false) + assert(showDDL === Array( + s"CREATE TABLE $t (", + "`a` BIGINT NOT NULL,", + "`b` BIGINT,", + "`c` BIGINT,", + "`extraCol` ARRAY,", + "`` STRUCT<`x`: INT, `y`: ARRAY>)", + defaultUsing, + 
"OPTIONS (", + "'from' = '0',", + "'to' = '1',", + "'via' = '2')", + "PARTITIONED BY (a)", + "COMMENT 'This is a comment'", + "LOCATION 'file:/tmp'", + "TBLPROPERTIES (", + "'prop1' = '1',", + "'prop2' = '2',", + "'prop3' = '3',", + "'prop4' = '4')" + )) + } + } + + test("SPARK-33898: show create table[multi-partition]") { + withNamespaceAndTable(ns, table) { t => + sql( + s""" + |CREATE TABLE $t (a INT, b STRING, ts TIMESTAMP) $defaultUsing + |PARTITIONED BY ( + | a, + | bucket(16, b), + | years(ts), + | months(ts), + | days(ts), + | hours(ts)) + """.stripMargin) + // V1 transforms cannot be converted to partition columns: bucket,year,...) + val showDDL = getShowCreateDDL(t, false) + assert(showDDL === Array( + s"CREATE TABLE $t (", + "`a` INT,", + "`b` STRING,", + "`ts` TIMESTAMP)", + defaultUsing, + "PARTITIONED BY (a, bucket(16, b), years(ts), months(ts), days(ts), hours(ts))" + )) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala index 189a5c5768f61..cbf5e640db468 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, HiveTableRelation} import org.apache.spark.sql.execution.SQLViewSuite -import org.apache.spark.sql.hive.HiveUtils +import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.types.{NullType, StructType} import org.apache.spark.tags.SlowHiveTest @@ -181,4 +181,43 @@ class HiveSQLViewSuite extends SQLViewSuite with TestHiveSingleton { } } } + + test("hive partitioned view is not supported") { + withTable("test") { + withView("v1") { + sql( + s""" + |CREATE TABLE test (c1 INT, c2 STRING) + |PARTITIONED BY ( + | p1 BIGINT COMMENT 'bla', + | p2 STRING ) + """.stripMargin) + + createRawHiveTable( + s""" + |CREATE VIEW v1 + |PARTITIONED ON (p1, p2) + |AS SELECT * from test + """.stripMargin + ) + + val cause = intercept[AnalysisException] { + sql("SHOW CREATE TABLE v1") + } + + assert(cause.getMessage.contains(" - partitioned view")) + + val causeForSpark = intercept[AnalysisException] { + sql("SHOW CREATE TABLE v1 AS SERDE") + } + + assert(causeForSpark.getMessage.contains(" - partitioned view")) + } + } + } + + private def createRawHiveTable(ddl: String): Unit = { + hiveContext.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog] + .client.runSqlHive(ddl) + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowCreateTableSuite.scala similarity index 63% rename from sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala rename to sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowCreateTableSuite.scala index e3a1034ad4f1d..58145b03fd3c0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowCreateTableSuite.scala @@ -15,77 +15,30 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hive +package org.apache.spark.sql.hive.execution.command -import org.apache.spark.sql.{AnalysisException, ShowCreateTableSuite} +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.CatalogTable -import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} - -class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSingleton { - - private var origCreateHiveTableConfig = false - - protected override def beforeAll(): Unit = { - super.beforeAll() - origCreateHiveTableConfig = - spark.conf.get(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT) - spark.conf.set(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key, true) - } - - protected override def afterAll(): Unit = { - spark.conf.set( - SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key, - origCreateHiveTableConfig) - super.afterAll() - } - - test("view") { - Seq(true, false).foreach { serde => - withView("v1") { - sql("CREATE VIEW v1 AS SELECT 1 AS a") - checkCreateView("v1", serde) - } - } - } - - test("view with output columns") { - Seq(true, false).foreach { serde => - withView("v1") { - sql("CREATE VIEW v1 (a, b COMMENT 'b column') AS SELECT 1 AS a, 2 AS b") - checkCreateView("v1", serde) - } - } - } - - test("view with table comment and properties") { - Seq(true, false).foreach { serde => - withView("v1") { - sql( - s""" - |CREATE VIEW v1 ( - | c1 COMMENT 'bla', - | c2 - |) - |COMMENT 'table comment' - |TBLPROPERTIES ( - | 'prop1' = 'value1', - | 'prop2' = 'value2' - |) - |AS SELECT 1 AS c1, '2' AS c2 - """.stripMargin - ) +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils} +import org.apache.spark.sql.catalyst.util.escapeSingleQuotedString +import org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.internal.HiveSerDe + +/** + * The class contains tests for the `SHOW CREATE TABLE` command to check V1 Hive external + * table catalog. 
+ */ +class ShowCreateTableSuite extends v1.ShowCreateTableSuiteBase with CommandSuiteBase { + override def commandVersion: String = super[ShowCreateTableSuiteBase].commandVersion - checkCreateView("v1", serde) - } - } + override def getShowCreateDDL(table: String, serde: Boolean = false): Array[String] = { + super.getShowCreateDDL(table, serde).filter(!_.startsWith("'transient_lastDdlTime'")) } test("simple hive table") { - withTable("t1") { + withNamespaceAndTable(ns, table) { t => sql( - s"""CREATE TABLE t1 ( + s"""CREATE TABLE $t ( | c1 INT COMMENT 'bla', | c2 STRING |) @@ -95,16 +48,21 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet |) """.stripMargin ) - - checkCreateTable("t1", serde = true) + val expected = s"CREATE TABLE $fullName ( `c1` INT COMMENT 'bla', `c2` STRING)" + + " ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'" + + " WITH SERDEPROPERTIES ( 'serialization.format' = '1')" + + " STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'" + + " OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'" + + " TBLPROPERTIES ( 'prop1' = 'value1', 'prop2' = 'value2'," + assert(getShowCreateDDL(t, true).mkString(" ") == expected) } } test("simple external hive table") { withTempDir { dir => - withTable("t1") { + withNamespaceAndTable(ns, table) { t => sql( - s"""CREATE TABLE t1 ( + s"""CREATE TABLE $t ( | c1 INT COMMENT 'bla', | c2 STRING |) @@ -115,16 +73,23 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet |) """.stripMargin ) - - checkCreateTable("t1", serde = true) + val expected = s"CREATE EXTERNAL TABLE $fullName ( `c1` INT COMMENT 'bla', `c2` STRING)" + + s" ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'" + + s" WITH SERDEPROPERTIES ( 'serialization.format' = '1')" + + s" STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'" + + s" OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'" + + s" LOCATION" + + s" '${escapeSingleQuotedString(CatalogUtils.URIToString(dir.toURI)).dropRight(1)}'" + + s" TBLPROPERTIES ( 'prop1' = 'value1', 'prop2' = 'value2'," + assert(getShowCreateDDL(t, true).mkString(" ") == expected) } } } test("partitioned hive table") { - withTable("t1") { + withNamespaceAndTable(ns, table) { t => sql( - s"""CREATE TABLE t1 ( + s"""CREATE TABLE $t ( | c1 INT COMMENT 'bla', | c2 STRING |) @@ -135,15 +100,21 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet |) """.stripMargin ) - - checkCreateTable("t1", serde = true) + val expected = s"CREATE TABLE $fullName ( `c1` INT COMMENT 'bla', `c2` STRING)" + + " COMMENT 'bla' PARTITIONED BY (`p1` BIGINT COMMENT 'bla', `p2` STRING)" + + " ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'" + + " WITH SERDEPROPERTIES ( 'serialization.format' = '1')" + + " STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'" + + " OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'" + + " TBLPROPERTIES (" + assert(getShowCreateDDL(t, true).mkString(" ") == expected) } } test("hive table with explicit storage info") { - withTable("t1") { + withNamespaceAndTable(ns, table) { t => sql( - s"""CREATE TABLE t1 ( + s"""CREATE TABLE $t ( | c1 INT COMMENT 'bla', | c2 STRING |) @@ -153,30 +124,44 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet |NULL DEFINED AS 'NaN' """.stripMargin ) - - checkCreateTable("t1", serde = true) + val expected = s"CREATE TABLE 
$fullName ( `c1` INT COMMENT 'bla', `c2` STRING)" + + " ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'" + + " WITH SERDEPROPERTIES (" + + " 'colelction.delim' = '@'," + + " 'mapkey.delim' = '#'," + + " 'serialization.format' = ','," + + " 'field.delim' = ',')" + + " STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'" + + " OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'" + + " TBLPROPERTIES (" + assert(getShowCreateDDL(t, true).mkString(" ") == expected) } } test("hive table with STORED AS clause") { - withTable("t1") { + withNamespaceAndTable(ns, table) { t => sql( - s"""CREATE TABLE t1 ( + s"""CREATE TABLE $t ( | c1 INT COMMENT 'bla', | c2 STRING |) |STORED AS PARQUET """.stripMargin ) - - checkCreateTable("t1", serde = true) + val expected = s"CREATE TABLE $fullName ( `c1` INT COMMENT 'bla', `c2` STRING)" + + " ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'" + + " WITH SERDEPROPERTIES ( 'serialization.format' = '1')" + + " STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'" + + " OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'" + + " TBLPROPERTIES (" + assert(getShowCreateDDL(t, true).mkString(" ") == expected) } } test("hive table with serde info") { - withTable("t1") { + withNamespaceAndTable(ns, table) { t => sql( - s"""CREATE TABLE t1 ( + s"""CREATE TABLE $t ( | c1 INT COMMENT 'bla', | c2 STRING |) @@ -190,75 +175,39 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' """.stripMargin ) - - checkCreateTable("t1", serde = true) + val expected = s"CREATE TABLE $fullName ( `c1` INT COMMENT 'bla', `c2` STRING)" + + " ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'" + + " WITH SERDEPROPERTIES (" + + " 'mapkey.delim' = ','," + + " 'serialization.format' = '1'," + + " 'field.delim' = ',')" + + " STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'" + + " OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'" + + " TBLPROPERTIES (" + assert(getShowCreateDDL(t, true).mkString(" ") == expected) } } test("hive bucketing is supported") { - withTable("t1") { + withNamespaceAndTable(ns, table) { t => sql( - s"""CREATE TABLE t1 (a INT, b STRING) + s"""CREATE TABLE $t (a INT, b STRING) |CLUSTERED BY (a) |SORTED BY (b) |INTO 2 BUCKETS """.stripMargin ) - checkCreateTable("t1", serde = true) - } - } - - test("hive partitioned view is not supported") { - withTable("t1") { - withView("v1") { - sql( - s""" - |CREATE TABLE t1 (c1 INT, c2 STRING) - |PARTITIONED BY ( - | p1 BIGINT COMMENT 'bla', - | p2 STRING ) - """.stripMargin) - - createRawHiveTable( - s""" - |CREATE VIEW v1 - |PARTITIONED ON (p1, p2) - |AS SELECT * from t1 - """.stripMargin - ) - - val cause = intercept[AnalysisException] { - sql("SHOW CREATE TABLE v1") - } - - assert(cause.getMessage.contains(" - partitioned view")) - - val causeForSpark = intercept[AnalysisException] { - sql("SHOW CREATE TABLE v1 AS SERDE") - } - - assert(causeForSpark.getMessage.contains(" - partitioned view")) - } + val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING)" + + " CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS" + + " ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'" + + " WITH SERDEPROPERTIES ( 'serialization.format' = '1')" + + " STORED AS INPUTFORMAT 
'org.apache.hadoop.mapred.TextInputFormat'" + + " OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'" + + " TBLPROPERTIES (" + assert(getShowCreateDDL(t, true).mkString(" ") == expected) } } - test("SPARK-24911: keep quotes for nested fields in hive") { - withTable("t1") { - val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>) USING hive" - sql(createTable) - val shownDDL = getShowDDL("SHOW CREATE TABLE t1") - assert(shownDDL.substring(0, shownDDL.indexOf(" USING")) == - "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>)") - - checkCreateTable("t1", serde = true) - } - } - - private def createRawHiveTable(ddl: String): Unit = { - hiveContext.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog] - .client.runSqlHive(ddl) - } - private def checkCreateSparkTableAsHive(tableName: String): Unit = { val table = TableIdentifier(tableName, Some("default")) val db = table.database.get @@ -339,26 +288,6 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet } } - test("show create table as serde can't work on data source table") { - withTable("t1") { - sql( - s""" - |CREATE TABLE t1 ( - | c1 STRING COMMENT 'bla', - | c2 STRING - |) - |USING orc - """.stripMargin - ) - - val cause = intercept[AnalysisException] { - checkCreateTable("t1", serde = true) - } - - assert(cause.getMessage.contains("Use `SHOW CREATE TABLE` without `AS SERDE` instead")) - } - } - test("simple external hive table in Spark DDL") { withTempDir { dir => withTable("t1") { From 21ad3da08214980f776f17c605774a249192ed4a Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 13 Jan 2022 13:00:02 +0900 Subject: [PATCH 003/513] [SPARK-37885][PYTHON] Make pandas_udf to take type annotations with future annotations enabled ### What changes were proposed in this pull request? Makes `pandas_udf` to take type annotations with future annotations enabled. ### Why are the changes needed? When using `from __future__ import annotations`, the type hints will be all strings, then pandas UDF type inference won't work as follows: ```py >>> from __future__ import annotations >>> from typing import Union >>> import pandas as pd >>> from pyspark.sql.functions import pandas_udf >>> pandas_udf("long") ... def plus_one(v: Union[pd.Series, pd.DataFrame]) -> pd.Series: ... return v + 1 Traceback (most recent call last): ... NotImplementedError: Unsupported signature: (v: 'Union[pd.Series, pd.DataFrame]') -> 'pd.Series'. ``` ### Does this PR introduce _any_ user-facing change? Yes. Users can use type annotations for pandas UDFs with the future annotations flag. ```py >>> from __future__ import annotations >>> from typing import Union >>> import pandas as pd >>> from pyspark.sql.functions import pandas_udf >>> pandas_udf("long") ... def plus_one(v: Union[pd.Series, pd.DataFrame]) -> pd.Series: ... return v + 1 ... >>> df = spark.range(10).selectExpr("id", "id as v") >>> df.select(plus_one(df.v).alias("plus_one")).show() +--------+ |plus_one| +--------+ | 1| | 2| | 3| | 4| | 5| | 6| | 7| | 8| | 9| | 10| +--------+ ``` ### How was this patch tested? Added tests with the future annotations enabled. Closes #35184 from ueshin/issues/SPARK-37885/annotations. 
Authored-by: Takuya UESHIN Signed-off-by: Hyukjin Kwon --- dev/sparktestsupport/modules.py | 1 + python/pyspark/sql/pandas/functions.py | 11 +- python/pyspark/sql/pandas/typehints.py | 10 +- .../sql/tests/test_pandas_udf_typehints.py | 148 +++++-- ...s_udf_typehints_with_future_annotations.py | 375 ++++++++++++++++++ 5 files changed, 511 insertions(+), 34 deletions(-) create mode 100644 python/pyspark/sql/tests/test_pandas_udf_typehints_with_future_annotations.py diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 7cd5bd15752ae..6e668bba8c803 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -453,6 +453,7 @@ def __hash__(self): "pyspark.sql.tests.test_pandas_udf_grouped_agg", "pyspark.sql.tests.test_pandas_udf_scalar", "pyspark.sql.tests.test_pandas_udf_typehints", + "pyspark.sql.tests.test_pandas_udf_typehints_with_future_annotations", "pyspark.sql.tests.test_pandas_udf_window", "pyspark.sql.tests.test_readwriter", "pyspark.sql.tests.test_serde", diff --git a/python/pyspark/sql/pandas/functions.py b/python/pyspark/sql/pandas/functions.py index 5f502209ae608..0b7aa6b2abb2c 100644 --- a/python/pyspark/sql/pandas/functions.py +++ b/python/pyspark/sql/pandas/functions.py @@ -17,7 +17,8 @@ import functools import warnings -from inspect import getfullargspec +from inspect import getfullargspec, signature +from typing import get_type_hints from pyspark.rdd import PythonEvalType from pyspark.sql.pandas.typehints import infer_eval_type @@ -385,8 +386,6 @@ def _create_pandas_udf(f, returnType, evalType): argspec = getfullargspec(f) # pandas UDF by type hints. - from inspect import signature - if evalType in [ PythonEvalType.SQL_SCALAR_PANDAS_UDF, PythonEvalType.SQL_SCALAR_PANDAS_ITER_UDF, @@ -410,7 +409,11 @@ def _create_pandas_udf(f, returnType, evalType): # 'SQL_COGROUPED_MAP_PANDAS_UDF', the evaluation type will always be set. pass elif len(argspec.annotations) > 0: - evalType = infer_eval_type(signature(f)) + try: + type_hints = get_type_hints(f) + except NameError: + type_hints = {} + evalType = infer_eval_type(signature(f), type_hints) assert evalType is not None if evalType is None: diff --git a/python/pyspark/sql/pandas/typehints.py b/python/pyspark/sql/pandas/typehints.py index 167104c1ad7dc..fc3dd89a0712a 100644 --- a/python/pyspark/sql/pandas/typehints.py +++ b/python/pyspark/sql/pandas/typehints.py @@ -15,7 +15,7 @@ # limitations under the License. # from inspect import Signature -from typing import Any, Callable, Optional, Union, TYPE_CHECKING +from typing import Any, Callable, Dict, Optional, Union, TYPE_CHECKING from pyspark.sql.pandas.utils import require_minimum_pandas_version @@ -28,11 +28,11 @@ def infer_eval_type( - sig: Signature, + sig: Signature, type_hints: Dict[str, Any] ) -> Union["PandasScalarUDFType", "PandasScalarIterUDFType", "PandasGroupedAggUDFType"]: """ Infers the evaluation type in :class:`pyspark.rdd.PythonEvalType` from - :class:`inspect.Signature` instance. + :class:`inspect.Signature` instance and type hints. 
""" from pyspark.sql.pandas.functions import PandasUDFType @@ -43,7 +43,7 @@ def infer_eval_type( annotations = {} for param in sig.parameters.values(): if param.annotation is not param.empty: - annotations[param.name] = param.annotation + annotations[param.name] = type_hints.get(param.name, param.annotation) # Check if all arguments have type hints parameters_sig = [ @@ -53,7 +53,7 @@ def infer_eval_type( raise ValueError("Type hints for all parameters should be specified; however, got %s" % sig) # Check if the return has a type hint - return_annotation = sig.return_annotation + return_annotation = type_hints.get("return", sig.return_annotation) if sig.empty is return_annotation: raise ValueError("Type hint for the return type should be specified; however, got %s" % sig) diff --git a/python/pyspark/sql/tests/test_pandas_udf_typehints.py b/python/pyspark/sql/tests/test_pandas_udf_typehints.py index 119b2cf310f5d..661dd7b38479f 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_typehints.py +++ b/python/pyspark/sql/tests/test_pandas_udf_typehints.py @@ -15,8 +15,8 @@ # limitations under the License. # import unittest -import inspect -from typing import Union, Iterator, Tuple, cast +from inspect import signature +from typing import Union, Iterator, Tuple, cast, get_type_hints from pyspark.sql.functions import mean, lit from pyspark.testing.sqlutils import ( @@ -45,84 +45,116 @@ def test_type_annotation_scalar(self): def func(col: pd.Series) -> pd.Series: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.SCALAR) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) def func(col: pd.DataFrame, col1: pd.Series) -> pd.DataFrame: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.SCALAR) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) def func(col: pd.DataFrame, *args: pd.Series) -> pd.Series: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.SCALAR) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) def func(col: pd.Series, *args: pd.Series, **kwargs: pd.DataFrame) -> pd.Series: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.SCALAR) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) def func(col: pd.Series, *, col2: pd.DataFrame) -> pd.DataFrame: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.SCALAR) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) def func(col: Union[pd.Series, pd.DataFrame], *, col2: pd.DataFrame) -> pd.Series: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.SCALAR) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) def test_type_annotation_scalar_iter(self): def func(iter: Iterator[pd.Series]) -> Iterator[pd.Series]: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.SCALAR_ITER) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR_ITER + ) def func(iter: Iterator[Tuple[pd.DataFrame, pd.Series]]) -> Iterator[pd.DataFrame]: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.SCALAR_ITER) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), 
PandasUDFType.SCALAR_ITER + ) def func(iter: Iterator[Tuple[pd.DataFrame, ...]]) -> Iterator[pd.Series]: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.SCALAR_ITER) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR_ITER + ) def func(iter: Iterator[Tuple[Union[pd.DataFrame, pd.Series], ...]]) -> Iterator[pd.Series]: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.SCALAR_ITER) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR_ITER + ) def test_type_annotation_group_agg(self): def func(col: pd.Series) -> str: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.GROUPED_AGG) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.GROUPED_AGG + ) def func(col: pd.DataFrame, col1: pd.Series) -> int: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.GROUPED_AGG) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.GROUPED_AGG + ) def func(col: pd.DataFrame, *args: pd.Series) -> Row: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.GROUPED_AGG) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.GROUPED_AGG + ) def func(col: pd.Series, *args: pd.Series, **kwargs: pd.DataFrame) -> str: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.GROUPED_AGG) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.GROUPED_AGG + ) def func(col: pd.Series, *, col2: pd.DataFrame) -> float: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.GROUPED_AGG) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.GROUPED_AGG + ) def func(col: Union[pd.Series, pd.DataFrame], *, col2: pd.DataFrame) -> float: pass - self.assertEqual(infer_eval_type(inspect.signature(func)), PandasUDFType.GROUPED_AGG) + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.GROUPED_AGG + ) def test_type_annotation_negative(self): def func(col: str) -> pd.Series: @@ -132,7 +164,8 @@ def func(col: str) -> pd.Series: NotImplementedError, "Unsupported signature.*str", infer_eval_type, - inspect.signature(func), + signature(func), + get_type_hints(func), ) def func(col: pd.DataFrame, col1: int) -> pd.DataFrame: @@ -142,7 +175,8 @@ def func(col: pd.DataFrame, col1: int) -> pd.DataFrame: NotImplementedError, "Unsupported signature.*int", infer_eval_type, - inspect.signature(func), + signature(func), + get_type_hints(func), ) def func(col: Union[pd.DataFrame, str], col1: int) -> pd.DataFrame: @@ -152,7 +186,8 @@ def func(col: Union[pd.DataFrame, str], col1: int) -> pd.DataFrame: NotImplementedError, "Unsupported signature.*str", infer_eval_type, - inspect.signature(func), + signature(func), + get_type_hints(func), ) def func(col: pd.Series) -> Tuple[pd.DataFrame]: @@ -162,28 +197,41 @@ def func(col: pd.Series) -> Tuple[pd.DataFrame]: NotImplementedError, "Unsupported signature.*Tuple", infer_eval_type, - inspect.signature(func), + signature(func), + get_type_hints(func), ) def func(col, *args: pd.Series) -> pd.Series: pass self.assertRaisesRegex( - ValueError, "should be specified.*Series", infer_eval_type, inspect.signature(func) + ValueError, + "should be specified.*Series", + infer_eval_type, + signature(func), + 
get_type_hints(func), ) def func(col: pd.Series, *args: pd.Series, **kwargs: pd.DataFrame): pass self.assertRaisesRegex( - ValueError, "should be specified.*Series", infer_eval_type, inspect.signature(func) + ValueError, + "should be specified.*Series", + infer_eval_type, + signature(func), + get_type_hints(func), ) def func(col: pd.Series, *, col2) -> pd.DataFrame: pass self.assertRaisesRegex( - ValueError, "should be specified.*Series", infer_eval_type, inspect.signature(func) + ValueError, + "should be specified.*Series", + infer_eval_type, + signature(func), + get_type_hints(func), ) def test_scalar_udf_type_hint(self): @@ -257,6 +305,56 @@ def pandas_plus_one(iter: Iterator[pd.DataFrame]) -> Iterator[pd.DataFrame]: expected = df.selectExpr("id + 1 as id") assert_frame_equal(expected.toPandas(), actual.toPandas()) + def test_string_type_annotation(self): + def func(col: "pd.Series") -> "pd.Series": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: "pd.DataFrame", col1: "pd.Series") -> "pd.DataFrame": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: "pd.DataFrame", *args: "pd.Series") -> "pd.Series": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: "pd.Series", *args: "pd.Series", **kwargs: "pd.DataFrame") -> "pd.Series": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: "pd.Series", *, col2: "pd.DataFrame") -> "pd.DataFrame": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: Union["pd.Series", "pd.DataFrame"], *, col2: "pd.DataFrame") -> "pd.Series": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: "Union[pd.Series, pd.DataFrame]", *, col2: "pd.DataFrame") -> "pd.Series": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + if __name__ == "__main__": from pyspark.sql.tests.test_pandas_udf_typehints import * # noqa: #401 diff --git a/python/pyspark/sql/tests/test_pandas_udf_typehints_with_future_annotations.py b/python/pyspark/sql/tests/test_pandas_udf_typehints_with_future_annotations.py new file mode 100644 index 0000000000000..1dfec04495893 --- /dev/null +++ b/python/pyspark/sql/tests/test_pandas_udf_typehints_with_future_annotations.py @@ -0,0 +1,375 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +import sys +import unittest +from inspect import signature +from typing import Union, Iterator, Tuple, cast, get_type_hints + +from pyspark.sql.functions import mean, lit +from pyspark.testing.sqlutils import ( + ReusedSQLTestCase, + have_pandas, + have_pyarrow, + pandas_requirement_message, + pyarrow_requirement_message, +) +from pyspark.sql.pandas.typehints import infer_eval_type +from pyspark.sql.pandas.functions import pandas_udf, PandasUDFType +from pyspark.sql import Row + +if have_pandas: + import pandas as pd + import numpy as np + from pandas.testing import assert_frame_equal + + +@unittest.skipIf( + not have_pandas or not have_pyarrow, + cast(str, pandas_requirement_message or pyarrow_requirement_message), +) +class PandasUDFTypeHintsWithFutureAnnotationsTests(ReusedSQLTestCase): + def test_type_annotation_scalar(self): + def func(col: pd.Series) -> pd.Series: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: pd.DataFrame, col1: pd.Series) -> pd.DataFrame: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: pd.DataFrame, *args: pd.Series) -> pd.Series: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: pd.Series, *args: pd.Series, **kwargs: pd.DataFrame) -> pd.Series: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: pd.Series, *, col2: pd.DataFrame) -> pd.DataFrame: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: Union[pd.Series, pd.DataFrame], *, col2: pd.DataFrame) -> pd.Series: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def test_type_annotation_scalar_iter(self): + def func(iter: Iterator[pd.Series]) -> Iterator[pd.Series]: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR_ITER + ) + + def func(iter: Iterator[Tuple[pd.DataFrame, pd.Series]]) -> Iterator[pd.DataFrame]: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR_ITER + ) + + def func(iter: Iterator[Tuple[pd.DataFrame, ...]]) -> Iterator[pd.Series]: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR_ITER + ) + + def func(iter: Iterator[Tuple[Union[pd.DataFrame, pd.Series], ...]]) -> Iterator[pd.Series]: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR_ITER + ) + + def test_type_annotation_group_agg(self): + def func(col: pd.Series) -> str: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.GROUPED_AGG + ) + + def func(col: pd.DataFrame, col1: pd.Series) -> int: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.GROUPED_AGG + ) + + def func(col: pd.DataFrame, *args: pd.Series) -> Row: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.GROUPED_AGG + ) + + def func(col: pd.Series, *args: pd.Series, **kwargs: pd.DataFrame) -> str: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.GROUPED_AGG + ) + 
+ def func(col: pd.Series, *, col2: pd.DataFrame) -> float: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.GROUPED_AGG + ) + + def func(col: Union[pd.Series, pd.DataFrame], *, col2: pd.DataFrame) -> float: + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.GROUPED_AGG + ) + + def test_type_annotation_negative(self): + def func(col: str) -> pd.Series: + pass + + self.assertRaisesRegex( + NotImplementedError, + "Unsupported signature.*str", + infer_eval_type, + signature(func), + get_type_hints(func), + ) + + def func(col: pd.DataFrame, col1: int) -> pd.DataFrame: + pass + + self.assertRaisesRegex( + NotImplementedError, + "Unsupported signature.*int", + infer_eval_type, + signature(func), + get_type_hints(func), + ) + + def func(col: Union[pd.DataFrame, str], col1: int) -> pd.DataFrame: + pass + + self.assertRaisesRegex( + NotImplementedError, + "Unsupported signature.*str", + infer_eval_type, + signature(func), + get_type_hints(func), + ) + + def func(col: pd.Series) -> Tuple[pd.DataFrame]: + pass + + self.assertRaisesRegex( + NotImplementedError, + "Unsupported signature.*Tuple", + infer_eval_type, + signature(func), + get_type_hints(func), + ) + + def func(col, *args: pd.Series) -> pd.Series: + pass + + self.assertRaisesRegex( + ValueError, + "should be specified.*Series", + infer_eval_type, + signature(func), + get_type_hints(func), + ) + + def func(col: pd.Series, *args: pd.Series, **kwargs: pd.DataFrame): + pass + + self.assertRaisesRegex( + ValueError, + "should be specified.*Series", + infer_eval_type, + signature(func), + get_type_hints(func), + ) + + def func(col: pd.Series, *, col2) -> pd.DataFrame: + pass + + self.assertRaisesRegex( + ValueError, + "should be specified.*Series", + infer_eval_type, + signature(func), + get_type_hints(func), + ) + + def test_scalar_udf_type_hint(self): + df = self.spark.range(10).selectExpr("id", "id as v") + + def plus_one(v: Union[pd.Series, pd.DataFrame]) -> pd.Series: + return v + 1 # type: ignore[return-value] + + plus_one = pandas_udf("long")(plus_one) + actual = df.select(plus_one(df.v).alias("plus_one")) + expected = df.selectExpr("(v + 1) as plus_one") + assert_frame_equal(expected.toPandas(), actual.toPandas()) + + def test_scalar_iter_udf_type_hint(self): + df = self.spark.range(10).selectExpr("id", "id as v") + + def plus_one(itr: Iterator[pd.Series]) -> Iterator[pd.Series]: + for s in itr: + yield s + 1 + + plus_one = pandas_udf("long")(plus_one) + + actual = df.select(plus_one(df.v).alias("plus_one")) + expected = df.selectExpr("(v + 1) as plus_one") + assert_frame_equal(expected.toPandas(), actual.toPandas()) + + def test_group_agg_udf_type_hint(self): + df = self.spark.range(10).selectExpr("id", "id as v") + + def weighted_mean(v: pd.Series, w: pd.Series) -> float: + return np.average(v, weights=w) + + weighted_mean = pandas_udf("double")(weighted_mean) + + actual = df.groupby("id").agg(weighted_mean(df.v, lit(1.0))).sort("id") + expected = df.groupby("id").agg(mean(df.v).alias("weighted_mean(v, 1.0)")).sort("id") + assert_frame_equal(expected.toPandas(), actual.toPandas()) + + def test_ignore_type_hint_in_group_apply_in_pandas(self): + df = self.spark.range(10) + + def pandas_plus_one(v: pd.DataFrame) -> pd.DataFrame: + return v + 1 + + actual = df.groupby("id").applyInPandas(pandas_plus_one, schema=df.schema).sort("id") + expected = df.selectExpr("id + 1 as id") + assert_frame_equal(expected.toPandas(), actual.toPandas()) + + 
def test_ignore_type_hint_in_cogroup_apply_in_pandas(self): + df = self.spark.range(10) + + def pandas_plus_one(left: pd.DataFrame, right: pd.DataFrame) -> pd.DataFrame: + return left + 1 + + actual = ( + df.groupby("id") + .cogroup(self.spark.range(10).groupby("id")) + .applyInPandas(pandas_plus_one, schema=df.schema) + .sort("id") + ) + expected = df.selectExpr("id + 1 as id") + assert_frame_equal(expected.toPandas(), actual.toPandas()) + + def test_ignore_type_hint_in_map_in_pandas(self): + df = self.spark.range(10) + + def pandas_plus_one(iter: Iterator[pd.DataFrame]) -> Iterator[pd.DataFrame]: + return map(lambda v: v + 1, iter) + + actual = df.mapInPandas(pandas_plus_one, schema=df.schema) + expected = df.selectExpr("id + 1 as id") + assert_frame_equal(expected.toPandas(), actual.toPandas()) + + @unittest.skipIf( + sys.version_info < (3, 9), + "string annotations with future annotations do not work under Python<3.9", + ) + def test_string_type_annotation(self): + def func(col: "pd.Series") -> "pd.Series": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: "pd.DataFrame", col1: "pd.Series") -> "pd.DataFrame": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: "pd.DataFrame", *args: "pd.Series") -> "pd.Series": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: "pd.Series", *args: "pd.Series", **kwargs: "pd.DataFrame") -> "pd.Series": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: "pd.Series", *, col2: "pd.DataFrame") -> "pd.DataFrame": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: Union["pd.Series", "pd.DataFrame"], *, col2: "pd.DataFrame") -> "pd.Series": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + def func(col: "Union[pd.Series, pd.DataFrame]", *, col2: "pd.DataFrame") -> "pd.Series": + pass + + self.assertEqual( + infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR + ) + + +if __name__ == "__main__": + from pyspark.sql.tests.test_pandas_udf_typehints_with_future_annotations import * # noqa: #401 + + try: + import xmlrunner # type: ignore[import] + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) From 79d42a1fa6cc354b34c790ea39573735f8363137 Mon Sep 17 00:00:00 2001 From: dch nguyen Date: Thu, 13 Jan 2022 18:18:07 +0900 Subject: [PATCH 004/513] [SPARK-37095][PYTHON] Inline type hints for files in python/pyspark/broadcast.py Lead-authored-by: dchvn nguyen Co-authored-by: zero323 ### What changes were proposed in this pull request? Inline type hints for python/pyspark/broadcast.py ### Why are the changes needed? We can take advantage of static type checking within the functions by inlining the type hints. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #34439 from dchvn/SPARK-37095. 
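With the hints inlined, the type parameter of `Broadcast[T]` flows through to call sites, so a static checker can validate how `Broadcast.value` is used. A minimal sketch (illustrative only, assuming an active `SparkContext` is available):

```py
from pyspark import SparkContext

sc = SparkContext.getOrCreate()
b = sc.broadcast({"answer": 42})  # inferred as Broadcast[Dict[str, int]]
total: int = b.value["answer"]    # a checker such as mypy can verify this access
b.unpersist(blocking=True)
```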
Authored-by: dch nguyen Signed-off-by: Hyukjin Kwon --- python/pyspark/broadcast.py | 81 ++++++++++++++++++++++++++++-------- python/pyspark/broadcast.pyi | 48 --------------------- 2 files changed, 64 insertions(+), 65 deletions(-) delete mode 100644 python/pyspark/broadcast.pyi diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py index 903e4ea4b0851..edd282de92f64 100644 --- a/python/pyspark/broadcast.py +++ b/python/pyspark/broadcast.py @@ -21,20 +21,40 @@ from tempfile import NamedTemporaryFile import threading import pickle +from typing import ( + overload, + Any, + Callable, + Dict, + Generic, + IO, + Iterator, + Optional, + Tuple, + TypeVar, + TYPE_CHECKING, + Union, +) +from typing.io import BinaryIO # type: ignore[import] from pyspark.java_gateway import local_connect_and_auth from pyspark.serializers import ChunkedStream, pickle_protocol from pyspark.util import print_exec +if TYPE_CHECKING: + from pyspark import SparkContext + __all__ = ["Broadcast"] +T = TypeVar("T") + # Holds broadcasted data received from Java, keyed by its id. -_broadcastRegistry = {} +_broadcastRegistry: Dict[int, "Broadcast[Any]"] = {} -def _from_id(bid): +def _from_id(bid: int) -> "Broadcast[Any]": from pyspark.broadcast import _broadcastRegistry if bid not in _broadcastRegistry: @@ -42,7 +62,7 @@ def _from_id(bid): return _broadcastRegistry[bid] -class Broadcast: +class Broadcast(Generic[T]): """ A broadcast variable created with :meth:`SparkContext.broadcast`. @@ -62,7 +82,31 @@ class Broadcast: >>> large_broadcast = sc.broadcast(range(10000)) """ - def __init__(self, sc=None, value=None, pickle_registry=None, path=None, sock_file=None): + @overload # On driver + def __init__( + self: "Broadcast[T]", + sc: "SparkContext", + value: T, + pickle_registry: "BroadcastPickleRegistry", + ): + ... + + @overload # On worker without decryption server + def __init__(self: "Broadcast[Any]", *, path: str): + ... + + @overload # On worker with decryption server + def __init__(self: "Broadcast[Any]", *, sock_file: str): + ... + + def __init__( + self, + sc: Optional["SparkContext"] = None, + value: Optional[T] = None, + pickle_registry: Optional["BroadcastPickleRegistry"] = None, + path: Optional[str] = None, + sock_file: Optional[BinaryIO] = None, + ): """ Should not be called directly by users -- use :meth:`SparkContext.broadcast` instead. @@ -71,8 +115,10 @@ def __init__(self, sc=None, value=None, pickle_registry=None, path=None, sock_fi # we're on the driver. 
We want the pickled data to end up in a file (maybe encrypted) f = NamedTemporaryFile(delete=False, dir=sc._temp_dir) self._path = f.name - self._sc = sc + self._sc: Optional["SparkContext"] = sc + assert sc._jvm is not None self._python_broadcast = sc._jvm.PythonRDD.setupBroadcast(self._path) + broadcast_out: Union[ChunkedStream, IO[bytes]] if sc._encryption_enabled: # with encryption, we ask the jvm to do the encryption for us, we send it data # over a socket @@ -82,7 +128,7 @@ def __init__(self, sc=None, value=None, pickle_registry=None, path=None, sock_fi else: # no encryption, we can just write pickled data directly to the file from python broadcast_out = f - self.dump(value, broadcast_out) + self.dump(value, broadcast_out) # type: ignore[arg-type] if sc._encryption_enabled: self._python_broadcast.waitTillDataReceived() self._jbroadcast = sc._jsc.broadcast(self._python_broadcast) @@ -102,7 +148,7 @@ def __init__(self, sc=None, value=None, pickle_registry=None, path=None, sock_fi assert path is not None self._path = path - def dump(self, value, f): + def dump(self, value: T, f: BinaryIO) -> None: try: pickle.dump(value, f, pickle_protocol) except pickle.PickleError: @@ -113,11 +159,11 @@ def dump(self, value, f): raise pickle.PicklingError(msg) f.close() - def load_from_path(self, path): + def load_from_path(self, path: str) -> T: with open(path, "rb", 1 << 20) as f: return self.load(f) - def load(self, file): + def load(self, file: BinaryIO) -> T: # "file" could also be a socket gc.disable() try: @@ -126,7 +172,7 @@ def load(self, file): gc.enable() @property - def value(self): + def value(self) -> T: """Return the broadcasted value""" if not hasattr(self, "_value") and self._path is not None: # we only need to decrypt it here when encryption is enabled and @@ -140,7 +186,7 @@ def value(self): self._value = self.load_from_path(self._path) return self._value - def unpersist(self, blocking=False): + def unpersist(self, blocking: bool = False) -> None: """ Delete cached copies of this broadcast on the executors. If the broadcast is used after this is called, it will need to be @@ -155,7 +201,7 @@ def unpersist(self, blocking=False): raise RuntimeError("Broadcast can only be unpersisted in driver") self._jbroadcast.unpersist(blocking) - def destroy(self, blocking=False): + def destroy(self, blocking: bool = False) -> None: """ Destroy all data and metadata related to this broadcast variable. 
Use this with caution; once a broadcast variable has been destroyed, @@ -175,9 +221,10 @@ def destroy(self, blocking=False): self._jbroadcast.destroy(blocking) os.unlink(self._path) - def __reduce__(self): + def __reduce__(self) -> Tuple[Callable[[int], "Broadcast[T]"], Tuple[int]]: if self._jbroadcast is None: raise RuntimeError("Broadcast can only be serialized in driver") + assert self._pickle_registry is not None self._pickle_registry.add(self) return _from_id, (self._jbroadcast.id(),) @@ -185,17 +232,17 @@ def __reduce__(self): class BroadcastPickleRegistry(threading.local): """Thread-local registry for broadcast variables that have been pickled""" - def __init__(self): + def __init__(self) -> None: self.__dict__.setdefault("_registry", set()) - def __iter__(self): + def __iter__(self) -> Iterator[Broadcast[Any]]: for bcast in self._registry: yield bcast - def add(self, bcast): + def add(self, bcast: Broadcast[Any]) -> None: self._registry.add(bcast) - def clear(self): + def clear(self) -> None: self._registry.clear() diff --git a/python/pyspark/broadcast.pyi b/python/pyspark/broadcast.pyi deleted file mode 100644 index 944cb06d4178c..0000000000000 --- a/python/pyspark/broadcast.pyi +++ /dev/null @@ -1,48 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import threading -from typing import Any, Callable, Dict, Generic, Optional, Tuple, TypeVar - -T = TypeVar("T") - -_broadcastRegistry: Dict[int, Broadcast] - -class Broadcast(Generic[T]): - def __init__( - self, - sc: Optional[Any] = ..., - value: Optional[T] = ..., - pickle_registry: Optional[Any] = ..., - path: Optional[Any] = ..., - sock_file: Optional[Any] = ..., - ) -> None: ... - def dump(self, value: T, f: Any) -> None: ... - def load_from_path(self, path: Any) -> T: ... - def load(self, file: Any) -> T: ... - @property - def value(self) -> T: ... - def unpersist(self, blocking: bool = ...) -> None: ... - def destroy(self, blocking: bool = ...) -> None: ... - def __reduce__(self) -> Tuple[Callable[[int], T], Tuple[int]]: ... - -class BroadcastPickleRegistry(threading.local): - def __init__(self) -> None: ... - def __iter__(self) -> None: ... - def add(self, bcast: Any) -> None: ... - def clear(self) -> None: ... From 8ed6144a6a599d913358206e84b726573d1f2f86 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 13 Jan 2022 18:24:19 +0900 Subject: [PATCH 005/513] [SPARK-37889][SQL] Replace Log4j2 MarkerFilter with RegexFilter ### What changes were proposed in this pull request? Log4j2 MarkerFilter can not suppress ``` 10:55:54.597 ERROR org.apache.thrift.server.TThreadPoolServer: Thrift error occurred during processing of message. 
org.apache.thrift.transport.TTransportException: null at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:132) ~[libthrift-0.12.0.jar:0.12.0] ``` We shall use RegexFilter instead ### Why are the changes needed? bugfix ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? run `build/sbt -Phive -Phive-thriftserver "hive-thriftserver/testOnly *ThriftServerWithSparkContextInBinarySuite"` locally and checked Closes #35186 from yaooqinn/SPARK-37889. Authored-by: Kent Yao Signed-off-by: Hyukjin Kwon --- conf/log4j2.properties.template | 4 ++-- .../src/test/resources/log4j2.properties | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/log4j2.properties.template b/conf/log4j2.properties.template index 85b4f679a93e2..99f68a8a9e98c 100644 --- a/conf/log4j2.properties.template +++ b/conf/log4j2.properties.template @@ -57,7 +57,7 @@ logger.FunctionRegistry.level = error # For deploying Spark ThriftServer # SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805 -appender.console.filter.1.type = MarkerFilter -appender.console.filter.1.marker = Thrift error occurred during processing of message +appender.console.filter.1.type = RegexFilter +appender.console.filter.1.regex = .*Thrift error occurred during processing of message.* appender.console.filter.1.onMatch = deny appender.console.filter.1.onMismatch = neutral diff --git a/sql/hive-thriftserver/src/test/resources/log4j2.properties b/sql/hive-thriftserver/src/test/resources/log4j2.properties index 58e18af0a8e6c..939335bf3ac8d 100644 --- a/sql/hive-thriftserver/src/test/resources/log4j2.properties +++ b/sql/hive-thriftserver/src/test/resources/log4j2.properties @@ -33,8 +33,8 @@ appender.console.filter.1.a.type = ThresholdFilter appender.console.filter.1.a.level = warn # SPARK-34128: Suppress undesirable TTransportException warnings, due to THRIFT-4805 -appender.console.filter.1.b.type = MarkerFilter -appender.console.filter.1.b.marker = Thrift error occurred during processing of message +appender.console.filter.1.b.type = RegexFilter +appender.console.filter.1.b.regex = .*Thrift error occurred during processing of message.* appender.console.filter.1.b.onMatch = deny appender.console.filter.1.b.onMismatch = neutral @@ -47,8 +47,8 @@ appender.file.layout.pattern = %d{HH:mm:ss.SSS} %t %p %c{1}: %m%n appender.file.filter.1.type = Filters -appender.file.filter.1.a.type = MarkerFilter -appender.file.filter.1.a.marker = Thrift error occurred during processing of message +appender.file.filter.1.a.type = RegexFilter +appender.file.filter.1.a.regx = .*Thrift error occurred during processing of message.* appender.file.filter.1.a.onMatch = deny appender.file.filter.1.a.onMismatch = neutral From c3a0fce0e4bea6ba7349355bf549a48a0816d4ef Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 13 Jan 2022 18:28:57 +0900 Subject: [PATCH 006/513] [SPARK-37879][INFRA][FOLLOWUP] Change actions to check-runs in update_build_status.yml ### What changes were proposed in this pull request? Change actions to check-runs in update_build_status.yml. ### Why are the changes needed? https://github.com/apache/spark/pull/35179#issuecomment-1011881076 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Closes #35189 from Yikun/patch-11. 
Authored-by: Yikun Jiang Signed-off-by: Hyukjin Kwon --- .github/workflows/update_build_status.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/update_build_status.yml b/.github/workflows/update_build_status.yml index 671487adbfe05..4da5f62933118 100644 --- a/.github/workflows/update_build_status.yml +++ b/.github/workflows/update_build_status.yml @@ -65,7 +65,7 @@ jobs: // Get the workflow run in the forked repository let run try { - run = await github.request('GET /repos/{owner}/{repo}/actions/runs/{run_id}', params) + run = await github.request('GET /repos/{owner}/{repo}/check-runs/{run_id}', params) } catch (error) { console.error(error) // Run not found. This can happen when the PR author removes GitHub Actions runs or @@ -83,7 +83,7 @@ jobs: output: cr.output, status: run.data.status, conclusion: run.data.conclusion, - details_url: run.data.details_url + details_url: run.data.url }) } else { console.log(' Run ' + cr.id + ': set status (' + run.data.status + ')') @@ -93,7 +93,7 @@ jobs: check_run_id: cr.id, output: cr.output, status: run.data.status, - details_url: run.data.details_url + details_url: run.data.url }) } From 85efc85f9aa93b3fac9e591c96efa38d4414adf8 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 13 Jan 2022 19:17:47 +0900 Subject: [PATCH 007/513] Revert "[SPARK-37879][INFRA][FOLLOWUP] Change actions to check-runs in update_build_status.yml" This reverts commit c3a0fce0e4bea6ba7349355bf549a48a0816d4ef. --- .github/workflows/update_build_status.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/update_build_status.yml b/.github/workflows/update_build_status.yml index 4da5f62933118..671487adbfe05 100644 --- a/.github/workflows/update_build_status.yml +++ b/.github/workflows/update_build_status.yml @@ -65,7 +65,7 @@ jobs: // Get the workflow run in the forked repository let run try { - run = await github.request('GET /repos/{owner}/{repo}/check-runs/{run_id}', params) + run = await github.request('GET /repos/{owner}/{repo}/actions/runs/{run_id}', params) } catch (error) { console.error(error) // Run not found. This can happen when the PR author removes GitHub Actions runs or @@ -83,7 +83,7 @@ jobs: output: cr.output, status: run.data.status, conclusion: run.data.conclusion, - details_url: run.data.url + details_url: run.data.details_url }) } else { console.log(' Run ' + cr.id + ': set status (' + run.data.status + ')') @@ -93,7 +93,7 @@ jobs: check_run_id: cr.id, output: cr.output, status: run.data.status, - details_url: run.data.url + details_url: run.data.details_url }) } From 0cbc9349473176d850068b421474579f6c44ad1e Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 13 Jan 2022 19:17:54 +0900 Subject: [PATCH 008/513] Revert "[SPARK-37879][INFRA] Show test report in GitHub Actions builds from PRs" This reverts commit ebd7fca02be159c120e9b1ebbea1d21b2f290eda. 
--- .github/workflows/notify_test_workflow.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/notify_test_workflow.yml b/.github/workflows/notify_test_workflow.yml index bd9147abe1f75..17d75938a802c 100644 --- a/.github/workflows/notify_test_workflow.yml +++ b/.github/workflows/notify_test_workflow.yml @@ -38,7 +38,7 @@ jobs: with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | - const endpoint = 'GET /repos/:owner/:repo/commits/:ref/check-runs' + const endpoint = 'GET /repos/:owner/:repo/actions/workflows/:id/runs?&branch=:branch' // TODO: Should use pull_request.user and pull_request.user.repos_url? // If a different person creates a commit to another forked repo, @@ -46,7 +46,8 @@ jobs: const params = { owner: context.payload.pull_request.head.repo.owner.login, repo: context.payload.pull_request.head.repo.name, - ref: context.payload.pull_request.head.ref, + id: 'build_and_test.yml', + branch: context.payload.pull_request.head.ref, } console.log('Ref: ' + context.payload.pull_request.head.ref) @@ -67,7 +68,7 @@ jobs: const head_sha = context.payload.pull_request.head.sha let status = 'queued' - if (!runs || runs.data.check_runs.filter(r => r.name === "Configure jobs").length === 0) { + if (!runs || runs.data.workflow_runs.length === 0) { status = 'completed' const conclusion = 'action_required' @@ -99,15 +100,15 @@ jobs: } }) } else { - const runID = runs.data.check_runs.filter(r => r.name === "Configure jobs")[0].id + const runID = runs.data.workflow_runs[0].id - if (runs.data.check_runs[0].head_sha != context.payload.pull_request.head.sha) { + if (runs.data.workflow_runs[0].head_sha != context.payload.pull_request.head.sha) { throw new Error('There was a new unsynced commit pushed. Please retrigger the workflow.'); } const runUrl = 'https://github.com/' + context.payload.pull_request.head.repo.full_name - + '/runs/' + + '/actions/runs/' + runID github.checks.create({ From 0e186e8a19926f91810f3eaf174611b71e598de6 Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 13 Jan 2022 12:36:13 +0100 Subject: [PATCH 009/513] [SPARK-37686][PYTHON][SQL] Use _invoke_function helpers for all pyspark.sql.functions ### What changes were proposed in this pull request? This PR proposes conversion of functions not covered by SPARK-32084 to `_invoke_functions` style. Two new `_invoke` functions where added: - `_invoke_function_over_columns` - `_invoke_function_over_seq_of_columns` to address common examples. ### Why are the changes needed? To reduce boilerplate (especially related to type checking) and improve manageability. Additionally, it opens opportunity for reducing driver-side invocation latency. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #34951 from zero323/SPARK-37686. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/sql/functions.py | 847 ++++++++++---------------------- 1 file changed, 256 insertions(+), 591 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index d9ba4220e93aa..f2bca0b5d0505 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -28,6 +28,7 @@ Callable, Dict, List, + Iterable, overload, Optional, Tuple, @@ -65,7 +66,7 @@ # since it requires to make every single overridden definition. 
-def _get_get_jvm_function(name: str, sc: SparkContext) -> Callable: +def _get_jvm_function(name: str, sc: SparkContext) -> Callable: """ Retrieves JVM function identified by name from Java gateway associated with sc. @@ -80,16 +81,26 @@ def _invoke_function(name: str, *args: Any) -> Column: and wraps the result with :class:`~pyspark.sql.Column`. """ assert SparkContext._active_spark_context is not None - jf = _get_get_jvm_function(name, SparkContext._active_spark_context) + jf = _get_jvm_function(name, SparkContext._active_spark_context) return Column(jf(*args)) -def _invoke_function_over_column(name: str, col: "ColumnOrName") -> Column: +def _invoke_function_over_columns(name: str, *cols: "ColumnOrName") -> Column: """ - Invokes unary JVM function identified by name + Invokes n-ary JVM function identified by name and wraps the result with :class:`~pyspark.sql.Column`. """ - return _invoke_function(name, _to_java_column(col)) + return _invoke_function(name, *(_to_java_column(col) for col in cols)) + + +def _invoke_function_over_seq_of_columns(name: str, cols: "Iterable[ColumnOrName]") -> Column: + """ + Invokes unary JVM function identified by name with + and wraps the result with :class:`~pyspark.sql.Column`. + """ + sc = SparkContext._active_spark_context + assert sc is not None and sc._jvm is not None + return _invoke_function(name, _to_seq(sc, cols, _to_java_column)) def _invoke_binary_math_function(name: str, col1: Any, col2: Any) -> Column: @@ -164,7 +175,7 @@ def sqrt(col: "ColumnOrName") -> Column: """ Computes the square root of the specified float value. """ - return _invoke_function_over_column("sqrt", col) + return _invoke_function_over_columns("sqrt", col) @since(1.3) @@ -172,7 +183,7 @@ def abs(col: "ColumnOrName") -> Column: """ Computes the absolute value. """ - return _invoke_function_over_column("abs", col) + return _invoke_function_over_columns("abs", col) @since(1.3) @@ -180,7 +191,7 @@ def max(col: "ColumnOrName") -> Column: """ Aggregate function: returns the maximum value of the expression in a group. """ - return _invoke_function_over_column("max", col) + return _invoke_function_over_columns("max", col) @since(1.3) @@ -188,7 +199,7 @@ def min(col: "ColumnOrName") -> Column: """ Aggregate function: returns the minimum value of the expression in a group. """ - return _invoke_function_over_column("min", col) + return _invoke_function_over_columns("min", col) def max_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column: @@ -223,7 +234,7 @@ def max_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column: |dotNET| 2013| +------+----------------------+ """ - return _invoke_function("max_by", _to_java_column(col), _to_java_column(ord)) + return _invoke_function_over_columns("max_by", col, ord) def min_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column: @@ -258,7 +269,7 @@ def min_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column: |dotNET| 2012| +------+----------------------+ """ - return _invoke_function("min_by", _to_java_column(col), _to_java_column(ord)) + return _invoke_function_over_columns("min_by", col, ord) @since(1.3) @@ -266,7 +277,7 @@ def count(col: "ColumnOrName") -> Column: """ Aggregate function: returns the number of items in a group. """ - return _invoke_function_over_column("count", col) + return _invoke_function_over_columns("count", col) @since(1.3) @@ -274,7 +285,7 @@ def sum(col: "ColumnOrName") -> Column: """ Aggregate function: returns the sum of all values in the expression. 
""" - return _invoke_function_over_column("sum", col) + return _invoke_function_over_columns("sum", col) @since(1.3) @@ -282,7 +293,7 @@ def avg(col: "ColumnOrName") -> Column: """ Aggregate function: returns the average of the values in a group. """ - return _invoke_function_over_column("avg", col) + return _invoke_function_over_columns("avg", col) @since(1.3) @@ -290,7 +301,7 @@ def mean(col: "ColumnOrName") -> Column: """ Aggregate function: returns the average of the values in a group. """ - return _invoke_function_over_column("mean", col) + return _invoke_function_over_columns("mean", col) @since(1.3) @@ -310,7 +321,7 @@ def sum_distinct(col: "ColumnOrName") -> Column: """ Aggregate function: returns the sum of distinct values in the expression. """ - return _invoke_function_over_column("sum_distinct", col) + return _invoke_function_over_columns("sum_distinct", col) def product(col: "ColumnOrName") -> Column: @@ -338,7 +349,7 @@ def product(col: "ColumnOrName") -> Column: +----+-------+ """ - return _invoke_function_over_column("product", col) + return _invoke_function_over_columns("product", col) def acos(col: "ColumnOrName") -> Column: @@ -352,7 +363,7 @@ def acos(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` inverse cosine of `col`, as if computed by `java.lang.Math.acos()` """ - return _invoke_function_over_column("acos", col) + return _invoke_function_over_columns("acos", col) def acosh(col: "ColumnOrName") -> Column: @@ -365,7 +376,7 @@ def acosh(col: "ColumnOrName") -> Column: ------- :class:`~pyspark.sql.Column` """ - return _invoke_function_over_column("acosh", col) + return _invoke_function_over_columns("acosh", col) def asin(col: "ColumnOrName") -> Column: @@ -380,7 +391,7 @@ def asin(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` inverse sine of `col`, as if computed by `java.lang.Math.asin()` """ - return _invoke_function_over_column("asin", col) + return _invoke_function_over_columns("asin", col) def asinh(col: "ColumnOrName") -> Column: @@ -393,7 +404,7 @@ def asinh(col: "ColumnOrName") -> Column: ------- :class:`~pyspark.sql.Column` """ - return _invoke_function_over_column("asinh", col) + return _invoke_function_over_columns("asinh", col) def atan(col: "ColumnOrName") -> Column: @@ -407,7 +418,7 @@ def atan(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` inverse tangent of `col`, as if computed by `java.lang.Math.atan()` """ - return _invoke_function_over_column("atan", col) + return _invoke_function_over_columns("atan", col) def atanh(col: "ColumnOrName") -> Column: @@ -420,7 +431,7 @@ def atanh(col: "ColumnOrName") -> Column: ------- :class:`~pyspark.sql.Column` """ - return _invoke_function_over_column("atanh", col) + return _invoke_function_over_columns("atanh", col) @since(1.4) @@ -428,7 +439,7 @@ def cbrt(col: "ColumnOrName") -> Column: """ Computes the cube-root of the given value. """ - return _invoke_function_over_column("cbrt", col) + return _invoke_function_over_columns("cbrt", col) @since(1.4) @@ -436,7 +447,7 @@ def ceil(col: "ColumnOrName") -> Column: """ Computes the ceiling of the given value. """ - return _invoke_function_over_column("ceil", col) + return _invoke_function_over_columns("ceil", col) def cos(col: "ColumnOrName") -> Column: @@ -455,7 +466,7 @@ def cos(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` cosine of the angle, as if computed by `java.lang.Math.cos()`. 
""" - return _invoke_function_over_column("cos", col) + return _invoke_function_over_columns("cos", col) def cosh(col: "ColumnOrName") -> Column: @@ -474,7 +485,7 @@ def cosh(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` hyperbolic cosine of the angle, as if computed by `java.lang.Math.cosh()` """ - return _invoke_function_over_column("cosh", col) + return _invoke_function_over_columns("cosh", col) def cot(col: "ColumnOrName") -> Column: @@ -493,7 +504,7 @@ def cot(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` Cotangent of the angle. """ - return _invoke_function_over_column("cot", col) + return _invoke_function_over_columns("cot", col) def csc(col: "ColumnOrName") -> Column: @@ -512,7 +523,7 @@ def csc(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` Cosecant of the angle. """ - return _invoke_function_over_column("csc", col) + return _invoke_function_over_columns("csc", col) @since(1.4) @@ -520,7 +531,7 @@ def exp(col: "ColumnOrName") -> Column: """ Computes the exponential of the given value. """ - return _invoke_function_over_column("exp", col) + return _invoke_function_over_columns("exp", col) @since(1.4) @@ -528,7 +539,7 @@ def expm1(col: "ColumnOrName") -> Column: """ Computes the exponential of the given value minus one. """ - return _invoke_function_over_column("expm1", col) + return _invoke_function_over_columns("expm1", col) @since(1.4) @@ -536,7 +547,7 @@ def floor(col: "ColumnOrName") -> Column: """ Computes the floor of the given value. """ - return _invoke_function_over_column("floor", col) + return _invoke_function_over_columns("floor", col) @since(1.4) @@ -544,7 +555,7 @@ def log(col: "ColumnOrName") -> Column: """ Computes the natural logarithm of the given value. """ - return _invoke_function_over_column("log", col) + return _invoke_function_over_columns("log", col) @since(1.4) @@ -552,7 +563,7 @@ def log10(col: "ColumnOrName") -> Column: """ Computes the logarithm of the given value in Base 10. """ - return _invoke_function_over_column("log10", col) + return _invoke_function_over_columns("log10", col) @since(1.4) @@ -560,7 +571,7 @@ def log1p(col: "ColumnOrName") -> Column: """ Computes the natural logarithm of the given value plus one. """ - return _invoke_function_over_column("log1p", col) + return _invoke_function_over_columns("log1p", col) @since(1.4) @@ -569,7 +580,7 @@ def rint(col: "ColumnOrName") -> Column: Returns the double value that is closest in value to the argument and is equal to a mathematical integer. """ - return _invoke_function_over_column("rint", col) + return _invoke_function_over_columns("rint", col) def sec(col: "ColumnOrName") -> Column: @@ -588,7 +599,7 @@ def sec(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` Secant of the angle. """ - return _invoke_function_over_column("sec", col) + return _invoke_function_over_columns("sec", col) @since(1.4) @@ -596,7 +607,7 @@ def signum(col: "ColumnOrName") -> Column: """ Computes the signum of the given value. 
""" - return _invoke_function_over_column("signum", col) + return _invoke_function_over_columns("signum", col) def sin(col: "ColumnOrName") -> Column: @@ -614,7 +625,7 @@ def sin(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` sine of the angle, as if computed by `java.lang.Math.sin()` """ - return _invoke_function_over_column("sin", col) + return _invoke_function_over_columns("sin", col) def sinh(col: "ColumnOrName") -> Column: @@ -634,7 +645,7 @@ def sinh(col: "ColumnOrName") -> Column: hyperbolic sine of the given value, as if computed by `java.lang.Math.sinh()` """ - return _invoke_function_over_column("sinh", col) + return _invoke_function_over_columns("sinh", col) def tan(col: "ColumnOrName") -> Column: @@ -653,7 +664,7 @@ def tan(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` tangent of the given value, as if computed by `java.lang.Math.tan()` """ - return _invoke_function_over_column("tan", col) + return _invoke_function_over_columns("tan", col) def tanh(col: "ColumnOrName") -> Column: @@ -673,7 +684,7 @@ def tanh(col: "ColumnOrName") -> Column: hyperbolic tangent of the given value as if computed by `java.lang.Math.tanh()` """ - return _invoke_function_over_column("tanh", col) + return _invoke_function_over_columns("tanh", col) @since(1.4) @@ -713,7 +724,7 @@ def bitwise_not(col: "ColumnOrName") -> Column: """ Computes bitwise not. """ - return _invoke_function_over_column("bitwise_not", col) + return _invoke_function_over_columns("bitwise_not", col) @since(2.4) @@ -771,7 +782,7 @@ def stddev(col: "ColumnOrName") -> Column: """ Aggregate function: alias for stddev_samp. """ - return _invoke_function_over_column("stddev", col) + return _invoke_function_over_columns("stddev", col) @since(1.6) @@ -780,7 +791,7 @@ def stddev_samp(col: "ColumnOrName") -> Column: Aggregate function: returns the unbiased sample standard deviation of the expression in a group. """ - return _invoke_function_over_column("stddev_samp", col) + return _invoke_function_over_columns("stddev_samp", col) @since(1.6) @@ -789,7 +800,7 @@ def stddev_pop(col: "ColumnOrName") -> Column: Aggregate function: returns population standard deviation of the expression in a group. """ - return _invoke_function_over_column("stddev_pop", col) + return _invoke_function_over_columns("stddev_pop", col) @since(1.6) @@ -797,7 +808,7 @@ def variance(col: "ColumnOrName") -> Column: """ Aggregate function: alias for var_samp """ - return _invoke_function_over_column("variance", col) + return _invoke_function_over_columns("variance", col) @since(1.6) @@ -806,7 +817,7 @@ def var_samp(col: "ColumnOrName") -> Column: Aggregate function: returns the unbiased sample variance of the values in a group. """ - return _invoke_function_over_column("var_samp", col) + return _invoke_function_over_columns("var_samp", col) @since(1.6) @@ -814,7 +825,7 @@ def var_pop(col: "ColumnOrName") -> Column: """ Aggregate function: returns the population variance of the values in a group. """ - return _invoke_function_over_column("var_pop", col) + return _invoke_function_over_columns("var_pop", col) @since(1.6) @@ -822,7 +833,7 @@ def skewness(col: "ColumnOrName") -> Column: """ Aggregate function: returns the skewness of the values in a group. """ - return _invoke_function_over_column("skewness", col) + return _invoke_function_over_columns("skewness", col) @since(1.6) @@ -830,7 +841,7 @@ def kurtosis(col: "ColumnOrName") -> Column: """ Aggregate function: returns the kurtosis of the values in a group. 
""" - return _invoke_function_over_column("kurtosis", col) + return _invoke_function_over_columns("kurtosis", col) def collect_list(col: "ColumnOrName") -> Column: @@ -850,7 +861,7 @@ def collect_list(col: "ColumnOrName") -> Column: >>> df2.agg(collect_list('age')).collect() [Row(collect_list(age)=[2, 5, 5])] """ - return _invoke_function_over_column("collect_list", col) + return _invoke_function_over_columns("collect_list", col) def collect_set(col: "ColumnOrName") -> Column: @@ -870,7 +881,7 @@ def collect_set(col: "ColumnOrName") -> Column: >>> df2.agg(array_sort(collect_set('age')).alias('c')).collect() [Row(c=[2, 5])] """ - return _invoke_function_over_column("collect_set", col) + return _invoke_function_over_columns("collect_set", col) def degrees(col: "ColumnOrName") -> Column: @@ -890,7 +901,7 @@ def degrees(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` angle in degrees, as if computed by `java.lang.Math.toDegrees()` """ - return _invoke_function_over_column("degrees", col) + return _invoke_function_over_columns("degrees", col) def radians(col: "ColumnOrName") -> Column: @@ -910,7 +921,7 @@ def radians(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` angle in radians, as if computed by `java.lang.Math.toRadians()` """ - return _invoke_function_over_column("radians", col) + return _invoke_function_over_columns("radians", col) @overload @@ -1082,13 +1093,10 @@ def approx_count_distinct(col: "ColumnOrName", rsd: Optional[float] = None) -> C >>> df.agg(approx_count_distinct(df.age).alias('distinct_ages')).collect() [Row(distinct_ages=2)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if rsd is None: - jc = sc._jvm.functions.approx_count_distinct(_to_java_column(col)) + return _invoke_function_over_columns("approx_count_distinct", col) else: - jc = sc._jvm.functions.approx_count_distinct(_to_java_column(col), rsd) - return Column(jc) + return _invoke_function("approx_count_distinct", _to_java_column(col), rsd) @since(1.6) @@ -1135,10 +1143,7 @@ def coalesce(*cols: "ColumnOrName") -> Column: |null| 2| 0.0| +----+----+----------------+ """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.coalesce(_to_seq(sc, cols, _to_java_column)) - return Column(jc) + return _invoke_function_over_seq_of_columns("coalesce", cols) def corr(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: @@ -1155,9 +1160,7 @@ def corr(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: >>> df.agg(corr("a", "b").alias('c')).collect() [Row(c=1.0)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.corr(_to_java_column(col1), _to_java_column(col2))) + return _invoke_function_over_columns("corr", col1, col2) def covar_pop(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: @@ -1174,9 +1177,7 @@ def covar_pop(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: >>> df.agg(covar_pop("a", "b").alias('c')).collect() [Row(c=0.0)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.covar_pop(_to_java_column(col1), _to_java_column(col2))) + return _invoke_function_over_columns("covar_pop", col1, col2) def covar_samp(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: @@ -1193,9 +1194,7 @@ def covar_samp(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: >>> df.agg(covar_samp("a", "b").alias('c')).collect() [Row(c=0.0)] """ - 
sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.covar_samp(_to_java_column(col1), _to_java_column(col2))) + return _invoke_function_over_columns("covar_samp", col1, col2) def countDistinct(col: "ColumnOrName", *cols: "ColumnOrName") -> Column: @@ -1224,8 +1223,9 @@ def count_distinct(col: "ColumnOrName", *cols: "ColumnOrName") -> Column: """ sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.count_distinct(_to_java_column(col), _to_seq(sc, cols, _to_java_column)) - return Column(jc) + return _invoke_function( + "count_distinct", _to_java_column(col), _to_seq(sc, cols, _to_java_column) + ) def first(col: "ColumnOrName", ignorenulls: bool = False) -> Column: @@ -1241,10 +1241,7 @@ def first(col: "ColumnOrName", ignorenulls: bool = False) -> Column: The function is non-deterministic because its results depends on the order of the rows which may be non-deterministic after a shuffle. """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.first(_to_java_column(col), ignorenulls) - return Column(jc) + return _invoke_function("first", _to_java_column(col), ignorenulls) def grouping(col: "ColumnOrName") -> Column: @@ -1265,10 +1262,7 @@ def grouping(col: "ColumnOrName") -> Column: | Bob| 0| 5| +-----+--------------+--------+ """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.grouping(_to_java_column(col)) - return Column(jc) + return _invoke_function_over_columns("grouping", col) def grouping_id(*cols: "ColumnOrName") -> Column: @@ -1295,18 +1289,13 @@ def grouping_id(*cols: "ColumnOrName") -> Column: | Bob| 0| 5| +-----+-------------+--------+ """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.grouping_id(_to_seq(sc, cols, _to_java_column)) - return Column(jc) + return _invoke_function_over_seq_of_columns("grouping_id", cols) @since(1.6) def input_file_name() -> Column: """Creates a string column for the file name of the current Spark task.""" - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.input_file_name()) + return _invoke_function("input_file_name") def isnan(col: "ColumnOrName") -> Column: @@ -1320,9 +1309,7 @@ def isnan(col: "ColumnOrName") -> Column: >>> df.select(isnan("a").alias("r1"), isnan(df.a).alias("r2")).collect() [Row(r1=False, r2=False), Row(r1=True, r2=True)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.isnan(_to_java_column(col))) + return _invoke_function_over_columns("isnan", col) def isnull(col: "ColumnOrName") -> Column: @@ -1336,9 +1323,7 @@ def isnull(col: "ColumnOrName") -> Column: >>> df.select(isnull("a").alias("r1"), isnull(df.a).alias("r2")).collect() [Row(r1=False, r2=False), Row(r1=True, r2=True)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.isnull(_to_java_column(col))) + return _invoke_function_over_columns("isnull", col) def last(col: "ColumnOrName", ignorenulls: bool = False) -> Column: @@ -1354,10 +1339,7 @@ def last(col: "ColumnOrName", ignorenulls: bool = False) -> Column: The function is non-deterministic because its results depends on the order of the rows which may be non-deterministic after 
a shuffle. """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.last(_to_java_column(col), ignorenulls) - return Column(jc) + return _invoke_function("last", _to_java_column(col), ignorenulls) def monotonically_increasing_id() -> Column: @@ -1382,9 +1364,7 @@ def monotonically_increasing_id() -> Column: >>> df0.select(monotonically_increasing_id().alias('id')).collect() [Row(id=0), Row(id=1), Row(id=2), Row(id=8589934592), Row(id=8589934593), Row(id=8589934594)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.monotonically_increasing_id()) + return _invoke_function("monotonically_increasing_id") def nanvl(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: @@ -1400,9 +1380,7 @@ def nanvl(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: >>> df.select(nanvl("a", "b").alias("r1"), nanvl(df.a, df.b).alias("r2")).collect() [Row(r1=1.0, r2=1.0), Row(r1=2.0, r2=2.0)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.nanvl(_to_java_column(col1), _to_java_column(col2))) + return _invoke_function_over_columns("nanvl", col1, col2) def percentile_approx( @@ -1450,9 +1428,9 @@ def percentile_approx( if isinstance(percentage, (list, tuple)): # A local list - percentage = sc._jvm.functions.array( - _to_seq(sc, [_create_column_from_literal(x) for x in percentage]) - ) + percentage = _invoke_function( + "array", _to_seq(sc, [_create_column_from_literal(x) for x in percentage]) + )._jc elif isinstance(percentage, Column): # Already a Column percentage = _to_java_column(percentage) @@ -1466,7 +1444,7 @@ def percentile_approx( else _create_column_from_literal(accuracy) ) - return Column(sc._jvm.functions.percentile_approx(_to_java_column(col), percentage, accuracy)) + return _invoke_function("percentile_approx", _to_java_column(col), percentage, accuracy) def rand(seed: Optional[int] = None) -> Column: @@ -1485,13 +1463,10 @@ def rand(seed: Optional[int] = None) -> Column: [Row(age=2, name='Alice', rand=2.4052597283576684), Row(age=5, name='Bob', rand=2.3913904055683974)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if seed is not None: - jc = sc._jvm.functions.rand(seed) + return _invoke_function("rand", seed) else: - jc = sc._jvm.functions.rand() - return Column(jc) + return _invoke_function("rand") def randn(seed: Optional[int] = None) -> Column: @@ -1510,13 +1485,10 @@ def randn(seed: Optional[int] = None) -> Column: [Row(age=2, name='Alice', randn=1.1027054481455365), Row(age=5, name='Bob', randn=0.7400395449950132)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if seed is not None: - jc = sc._jvm.functions.randn(seed) + return _invoke_function("randn", seed) else: - jc = sc._jvm.functions.randn() - return Column(jc) + return _invoke_function("randn") def round(col: "ColumnOrName", scale: int = 0) -> Column: @@ -1531,9 +1503,7 @@ def round(col: "ColumnOrName", scale: int = 0) -> Column: >>> spark.createDataFrame([(2.5,)], ['a']).select(round('a', 0).alias('r')).collect() [Row(r=3.0)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.round(_to_java_column(col), scale)) + return _invoke_function("round", _to_java_column(col), scale) def bround(col: "ColumnOrName", scale: int = 0) -> Column: @@ -1548,9 
+1518,7 @@ def bround(col: "ColumnOrName", scale: int = 0) -> Column: >>> spark.createDataFrame([(2.5,)], ['a']).select(bround('a', 0).alias('r')).collect() [Row(r=2.0)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.bround(_to_java_column(col), scale)) + return _invoke_function("bround", _to_java_column(col), scale) def shiftLeft(col: "ColumnOrName", numBits: int) -> Column: @@ -1575,9 +1543,7 @@ def shiftleft(col: "ColumnOrName", numBits: int) -> Column: >>> spark.createDataFrame([(21,)], ['a']).select(shiftleft('a', 1).alias('r')).collect() [Row(r=42)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.shiftleft(_to_java_column(col), numBits)) + return _invoke_function("shiftleft", _to_java_column(col), numBits) def shiftRight(col: "ColumnOrName", numBits: int) -> Column: @@ -1602,10 +1568,7 @@ def shiftright(col: "ColumnOrName", numBits: int) -> Column: >>> spark.createDataFrame([(42,)], ['a']).select(shiftright('a', 1).alias('r')).collect() [Row(r=21)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.shiftRight(_to_java_column(col), numBits) - return Column(jc) + return _invoke_function("shiftright", _to_java_column(col), numBits) def shiftRightUnsigned(col: "ColumnOrName", numBits: int) -> Column: @@ -1631,10 +1594,7 @@ def shiftrightunsigned(col: "ColumnOrName", numBits: int) -> Column: >>> df.select(shiftrightunsigned('a', 1).alias('r')).collect() [Row(r=9223372036854775787)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.shiftRightUnsigned(_to_java_column(col), numBits) - return Column(jc) + return _invoke_function("shiftrightunsigned", _to_java_column(col), numBits) def spark_partition_id() -> Column: @@ -1651,9 +1611,7 @@ def spark_partition_id() -> Column: >>> df.repartition(1).select(spark_partition_id().alias("pid")).collect() [Row(pid=0), Row(pid=0)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.spark_partition_id()) + return _invoke_function("spark_partition_id") def expr(str: str) -> Column: @@ -1666,9 +1624,7 @@ def expr(str: str) -> Column: >>> df.select(expr("length(name)")).collect() [Row(length(name)=5), Row(length(name)=3)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.expr(str)) + return _invoke_function("expr", str) @overload @@ -1700,12 +1656,9 @@ def struct( >>> df.select(struct([df.age, df.name]).alias("struct")).collect() [Row(struct=Row(age=2, name='Alice')), Row(struct=Row(age=5, name='Bob'))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if len(cols) == 1 and isinstance(cols[0], (list, set)): cols = cols[0] # type: ignore[assignment] - jc = sc._jvm.functions.struct(_to_seq(sc, cols, _to_java_column)) # type: ignore[arg-type] - return Column(jc) + return _invoke_function_over_seq_of_columns("struct", cols) # type: ignore[arg-type] def greatest(*cols: "ColumnOrName") -> Column: @@ -1723,9 +1676,7 @@ def greatest(*cols: "ColumnOrName") -> Column: """ if len(cols) < 2: raise ValueError("greatest should take at least two columns") - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return 
Column(sc._jvm.functions.greatest(_to_seq(sc, cols, _to_java_column))) + return _invoke_function_over_seq_of_columns("greatest", cols) def least(*cols: "ColumnOrName") -> Column: @@ -1748,9 +1699,7 @@ def least(*cols: "ColumnOrName") -> Column: """ if len(cols) < 2: raise ValueError("least should take at least two columns") - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.least(_to_seq(sc, cols, _to_java_column))) + return _invoke_function_over_seq_of_columns("least", cols) def when(condition: Column, value: Any) -> Column: @@ -1773,15 +1722,12 @@ def when(condition: Column, value: Any) -> Column: >>> df.select(when(df.age == 2, df.age + 1).alias("age")).collect() [Row(age=3), Row(age=None)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - # Explicitly not using ColumnOrName type here to make reading condition less opaque if not isinstance(condition, Column): raise TypeError("condition should be a Column") v = value._jc if isinstance(value, Column) else value - jc = sc._jvm.functions.when(condition._jc, v) - return Column(jc) + + return _invoke_function("when", condition._jc, v) @overload # type: ignore[no-redef] @@ -1809,13 +1755,10 @@ def log(arg1: Union["ColumnOrName", float], arg2: Optional["ColumnOrName"] = Non >>> df.select(log(df.age).alias('e')).rdd.map(lambda l: str(l.e)[:7]).collect() ['0.69314', '1.60943'] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if arg2 is None: - jc = sc._jvm.functions.log(_to_java_column(cast("ColumnOrName", arg1))) + return _invoke_function_over_columns("log", cast("ColumnOrName", arg1)) else: - jc = sc._jvm.functions.log(arg1, _to_java_column(arg2)) - return Column(jc) + return _invoke_function("log", arg1, _to_java_column(arg2)) def log2(col: "ColumnOrName") -> Column: @@ -1828,9 +1771,7 @@ def log2(col: "ColumnOrName") -> Column: >>> spark.createDataFrame([(4,)], ['a']).select(log2('a').alias('log2')).collect() [Row(log2=2.0)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.log2(_to_java_column(col))) + return _invoke_function_over_columns("log2", col) def conv(col: "ColumnOrName", fromBase: int, toBase: int) -> Column: @@ -1845,9 +1786,7 @@ def conv(col: "ColumnOrName", fromBase: int, toBase: int) -> Column: >>> df.select(conv(df.n, 2, 16).alias('hex')).collect() [Row(hex='15')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.conv(_to_java_column(col), fromBase, toBase)) + return _invoke_function("conv", _to_java_column(col), fromBase, toBase) def factorial(col: "ColumnOrName") -> Column: @@ -1862,9 +1801,7 @@ def factorial(col: "ColumnOrName") -> Column: >>> df.select(factorial(df.n).alias('f')).collect() [Row(f=120)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.factorial(_to_java_column(col))) + return _invoke_function_over_columns("factorial", col) # --------------- Window functions ------------------------ @@ -1889,9 +1826,7 @@ def lag(col: "ColumnOrName", offset: int = 1, default: Optional[Any] = None) -> default : optional default value """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.lag(_to_java_column(col), offset, default)) + return _invoke_function("lag", 
_to_java_column(col), offset, default) def lead(col: "ColumnOrName", offset: int = 1, default: Optional[Any] = None) -> Column: @@ -1913,9 +1848,7 @@ def lead(col: "ColumnOrName", offset: int = 1, default: Optional[Any] = None) -> default : optional default value """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.lead(_to_java_column(col), offset, default)) + return _invoke_function("lead", _to_java_column(col), offset, default) def nth_value(col: "ColumnOrName", offset: int, ignoreNulls: Optional[bool] = False) -> Column: @@ -1940,9 +1873,7 @@ def nth_value(col: "ColumnOrName", offset: int, ignoreNulls: Optional[bool] = Fa indicates the Nth value should skip null in the determination of which row to use """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.nth_value(_to_java_column(col), offset, ignoreNulls)) + return _invoke_function("nth_value", _to_java_column(col), offset, ignoreNulls) def ntile(n: int) -> Column: @@ -1961,9 +1892,7 @@ def ntile(n: int) -> Column: n : int an integer """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.ntile(int(n))) + return _invoke_function("ntile", int(n)) # ---------------------- Date/Timestamp functions ------------------------------ @@ -1975,9 +1904,7 @@ def current_date() -> Column: Returns the current date at the start of query evaluation as a :class:`DateType` column. All calls of current_date within the same query return the same value. """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.current_date()) + return _invoke_function("current_date") def current_timestamp() -> Column: @@ -1985,9 +1912,7 @@ def current_timestamp() -> Column: Returns the current timestamp at the start of query evaluation as a :class:`TimestampType` column. All calls of current_timestamp within the same query return the same value. 
""" - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.current_timestamp()) + return _invoke_function("current_timestamp") def date_format(date: "ColumnOrName", format: str) -> Column: @@ -2012,9 +1937,7 @@ def date_format(date: "ColumnOrName", format: str) -> Column: >>> df.select(date_format('dt', 'MM/dd/yyy').alias('date')).collect() [Row(date='04/08/2015')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.date_format(_to_java_column(date), format)) + return _invoke_function("date_format", _to_java_column(date), format) def year(col: "ColumnOrName") -> Column: @@ -2029,9 +1952,7 @@ def year(col: "ColumnOrName") -> Column: >>> df.select(year('dt').alias('year')).collect() [Row(year=2015)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.year(_to_java_column(col))) + return _invoke_function_over_columns("year", col) def quarter(col: "ColumnOrName") -> Column: @@ -2046,9 +1967,7 @@ def quarter(col: "ColumnOrName") -> Column: >>> df.select(quarter('dt').alias('quarter')).collect() [Row(quarter=2)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.quarter(_to_java_column(col))) + return _invoke_function_over_columns("quarter", col) def month(col: "ColumnOrName") -> Column: @@ -2063,9 +1982,7 @@ def month(col: "ColumnOrName") -> Column: >>> df.select(month('dt').alias('month')).collect() [Row(month=4)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.month(_to_java_column(col))) + return _invoke_function_over_columns("month", col) def dayofweek(col: "ColumnOrName") -> Column: @@ -2081,9 +1998,7 @@ def dayofweek(col: "ColumnOrName") -> Column: >>> df.select(dayofweek('dt').alias('day')).collect() [Row(day=4)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.dayofweek(_to_java_column(col))) + return _invoke_function_over_columns("dayofweek", col) def dayofmonth(col: "ColumnOrName") -> Column: @@ -2098,9 +2013,7 @@ def dayofmonth(col: "ColumnOrName") -> Column: >>> df.select(dayofmonth('dt').alias('day')).collect() [Row(day=8)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.dayofmonth(_to_java_column(col))) + return _invoke_function_over_columns("dayofmonth", col) def dayofyear(col: "ColumnOrName") -> Column: @@ -2115,9 +2028,7 @@ def dayofyear(col: "ColumnOrName") -> Column: >>> df.select(dayofyear('dt').alias('day')).collect() [Row(day=98)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.dayofyear(_to_java_column(col))) + return _invoke_function_over_columns("dayofyear", col) def hour(col: "ColumnOrName") -> Column: @@ -2132,9 +2043,7 @@ def hour(col: "ColumnOrName") -> Column: >>> df.select(hour('ts').alias('hour')).collect() [Row(hour=13)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.hour(_to_java_column(col))) + return _invoke_function_over_columns("hour", col) def minute(col: "ColumnOrName") -> Column: @@ -2149,9 +2058,7 @@ def minute(col: "ColumnOrName") -> Column: >>> 
df.select(minute('ts').alias('minute')).collect() [Row(minute=8)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.minute(_to_java_column(col))) + return _invoke_function_over_columns("minute", col) def second(col: "ColumnOrName") -> Column: @@ -2166,9 +2073,7 @@ def second(col: "ColumnOrName") -> Column: >>> df.select(second('ts').alias('second')).collect() [Row(second=15)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.second(_to_java_column(col))) + return _invoke_function_over_columns("second", col) def weekofyear(col: "ColumnOrName") -> Column: @@ -2185,9 +2090,7 @@ def weekofyear(col: "ColumnOrName") -> Column: >>> df.select(weekofyear(df.dt).alias('week')).collect() [Row(week=15)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.weekofyear(_to_java_column(col))) + return _invoke_function_over_columns("weekofyear", col) def make_date(year: "ColumnOrName", month: "ColumnOrName", day: "ColumnOrName") -> Column: @@ -2211,13 +2114,7 @@ def make_date(year: "ColumnOrName", month: "ColumnOrName", day: "ColumnOrName") >>> df.select(make_date(df.Y, df.M, df.D).alias("datefield")).collect() [Row(datefield=datetime.date(2020, 6, 26))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - year_col = _to_java_column(year) - month_col = _to_java_column(month) - day_col = _to_java_column(day) - jc = sc._jvm.functions.make_date(year_col, month_col, day_col) - return Column(jc) + return _invoke_function_over_columns("make_date", year, month, day) def date_add(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: @@ -2234,12 +2131,8 @@ def date_add(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: >>> df.select(date_add(df.dt, df.add.cast('integer')).alias('next_date')).collect() [Row(next_date=datetime.date(2015, 4, 10))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - days = lit(days) if isinstance(days, int) else days - - return Column(sc._jvm.functions.date_add(_to_java_column(start), _to_java_column(days))) + return _invoke_function_over_columns("date_add", start, days) def date_sub(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: @@ -2256,12 +2149,8 @@ def date_sub(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: >>> df.select(date_sub(df.dt, df.sub.cast('integer')).alias('prev_date')).collect() [Row(prev_date=datetime.date(2015, 4, 6))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - days = lit(days) if isinstance(days, int) else days - - return Column(sc._jvm.functions.date_sub(_to_java_column(start), _to_java_column(days))) + return _invoke_function_over_columns("date_sub", start, days) def datediff(end: "ColumnOrName", start: "ColumnOrName") -> Column: @@ -2276,9 +2165,7 @@ def datediff(end: "ColumnOrName", start: "ColumnOrName") -> Column: >>> df.select(datediff(df.d2, df.d1).alias('diff')).collect() [Row(diff=32)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.datediff(_to_java_column(end), _to_java_column(start))) + return _invoke_function_over_columns("datediff", end, start) def add_months(start: "ColumnOrName", months: Union["ColumnOrName", int]) -> Column: @@ -2295,12 
+2182,8 @@ def add_months(start: "ColumnOrName", months: Union["ColumnOrName", int]) -> Col >>> df.select(add_months(df.dt, df.add.cast('integer')).alias('next_month')).collect() [Row(next_month=datetime.date(2015, 6, 8))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - months = lit(months) if isinstance(months, int) else months - - return Column(sc._jvm.functions.add_months(_to_java_column(start), _to_java_column(months))) + return _invoke_function_over_columns("add_months", start, months) def months_between(date1: "ColumnOrName", date2: "ColumnOrName", roundOff: bool = True) -> Column: @@ -2321,10 +2204,8 @@ def months_between(date1: "ColumnOrName", date2: "ColumnOrName", roundOff: bool >>> df.select(months_between(df.date1, df.date2, False).alias('months')).collect() [Row(months=3.9495967741935485)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column( - sc._jvm.functions.months_between(_to_java_column(date1), _to_java_column(date2), roundOff) + return _invoke_function( + "months_between", _to_java_column(date1), _to_java_column(date2), roundOff ) @@ -2348,13 +2229,10 @@ def to_date(col: "ColumnOrName", format: Optional[str] = None) -> Column: >>> df.select(to_date(df.t, 'yyyy-MM-dd HH:mm:ss').alias('date')).collect() [Row(date=datetime.date(1997, 2, 28))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if format is None: - jc = sc._jvm.functions.to_date(_to_java_column(col)) + return _invoke_function_over_columns("to_date", col) else: - jc = sc._jvm.functions.to_date(_to_java_column(col), format) - return Column(jc) + return _invoke_function("to_date", _to_java_column(col), format) @overload @@ -2387,13 +2265,10 @@ def to_timestamp(col: "ColumnOrName", format: Optional[str] = None) -> Column: >>> df.select(to_timestamp(df.t, 'yyyy-MM-dd HH:mm:ss').alias('dt')).collect() [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if format is None: - jc = sc._jvm.functions.to_timestamp(_to_java_column(col)) + return _invoke_function_over_columns("to_timestamp", col) else: - jc = sc._jvm.functions.to_timestamp(_to_java_column(col), format) - return Column(jc) + return _invoke_function("to_timestamp", _to_java_column(col), format) def trunc(date: "ColumnOrName", format: str) -> Column: @@ -2418,9 +2293,7 @@ def trunc(date: "ColumnOrName", format: str) -> Column: >>> df.select(trunc(df.d, 'mon').alias('month')).collect() [Row(month=datetime.date(1997, 2, 1))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.trunc(_to_java_column(date), format)) + return _invoke_function("trunc", _to_java_column(date), format) def date_trunc(format: str, timestamp: "ColumnOrName") -> Column: @@ -2447,9 +2320,7 @@ def date_trunc(format: str, timestamp: "ColumnOrName") -> Column: >>> df.select(date_trunc('mon', df.t).alias('month')).collect() [Row(month=datetime.datetime(1997, 2, 1, 0, 0))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.date_trunc(format, _to_java_column(timestamp))) + return _invoke_function("date_trunc", format, _to_java_column(timestamp)) def next_day(date: "ColumnOrName", dayOfWeek: str) -> Column: @@ -2467,9 +2338,7 @@ def next_day(date: "ColumnOrName", dayOfWeek: str) -> Column: >>> 
df.select(next_day(df.d, 'Sun').alias('date')).collect() [Row(date=datetime.date(2015, 8, 2))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.next_day(_to_java_column(date), dayOfWeek)) + return _invoke_function("next_day", _to_java_column(date), dayOfWeek) def last_day(date: "ColumnOrName") -> Column: @@ -2484,9 +2353,7 @@ def last_day(date: "ColumnOrName") -> Column: >>> df.select(last_day(df.d).alias('date')).collect() [Row(date=datetime.date(1997, 2, 28))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.last_day(_to_java_column(date))) + return _invoke_function("last_day", _to_java_column(date)) def from_unixtime(timestamp: "ColumnOrName", format: str = "yyyy-MM-dd HH:mm:ss") -> Column: @@ -2505,9 +2372,17 @@ def from_unixtime(timestamp: "ColumnOrName", format: str = "yyyy-MM-dd HH:mm:ss" [Row(ts='2015-04-08 00:00:00')] >>> spark.conf.unset("spark.sql.session.timeZone") """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.from_unixtime(_to_java_column(timestamp), format)) + return _invoke_function("from_unixtime", _to_java_column(timestamp), format) + + +@overload +def unix_timestamp(timestamp: "ColumnOrName", format: str = ...) -> Column: + ... + + +@overload +def unix_timestamp() -> Column: + ... def unix_timestamp( @@ -2530,11 +2405,9 @@ def unix_timestamp( [Row(unix_time=1428476400)] >>> spark.conf.unset("spark.sql.session.timeZone") """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if timestamp is None: - return Column(sc._jvm.functions.unix_timestamp()) - return Column(sc._jvm.functions.unix_timestamp(_to_java_column(timestamp), format)) + return _invoke_function("unix_timestamp") + return _invoke_function("unix_timestamp", _to_java_column(timestamp), format) def from_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column: @@ -2577,11 +2450,9 @@ def from_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column: >>> df.select(from_utc_timestamp(df.ts, df.tz).alias('local_time')).collect() [Row(local_time=datetime.datetime(1997, 2, 28, 19, 30))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if isinstance(tz, Column): tz = _to_java_column(tz) - return Column(sc._jvm.functions.from_utc_timestamp(_to_java_column(timestamp), tz)) + return _invoke_function("from_utc_timestamp", _to_java_column(timestamp), tz) def to_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column: @@ -2624,11 +2495,9 @@ def to_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column: >>> df.select(to_utc_timestamp(df.ts, df.tz).alias('utc_time')).collect() [Row(utc_time=datetime.datetime(1997, 2, 28, 1, 30))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if isinstance(tz, Column): tz = _to_java_column(tz) - return Column(sc._jvm.functions.to_utc_timestamp(_to_java_column(timestamp), tz)) + return _invoke_function("to_utc_timestamp", _to_java_column(timestamp), tz) def timestamp_seconds(col: "ColumnOrName") -> Column: @@ -2649,9 +2518,7 @@ def timestamp_seconds(col: "ColumnOrName") -> Column: >>> spark.conf.unset("spark.sql.session.timeZone") """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return 
Column(sc._jvm.functions.timestamp_seconds(_to_java_column(col))) + return _invoke_function_over_columns("timestamp_seconds", col) def window( @@ -2716,23 +2583,20 @@ def check_string_field(field, fieldName): # type: ignore[no-untyped-def] if not field or type(field) is not str: raise TypeError("%s should be provided as a string" % fieldName) - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None time_col = _to_java_column(timeColumn) check_string_field(windowDuration, "windowDuration") if slideDuration and startTime: check_string_field(slideDuration, "slideDuration") check_string_field(startTime, "startTime") - res = sc._jvm.functions.window(time_col, windowDuration, slideDuration, startTime) + return _invoke_function("window", time_col, windowDuration, slideDuration, startTime) elif slideDuration: check_string_field(slideDuration, "slideDuration") - res = sc._jvm.functions.window(time_col, windowDuration, slideDuration) + return _invoke_function("window", time_col, windowDuration, slideDuration) elif startTime: check_string_field(startTime, "startTime") - res = sc._jvm.functions.window(time_col, windowDuration, windowDuration, startTime) + return _invoke_function("window", time_col, windowDuration, windowDuration, startTime) else: - res = sc._jvm.functions.window(time_col, windowDuration) - return Column(res) + return _invoke_function("window", time_col, windowDuration) def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str]) -> Column: @@ -2782,13 +2646,10 @@ def check_field(field: Union[Column, str], fieldName: str) -> None: if field is None or not isinstance(field, (str, Column)): raise TypeError("%s should be provided as a string or Column" % fieldName) - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None time_col = _to_java_column(timeColumn) check_field(gapDuration, "gapDuration") gap_duration = gapDuration if isinstance(gapDuration, str) else _to_java_column(gapDuration) - res = sc._jvm.functions.session_window(time_col, gap_duration) - return Column(res) + return _invoke_function("session_window", time_col, gap_duration) # ---------------------------- misc functions ---------------------------------- @@ -2806,9 +2667,7 @@ def crc32(col: "ColumnOrName") -> Column: >>> spark.createDataFrame([('ABC',)], ['a']).select(crc32('a').alias('crc32')).collect() [Row(crc32=2743272264)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.crc32(_to_java_column(col))) + return _invoke_function_over_columns("crc32", col) def md5(col: "ColumnOrName") -> Column: @@ -2821,10 +2680,7 @@ def md5(col: "ColumnOrName") -> Column: >>> spark.createDataFrame([('ABC',)], ['a']).select(md5('a').alias('hash')).collect() [Row(hash='902fbdd2b1df0c4f70b4a5d23525e932')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.md5(_to_java_column(col)) - return Column(jc) + return _invoke_function_over_columns("md5", col) def sha1(col: "ColumnOrName") -> Column: @@ -2837,10 +2693,7 @@ def sha1(col: "ColumnOrName") -> Column: >>> spark.createDataFrame([('ABC',)], ['a']).select(sha1('a').alias('hash')).collect() [Row(hash='3c01bdbb26f358bab27f267924aa2c9a03fcfdb8')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.sha1(_to_java_column(col)) - return Column(jc) + return _invoke_function_over_columns("sha1", col) def 
sha2(col: "ColumnOrName", numBits: int) -> Column: @@ -2858,10 +2711,7 @@ def sha2(col: "ColumnOrName", numBits: int) -> Column: >>> digests[1] Row(s='cd9fb1e148ccd8442e5aa74904cc73bf6fb54d1d54d333bd596aa9bb4bb4e961') """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.sha2(_to_java_column(col), numBits) - return Column(jc) + return _invoke_function("sha2", _to_java_column(col), numBits) def hash(*cols: "ColumnOrName") -> Column: @@ -2874,10 +2724,7 @@ def hash(*cols: "ColumnOrName") -> Column: >>> spark.createDataFrame([('ABC',)], ['a']).select(hash('a').alias('hash')).collect() [Row(hash=-757602832)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.hash(_to_seq(sc, cols, _to_java_column)) - return Column(jc) + return _invoke_function_over_seq_of_columns("hash", cols) def xxhash64(*cols: "ColumnOrName") -> Column: @@ -2891,10 +2738,7 @@ def xxhash64(*cols: "ColumnOrName") -> Column: >>> spark.createDataFrame([('ABC',)], ['a']).select(xxhash64('a').alias('hash')).collect() [Row(hash=4105715581806190027)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.xxhash64(_to_seq(sc, cols, _to_java_column)) - return Column(jc) + return _invoke_function_over_seq_of_columns("xxhash64", cols) def assert_true(col: "ColumnOrName", errMsg: Optional[Union[Column, str]] = None) -> Column: @@ -2923,17 +2767,15 @@ def assert_true(col: "ColumnOrName", errMsg: Optional[Union[Column, str]] = None >>> df.select(assert_true(df.a < df.b, 'error').alias('r')).collect() [Row(r=None)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if errMsg is None: - return Column(sc._jvm.functions.assert_true(_to_java_column(col))) + return _invoke_function_over_columns("assert_true", col) if not isinstance(errMsg, (str, Column)): raise TypeError("errMsg should be a Column or a str, got {}".format(type(errMsg))) errMsg = ( _create_column_from_literal(errMsg) if isinstance(errMsg, str) else _to_java_column(errMsg) ) - return Column(sc._jvm.functions.assert_true(_to_java_column(col), errMsg)) + return _invoke_function("assert_true", _to_java_column(col), errMsg) @since(3.1) @@ -2949,12 +2791,10 @@ def raise_error(errMsg: Union[Column, str]) -> Column: if not isinstance(errMsg, (str, Column)): raise TypeError("errMsg should be a Column or a str, got {}".format(type(errMsg))) - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None errMsg = ( _create_column_from_literal(errMsg) if isinstance(errMsg, str) else _to_java_column(errMsg) ) - return Column(sc._jvm.functions.raise_error(errMsg)) + return _invoke_function("raise_error", errMsg) # ---------------------- String/Binary functions ------------------------------ @@ -2965,7 +2805,7 @@ def upper(col: "ColumnOrName") -> Column: """ Converts a string expression to upper case. """ - return _invoke_function_over_column("upper", col) + return _invoke_function_over_columns("upper", col) @since(1.5) @@ -2973,7 +2813,7 @@ def lower(col: "ColumnOrName") -> Column: """ Converts a string expression to lower case. """ - return _invoke_function_over_column("lower", col) + return _invoke_function_over_columns("lower", col) @since(1.5) @@ -2981,7 +2821,7 @@ def ascii(col: "ColumnOrName") -> Column: """ Computes the numeric value of the first character of the string column. 
""" - return _invoke_function_over_column("ascii", col) + return _invoke_function_over_columns("ascii", col) @since(1.5) @@ -2989,7 +2829,7 @@ def base64(col: "ColumnOrName") -> Column: """ Computes the BASE64 encoding of a binary column and returns it as a string column. """ - return _invoke_function_over_column("base64", col) + return _invoke_function_over_columns("base64", col) @since(1.5) @@ -2997,7 +2837,7 @@ def unbase64(col: "ColumnOrName") -> Column: """ Decodes a BASE64 encoded string column and returns it as a binary column. """ - return _invoke_function_over_column("unbase64", col) + return _invoke_function_over_columns("unbase64", col) @since(1.5) @@ -3005,7 +2845,7 @@ def ltrim(col: "ColumnOrName") -> Column: """ Trim the spaces from left end for the specified string value. """ - return _invoke_function_over_column("ltrim", col) + return _invoke_function_over_columns("ltrim", col) @since(1.5) @@ -3013,7 +2853,7 @@ def rtrim(col: "ColumnOrName") -> Column: """ Trim the spaces from right end for the specified string value. """ - return _invoke_function_over_column("rtrim", col) + return _invoke_function_over_columns("rtrim", col) @since(1.5) @@ -3021,7 +2861,7 @@ def trim(col: "ColumnOrName") -> Column: """ Trim the spaces from both ends for the specified string column. """ - return _invoke_function_over_column("trim", col) + return _invoke_function_over_columns("trim", col) def concat_ws(sep: str, *cols: "ColumnOrName") -> Column: @@ -3039,7 +2879,7 @@ def concat_ws(sep: str, *cols: "ColumnOrName") -> Column: """ sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.concat_ws(sep, _to_seq(sc, cols, _to_java_column))) + return _invoke_function("concat_ws", sep, _to_seq(sc, cols, _to_java_column)) @since(1.5) @@ -3048,9 +2888,7 @@ def decode(col: "ColumnOrName", charset: str) -> Column: Computes the first argument into a string from a binary using the provided character set (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'). """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.decode(_to_java_column(col), charset)) + return _invoke_function("decode", _to_java_column(col), charset) @since(1.5) @@ -3059,9 +2897,7 @@ def encode(col: "ColumnOrName", charset: str) -> Column: Computes the first argument into a binary from a string using the provided character set (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'). 
""" - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.encode(_to_java_column(col), charset)) + return _invoke_function("encode", _to_java_column(col), charset) def format_number(col: "ColumnOrName", d: int) -> Column: @@ -3081,9 +2917,7 @@ def format_number(col: "ColumnOrName", d: int) -> Column: >>> spark.createDataFrame([(5,)], ['a']).select(format_number('a', 4).alias('v')).collect() [Row(v='5.0000')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.format_number(_to_java_column(col), d)) + return _invoke_function("format_number", _to_java_column(col), d) def format_string(format: str, *cols: "ColumnOrName") -> Column: @@ -3107,7 +2941,7 @@ def format_string(format: str, *cols: "ColumnOrName") -> Column: """ sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.format_string(format, _to_seq(sc, cols, _to_java_column))) + return _invoke_function("format_string", format, _to_seq(sc, cols, _to_java_column)) def instr(str: "ColumnOrName", substr: str) -> Column: @@ -3126,9 +2960,7 @@ def instr(str: "ColumnOrName", substr: str) -> Column: >>> df.select(instr(df.s, 'b').alias('s')).collect() [Row(s=2)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.instr(_to_java_column(str), substr)) + return _invoke_function("instr", _to_java_column(str), substr) def overlay( @@ -3177,12 +3009,7 @@ def overlay( pos = _create_column_from_literal(pos) if isinstance(pos, int) else _to_java_column(pos) len = _create_column_from_literal(len) if isinstance(len, int) else _to_java_column(len) - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - - return Column( - sc._jvm.functions.overlay(_to_java_column(src), _to_java_column(replace), pos, len) - ) + return _invoke_function("overlay", _to_java_column(src), _to_java_column(replace), pos, len) def sentences( @@ -3220,13 +3047,7 @@ def sentences( if country is None: country = lit("") - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column( - sc._jvm.functions.sentences( - _to_java_column(string), _to_java_column(language), _to_java_column(country) - ) - ) + return _invoke_function_over_columns("sentences", string, language, country) def substring(str: "ColumnOrName", pos: int, len: int) -> Column: @@ -3247,9 +3068,7 @@ def substring(str: "ColumnOrName", pos: int, len: int) -> Column: >>> df.select(substring(df.s, 1, 2).alias('s')).collect() [Row(s='ab')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.substring(_to_java_column(str), pos, len)) + return _invoke_function("substring", _to_java_column(str), pos, len) def substring_index(str: "ColumnOrName", delim: str, count: int) -> Column: @@ -3269,9 +3088,7 @@ def substring_index(str: "ColumnOrName", delim: str, count: int) -> Column: >>> df.select(substring_index(df.s, '.', -3).alias('s')).collect() [Row(s='b.c.d')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.substring_index(_to_java_column(str), delim, count)) + return _invoke_function("substring_index", _to_java_column(str), delim, count) def levenshtein(left: "ColumnOrName", right: "ColumnOrName") -> Column: @@ -3285,10 
+3102,7 @@ def levenshtein(left: "ColumnOrName", right: "ColumnOrName") -> Column: >>> df0.select(levenshtein('l', 'r').alias('d')).collect() [Row(d=3)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.levenshtein(_to_java_column(left), _to_java_column(right)) - return Column(jc) + return _invoke_function_over_columns("levenshtein", left, right) def locate(substr: str, str: "ColumnOrName", pos: int = 1) -> Column: @@ -3317,9 +3131,7 @@ def locate(substr: str, str: "ColumnOrName", pos: int = 1) -> Column: >>> df.select(locate('b', df.s, 1).alias('s')).collect() [Row(s=2)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.locate(substr, _to_java_column(str), pos)) + return _invoke_function("locate", substr, _to_java_column(str), pos) def lpad(col: "ColumnOrName", len: int, pad: str) -> Column: @@ -3334,9 +3146,7 @@ def lpad(col: "ColumnOrName", len: int, pad: str) -> Column: >>> df.select(lpad(df.s, 6, '#').alias('s')).collect() [Row(s='##abcd')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.lpad(_to_java_column(col), len, pad)) + return _invoke_function("lpad", _to_java_column(col), len, pad) def rpad(col: "ColumnOrName", len: int, pad: str) -> Column: @@ -3351,9 +3161,7 @@ def rpad(col: "ColumnOrName", len: int, pad: str) -> Column: >>> df.select(rpad(df.s, 6, '#').alias('s')).collect() [Row(s='abcd##')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.rpad(_to_java_column(col), len, pad)) + return _invoke_function("rpad", _to_java_column(col), len, pad) def repeat(col: "ColumnOrName", n: int) -> Column: @@ -3368,9 +3176,7 @@ def repeat(col: "ColumnOrName", n: int) -> Column: >>> df.select(repeat(df.s, 3).alias('s')).collect() [Row(s='ababab')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.repeat(_to_java_column(col), n)) + return _invoke_function("repeat", _to_java_column(col), n) def split(str: "ColumnOrName", pattern: str, limit: int = -1) -> Column: @@ -3406,9 +3212,7 @@ def split(str: "ColumnOrName", pattern: str, limit: int = -1) -> Column: >>> df.select(split(df.s, '[ABC]', -1).alias('s')).collect() [Row(s=['one', 'two', 'three', ''])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.split(_to_java_column(str), pattern, limit)) + return _invoke_function("split", _to_java_column(str), pattern, limit) def regexp_extract(str: "ColumnOrName", pattern: str, idx: int) -> Column: @@ -3429,10 +3233,7 @@ def regexp_extract(str: "ColumnOrName", pattern: str, idx: int) -> Column: >>> df.select(regexp_extract('str', '(a+)(b)?(c)', 2).alias('d')).collect() [Row(d='')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.regexp_extract(_to_java_column(str), pattern, idx) - return Column(jc) + return _invoke_function("regexp_extract", _to_java_column(str), pattern, idx) def regexp_replace(str: "ColumnOrName", pattern: str, replacement: str) -> Column: @@ -3446,10 +3247,7 @@ def regexp_replace(str: "ColumnOrName", pattern: str, replacement: str) -> Colum >>> df.select(regexp_replace('str', r'(\d+)', '--').alias('d')).collect() [Row(d='-----')] """ - sc = 
SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.regexp_replace(_to_java_column(str), pattern, replacement) - return Column(jc) + return _invoke_function("regexp_replace", _to_java_column(str), pattern, replacement) def initcap(col: "ColumnOrName") -> Column: @@ -3462,9 +3260,7 @@ def initcap(col: "ColumnOrName") -> Column: >>> spark.createDataFrame([('ab cd',)], ['a']).select(initcap("a").alias('v')).collect() [Row(v='Ab Cd')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.initcap(_to_java_column(col))) + return _invoke_function_over_columns("initcap", col) def soundex(col: "ColumnOrName") -> Column: @@ -3479,9 +3275,7 @@ def soundex(col: "ColumnOrName") -> Column: >>> df.select(soundex(df.name).alias("soundex")).collect() [Row(soundex='P362'), Row(soundex='U612')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.soundex(_to_java_column(col))) + return _invoke_function_over_columns("soundex", col) def bin(col: "ColumnOrName") -> Column: @@ -3494,10 +3288,7 @@ def bin(col: "ColumnOrName") -> Column: >>> df.select(bin(df.age).alias('c')).collect() [Row(c='10'), Row(c='101')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.bin(_to_java_column(col)) - return Column(jc) + return _invoke_function_over_columns("bin", col) def hex(col: "ColumnOrName") -> Column: @@ -3512,10 +3303,7 @@ def hex(col: "ColumnOrName") -> Column: >>> spark.createDataFrame([('ABC', 3)], ['a', 'b']).select(hex('a'), hex('b')).collect() [Row(hex(a)='414243', hex(b)='3')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.hex(_to_java_column(col)) - return Column(jc) + return _invoke_function_over_columns("hex", col) def unhex(col: "ColumnOrName") -> Column: @@ -3529,9 +3317,7 @@ def unhex(col: "ColumnOrName") -> Column: >>> spark.createDataFrame([('414243',)], ['a']).select(unhex('a')).collect() [Row(unhex(a)=bytearray(b'ABC'))] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.unhex(_to_java_column(col))) + return _invoke_function_over_columns("unhex", col) def length(col: "ColumnOrName") -> Column: @@ -3546,9 +3332,7 @@ def length(col: "ColumnOrName") -> Column: >>> spark.createDataFrame([('ABC ',)], ['a']).select(length('a').alias('length')).collect() [Row(length=4)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.length(_to_java_column(col))) + return _invoke_function_over_columns("length", col) def octet_length(col: "ColumnOrName") -> Column: @@ -3574,7 +3358,7 @@ def octet_length(col: "ColumnOrName") -> Column: ... .select(octet_length('cat')).collect() [Row(octet_length(cat)=3), Row(octet_length(cat)=4)] """ - return _invoke_function_over_column("octet_length", col) + return _invoke_function_over_columns("octet_length", col) def bit_length(col: "ColumnOrName") -> Column: @@ -3600,7 +3384,7 @@ def bit_length(col: "ColumnOrName") -> Column: ... 
.select(bit_length('cat')).collect() [Row(bit_length(cat)=24), Row(bit_length(cat)=32)] """ - return _invoke_function_over_column("bit_length", col) + return _invoke_function_over_columns("bit_length", col) def translate(srcCol: "ColumnOrName", matching: str, replace: str) -> Column: @@ -3617,9 +3401,7 @@ def translate(srcCol: "ColumnOrName", matching: str, replace: str) -> Column: ... .alias('r')).collect() [Row(r='1a2s3ae')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.translate(_to_java_column(srcCol), matching, replace)) + return _invoke_function("translate", _to_java_column(srcCol), matching, replace) # ---------------------- Collection functions ------------------------------ @@ -3655,12 +3437,9 @@ def create_map( >>> df.select(create_map([df.name, df.age]).alias("map")).collect() [Row(map={'Alice': 2}), Row(map={'Bob': 5})] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if len(cols) == 1 and isinstance(cols[0], (list, set)): cols = cols[0] # type: ignore[assignment] - jc = sc._jvm.functions.map(_to_seq(sc, cols, _to_java_column)) # type: ignore[arg-type] - return Column(jc) + return _invoke_function_over_seq_of_columns("map", cols) # type: ignore[arg-type] def map_from_arrays(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: @@ -3685,9 +3464,7 @@ def map_from_arrays(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: |{2 -> a, 5 -> b}| +----------------+ """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.map_from_arrays(_to_java_column(col1), _to_java_column(col2))) + return _invoke_function_over_columns("map_from_arrays", col1, col2) @overload @@ -3720,12 +3497,9 @@ def array( >>> df.select(array([df.age, df.age]).alias("arr")).collect() [Row(arr=[2, 2]), Row(arr=[5, 5])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if len(cols) == 1 and isinstance(cols[0], (list, set)): cols = cols[0] # type: ignore[assignment] - jc = sc._jvm.functions.array(_to_seq(sc, cols, _to_java_column)) # type: ignore[arg-type] - return Column(jc) + return _invoke_function_over_seq_of_columns("array", cols) # type: ignore[arg-type] def array_contains(col: "ColumnOrName", value: Any) -> Column: @@ -3750,10 +3524,8 @@ def array_contains(col: "ColumnOrName", value: Any) -> Column: >>> df.select(array_contains(df.data, lit("a"))).collect() [Row(array_contains(data, a)=True), Row(array_contains(data, a)=False)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None value = value._jc if isinstance(value, Column) else value - return Column(sc._jvm.functions.array_contains(_to_java_column(col), value)) + return _invoke_function("array_contains", _to_java_column(col), value) def arrays_overlap(a1: "ColumnOrName", a2: "ColumnOrName") -> Column: @@ -3770,9 +3542,7 @@ def arrays_overlap(a1: "ColumnOrName", a2: "ColumnOrName") -> Column: >>> df.select(arrays_overlap(df.x, df.y).alias("overlap")).collect() [Row(overlap=True), Row(overlap=False)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.arrays_overlap(_to_java_column(a1), _to_java_column(a2))) + return _invoke_function_over_columns("arrays_overlap", a1, a2) def slice( @@ -3799,19 +3569,10 @@ def slice( >>> df.select(slice(df.x, 2, 2).alias("sliced")).collect() [Row(sliced=[2, 3]), 
Row(sliced=[5])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - start = lit(start) if isinstance(start, int) else start length = lit(length) if isinstance(length, int) else length - return Column( - sc._jvm.functions.slice( - _to_java_column(x), - _to_java_column(start), - _to_java_column(length), - ) - ) + return _invoke_function_over_columns("slice", x, start, length) def array_join( @@ -3834,11 +3595,9 @@ def array_join( sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None if null_replacement is None: - return Column(sc._jvm.functions.array_join(_to_java_column(col), delimiter)) + return _invoke_function("array_join", _to_java_column(col), delimiter) else: - return Column( - sc._jvm.functions.array_join(_to_java_column(col), delimiter, null_replacement) - ) + return _invoke_function("array_join", _to_java_column(col), delimiter, null_replacement) def concat(*cols: "ColumnOrName") -> Column: @@ -3858,9 +3617,7 @@ def concat(*cols: "ColumnOrName") -> Column: >>> df.select(concat(df.a, df.b, df.c).alias("arr")).collect() [Row(arr=[1, 2, 3, 4, 5]), Row(arr=None)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.concat(_to_seq(sc, cols, _to_java_column))) + return _invoke_function_over_seq_of_columns("concat", cols) def array_position(col: "ColumnOrName", value: Any) -> Column: @@ -3881,9 +3638,7 @@ def array_position(col: "ColumnOrName", value: Any) -> Column: >>> df.select(array_position(df.data, "a")).collect() [Row(array_position(data, a)=3), Row(array_position(data, a)=0)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.array_position(_to_java_column(col), value)) + return _invoke_function("array_position", _to_java_column(col), value) def element_at(col: "ColumnOrName", extraction: Any) -> Column: @@ -3914,9 +3669,7 @@ def element_at(col: "ColumnOrName", extraction: Any) -> Column: >>> df.select(element_at(df.data, lit("a"))).collect() [Row(element_at(data, a)=1.0), Row(element_at(data, a)=None)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.element_at(_to_java_column(col), lit(extraction)._jc)) + return _invoke_function_over_columns("element_at", col, lit(extraction)) def array_remove(col: "ColumnOrName", element: Any) -> Column: @@ -3938,9 +3691,7 @@ def array_remove(col: "ColumnOrName", element: Any) -> Column: >>> df.select(array_remove(df.data, 1)).collect() [Row(array_remove(data, 1)=[2, 3]), Row(array_remove(data, 1)=[])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.array_remove(_to_java_column(col), element)) + return _invoke_function("array_remove", _to_java_column(col), element) def array_distinct(col: "ColumnOrName") -> Column: @@ -3960,9 +3711,7 @@ def array_distinct(col: "ColumnOrName") -> Column: >>> df.select(array_distinct(df.data)).collect() [Row(array_distinct(data)=[1, 2, 3]), Row(array_distinct(data)=[4, 5])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.array_distinct(_to_java_column(col))) + return _invoke_function_over_columns("array_distinct", col) def array_intersect(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: @@ -3986,9 +3735,7 @@ def array_intersect(col1: 
"ColumnOrName", col2: "ColumnOrName") -> Column: >>> df.select(array_intersect(df.c1, df.c2)).collect() [Row(array_intersect(c1, c2)=['a', 'c'])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.array_intersect(_to_java_column(col1), _to_java_column(col2))) + return _invoke_function_over_columns("array_intersect", col1, col2) def array_union(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: @@ -4012,9 +3759,7 @@ def array_union(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: >>> df.select(array_union(df.c1, df.c2)).collect() [Row(array_union(c1, c2)=['b', 'a', 'c', 'd', 'f'])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.array_union(_to_java_column(col1), _to_java_column(col2))) + return _invoke_function_over_columns("array_union", col1, col2) def array_except(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: @@ -4038,9 +3783,7 @@ def array_except(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: >>> df.select(array_except(df.c1, df.c2)).collect() [Row(array_except(c1, c2)=['b'])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.array_except(_to_java_column(col1), _to_java_column(col2))) + return _invoke_function_over_columns("array_except", col1, col2) def explode(col: "ColumnOrName") -> Column: @@ -4065,10 +3808,7 @@ def explode(col: "ColumnOrName") -> Column: | a| b| +---+-----+ """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.explode(_to_java_column(col)) - return Column(jc) + return _invoke_function_over_columns("explode", col) def posexplode(col: "ColumnOrName") -> Column: @@ -4093,10 +3833,7 @@ def posexplode(col: "ColumnOrName") -> Column: | 0| a| b| +---+---+-----+ """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.posexplode(_to_java_column(col)) - return Column(jc) + return _invoke_function_over_columns("posexplode", col) def explode_outer(col: "ColumnOrName") -> Column: @@ -4133,10 +3870,7 @@ def explode_outer(col: "ColumnOrName") -> Column: | 3| null|null| +---+----------+----+ """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.explode_outer(_to_java_column(col)) - return Column(jc) + return _invoke_function_over_columns("explode_outer", col) def posexplode_outer(col: "ColumnOrName") -> Column: @@ -4172,10 +3906,7 @@ def posexplode_outer(col: "ColumnOrName") -> Column: | 3| null|null|null| +---+----------+----+----+ """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.posexplode_outer(_to_java_column(col)) - return Column(jc) + return _invoke_function_over_columns("posexplode_outer", col) def get_json_object(col: "ColumnOrName", path: str) -> Column: @@ -4200,10 +3931,7 @@ def get_json_object(col: "ColumnOrName", path: str) -> Column: ... 
get_json_object(df.jstring, '$.f2').alias("c1") ).collect() [Row(key='1', c0='value1', c1='value2'), Row(key='2', c0='value12', c1=None)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.get_json_object(_to_java_column(col), path) - return Column(jc) + return _invoke_function("get_json_object", _to_java_column(col), path) def json_tuple(col: "ColumnOrName", *fields: str) -> Column: @@ -4227,8 +3955,7 @@ def json_tuple(col: "ColumnOrName", *fields: str) -> Column: """ sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.json_tuple(_to_java_column(col), _to_seq(sc, fields)) - return Column(jc) + return _invoke_function("json_tuple", _to_java_column(col), _to_seq(sc, fields)) def from_json( @@ -4284,14 +4011,11 @@ def from_json( [Row(json=[1, 2, 3])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if isinstance(schema, DataType): schema = schema.json() elif isinstance(schema, Column): schema = _to_java_column(schema) - jc = sc._jvm.functions.from_json(_to_java_column(col), schema, _options_to_str(options)) - return Column(jc) + return _invoke_function("from_json", _to_java_column(col), schema, _options_to_str(options)) def to_json(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column: @@ -4340,10 +4064,7 @@ def to_json(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Co [Row(json='["Alice","Bob"]')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.to_json(_to_java_column(col), _options_to_str(options)) - return Column(jc) + return _invoke_function("to_json", _to_java_column(col), _options_to_str(options)) def schema_of_json(json: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column: @@ -4382,10 +4103,7 @@ def schema_of_json(json: "ColumnOrName", options: Optional[Dict[str, str]] = Non else: raise TypeError("schema argument should be a column or string") - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.schema_of_json(col, _options_to_str(options)) - return Column(jc) + return _invoke_function("schema_of_json", col, _options_to_str(options)) def schema_of_csv(csv: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column: @@ -4420,10 +4138,7 @@ def schema_of_csv(csv: "ColumnOrName", options: Optional[Dict[str, str]] = None) else: raise TypeError("schema argument should be a column or string") - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.schema_of_csv(col, _options_to_str(options)) - return Column(jc) + return _invoke_function("schema_of_csv", col, _options_to_str(options)) def to_csv(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column: @@ -4453,10 +4168,7 @@ def to_csv(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Col [Row(csv='2,Alice')] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - jc = sc._jvm.functions.to_csv(_to_java_column(col), _options_to_str(options)) - return Column(jc) + return _invoke_function("to_csv", _to_java_column(col), _options_to_str(options)) def size(col: "ColumnOrName") -> Column: @@ -4476,9 +4188,7 @@ def size(col: "ColumnOrName") -> Column: >>> df.select(size(df.data)).collect() [Row(size(data)=3), Row(size(data)=1), Row(size(data)=0)] """ - sc = 
SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.size(_to_java_column(col))) + return _invoke_function_over_columns("size", col) def array_min(col: "ColumnOrName") -> Column: @@ -4498,9 +4208,7 @@ def array_min(col: "ColumnOrName") -> Column: >>> df.select(array_min(df.data).alias('min')).collect() [Row(min=1), Row(min=-1)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.array_min(_to_java_column(col))) + return _invoke_function_over_columns("array_min", col) def array_max(col: "ColumnOrName") -> Column: @@ -4520,9 +4228,7 @@ def array_max(col: "ColumnOrName") -> Column: >>> df.select(array_max(df.data).alias('max')).collect() [Row(max=3), Row(max=10)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.array_max(_to_java_column(col))) + return _invoke_function_over_columns("array_max", col) def sort_array(col: "ColumnOrName", asc: bool = True) -> Column: @@ -4548,9 +4254,7 @@ def sort_array(col: "ColumnOrName", asc: bool = True) -> Column: >>> df.select(sort_array(df.data, asc=False).alias('r')).collect() [Row(r=[3, 2, 1, None]), Row(r=[1]), Row(r=[])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.sort_array(_to_java_column(col), asc)) + return _invoke_function("sort_array", _to_java_column(col), asc) def array_sort(col: "ColumnOrName") -> Column: @@ -4571,9 +4275,7 @@ def array_sort(col: "ColumnOrName") -> Column: >>> df.select(array_sort(df.data).alias('r')).collect() [Row(r=[1, 2, 3, None]), Row(r=[1]), Row(r=[])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.array_sort(_to_java_column(col))) + return _invoke_function_over_columns("array_sort", col) def shuffle(col: "ColumnOrName") -> Column: @@ -4597,9 +4299,7 @@ def shuffle(col: "ColumnOrName") -> Column: >>> df.select(shuffle(df.data).alias('s')).collect() # doctest: +SKIP [Row(s=[3, 1, 5, 20]), Row(s=[20, None, 3, 1])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.shuffle(_to_java_column(col))) + return _invoke_function_over_columns("shuffle", col) def reverse(col: "ColumnOrName") -> Column: @@ -4622,9 +4322,7 @@ def reverse(col: "ColumnOrName") -> Column: >>> df.select(reverse(df.data).alias('r')).collect() [Row(r=[3, 1, 2]), Row(r=[1]), Row(r=[])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.reverse(_to_java_column(col))) + return _invoke_function_over_columns("reverse", col) def flatten(col: "ColumnOrName") -> Column: @@ -4646,9 +4344,7 @@ def flatten(col: "ColumnOrName") -> Column: >>> df.select(flatten(df.data).alias('r')).collect() [Row(r=[1, 2, 3, 4, 5, 6]), Row(r=None)] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.flatten(_to_java_column(col))) + return _invoke_function_over_columns("flatten", col) def map_keys(col: "ColumnOrName") -> Column: @@ -4673,9 +4369,7 @@ def map_keys(col: "ColumnOrName") -> Column: |[1, 2]| +------+ """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.map_keys(_to_java_column(col))) + return 
_invoke_function_over_columns("map_keys", col) def map_values(col: "ColumnOrName") -> Column: @@ -4700,9 +4394,7 @@ def map_values(col: "ColumnOrName") -> Column: |[a, b]| +------+ """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.map_values(_to_java_column(col))) + return _invoke_function_over_columns("map_values", col) def map_entries(col: "ColumnOrName") -> Column: @@ -4727,9 +4419,7 @@ def map_entries(col: "ColumnOrName") -> Column: |[{1, a}, {2, b}]| +----------------+ """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.map_entries(_to_java_column(col))) + return _invoke_function_over_columns("map_entries", col) def map_from_entries(col: "ColumnOrName") -> Column: @@ -4754,9 +4444,7 @@ def map_from_entries(col: "ColumnOrName") -> Column: |{1 -> a, 2 -> b}| +----------------+ """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.map_from_entries(_to_java_column(col))) + return _invoke_function_over_columns("map_from_entries", col) def array_repeat(col: "ColumnOrName", count: Union["ColumnOrName", int]) -> Column: @@ -4778,12 +4466,9 @@ def array_repeat(col: "ColumnOrName", count: Union["ColumnOrName", int]) -> Colu >>> df.select(array_repeat(df.data, 3).alias('r')).collect() [Row(r=['ab', 'ab', 'ab'])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - count = lit(count) if isinstance(count, int) else count - return Column(sc._jvm.functions.array_repeat(_to_java_column(col), _to_java_column(count))) + return _invoke_function_over_columns("array_repeat", col, count) def arrays_zip(*cols: "ColumnOrName") -> Column: @@ -4805,9 +4490,7 @@ def arrays_zip(*cols: "ColumnOrName") -> Column: >>> df.select(arrays_zip(df.vals1, df.vals2).alias('zipped')).collect() [Row(zipped=[Row(vals1=1, vals2=2), Row(vals1=2, vals2=3), Row(vals1=3, vals2=4)])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.arrays_zip(_to_seq(sc, cols, _to_java_column))) + return _invoke_function_over_seq_of_columns("arrays_zip", cols) @overload @@ -4843,12 +4526,9 @@ def map_concat( |{1 -> a, 2 -> b, 3 -> c}| +------------------------+ """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if len(cols) == 1 and isinstance(cols[0], (list, set)): cols = cols[0] # type: ignore[assignment] - jc = sc._jvm.functions.map_concat(_to_seq(sc, cols, _to_java_column)) # type: ignore[arg-type] - return Column(jc) + return _invoke_function_over_seq_of_columns("map_concat", cols) # type: ignore[arg-type] def sequence( @@ -4870,16 +4550,10 @@ def sequence( >>> df2.select(sequence('C1', 'C2', 'C3').alias('r')).collect() [Row(r=[4, 2, 0, -2, -4])] """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None if step is None: - return Column(sc._jvm.functions.sequence(_to_java_column(start), _to_java_column(stop))) + return _invoke_function_over_columns("sequence", start, stop) else: - return Column( - sc._jvm.functions.sequence( - _to_java_column(start), _to_java_column(stop), _to_java_column(step) - ) - ) + return _invoke_function_over_columns("sequence", start, stop, step) def from_csv( @@ -4931,8 +4605,7 @@ def from_csv( else: raise TypeError("schema argument should be a column or string") - jc = 
sc._jvm.functions.from_csv(_to_java_column(col), schema, _options_to_str(options)) - return Column(jc) + return _invoke_function("from_csv", _to_java_column(col), schema, _options_to_str(options)) def _unresolved_named_lambda_variable(*name_parts: Any) -> Column: @@ -5532,9 +5205,7 @@ def years(col: "ColumnOrName") -> Column: method of the `DataFrameWriterV2`. """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.years(_to_java_column(col))) + return _invoke_function_over_columns("years", col) def months(col: "ColumnOrName") -> Column: @@ -5557,9 +5228,7 @@ def months(col: "ColumnOrName") -> Column: method of the `DataFrameWriterV2`. """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.months(_to_java_column(col))) + return _invoke_function_over_columns("months", col) def days(col: "ColumnOrName") -> Column: @@ -5582,9 +5251,7 @@ def days(col: "ColumnOrName") -> Column: method of the `DataFrameWriterV2`. """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.days(_to_java_column(col))) + return _invoke_function_over_columns("days", col) def hours(col: "ColumnOrName") -> Column: @@ -5607,9 +5274,7 @@ def hours(col: "ColumnOrName") -> Column: method of the `DataFrameWriterV2`. """ - sc = SparkContext._active_spark_context - assert sc is not None and sc._jvm is not None - return Column(sc._jvm.functions.hours(_to_java_column(col))) + return _invoke_function_over_columns("hours", col) def bucket(numBuckets: Union[Column, int], col: "ColumnOrName") -> Column: @@ -5642,7 +5307,7 @@ def bucket(numBuckets: Union[Column, int], col: "ColumnOrName") -> Column: if isinstance(numBuckets, int) else _to_java_column(numBuckets) ) - return Column(sc._jvm.functions.bucket(numBuckets, _to_java_column(col))) + return _invoke_function("bucket", numBuckets, _to_java_column(col)) # ---------------------------- User Defined Function ---------------------------------- From 4b4ff4b130306c269fb470826b2b113caf67f8bf Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 13 Jan 2022 21:35:17 +0800 Subject: [PATCH 010/513] [SPARK-35703][SQL][FOLLOWUP] Only eliminate shuffles if partition keys contain all the join keys ### What changes were proposed in this pull request? This is a follow-up of https://github.com/apache/spark/pull/32875 . That PR made two improvements: 1. allow bucket join even if the bucket hash function is different from Spark's shuffle hash function 2. allow bucket join even if the hash partition keys are only a subset of the join keys. The first improvement is the main target of the SPIP "storage partition join"; the second is largely a by-product of the framework refactor and was not planned. This PR disables the second improvement by default, because eliminating the shuffle when the partition keys cover only part of the join keys can introduce a performance regression under data skew. Enabling it safely needs more design work, such as checking the NDV (number of distinct values). ### Why are the changes needed? Avoid performance regressions. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Closes #35138 from cloud-fan/join. 
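As an illustration only (not part of this patch), the sketch below shows one way to observe the new behaviour from a local `SparkSession`. The config key and its default come from the `SQLConf` entry added in this commit; the table names, sample data and bucket counts are made up for the demo, and whether the shuffle is actually removed with the flag off also depends on the usual bucketing conditions (matching bucket counts, bucketed scans enabled, bucket columns selected, etc.).

```scala
// Illustrative sketch, not part of the patch: exercising the new internal flag
// spark.sql.requireAllClusterKeysForCoPartition. Both tables are bucketed on `a`
// only, while the join uses keys (a, b), so the hash partition keys are a strict
// subset of the join keys -- exactly the case this commit makes conservative.
import org.apache.spark.sql.SparkSession

object CoPartitionFlagDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("require-all-cluster-keys-demo")
      .getOrCreate()
    import spark.implicits._

    // Disable broadcast joins so a sort-merge join is planned and the exchanges
    // (or their absence) are visible in the explain output.
    spark.conf.set("spark.sql.autoBroadcastJoinThreshold", "-1")

    // Two hypothetical bucketed tables, both bucketed on `a` with 4 buckets.
    Seq((1, 1, "x"), (2, 2, "y")).toDF("a", "b", "v")
      .write.mode("overwrite").bucketBy(4, "a").sortBy("a").saveAsTable("t1")
    Seq((1, 1, "x"), (2, 3, "z")).toDF("a", "b", "w")
      .write.mode("overwrite").bucketBy(4, "a").sortBy("a").saveAsTable("t2")

    val q = "SELECT * FROM t1 JOIN t2 ON t1.a = t2.a AND t1.b = t2.b"

    // New default: true. The planner adds shuffles on (a, b) for both sides because
    // neither child is hash-partitioned on all the join keys.
    spark.conf.set("spark.sql.requireAllClusterKeysForCoPartition", "true")
    spark.sql(q).explain()

    // Pre-patch behaviour: co-partitioning on the subset key `a` may be reused and
    // the shuffles eliminated, at the risk of skew -- hence it is now opt-in.
    spark.conf.set("spark.sql.requireAllClusterKeysForCoPartition", "false")
    spark.sql(q).explain()

    spark.stop()
  }
}
```

The conservative default reflects the concern described above: reusing a partitioning on fewer keys concentrates skewed join keys into fewer tasks, so the planner now prefers an extra shuffle over unpredictable parallelism unless the user explicitly opts out.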
Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../plans/physical/partitioning.scala | 54 +- .../apache/spark/sql/internal/SQLConf.scala | 11 + .../spark/sql/catalyst/ShuffleSpecSuite.scala | 21 +- .../approved-plans-v1_4/q17.sf100/explain.txt | 325 +++---- .../q17.sf100/simplified.txt | 155 ++-- .../approved-plans-v1_4/q25.sf100/explain.txt | 325 +++---- .../q25.sf100/simplified.txt | 155 ++-- .../approved-plans-v1_4/q29.sf100/explain.txt | 361 ++++---- .../q29.sf100/simplified.txt | 157 ++-- .../approved-plans-v1_4/q47.sf100/explain.txt | 235 ++--- .../q47.sf100/simplified.txt | 169 ++-- .../approved-plans-v1_4/q57.sf100/explain.txt | 235 ++--- .../q57.sf100/simplified.txt | 169 ++-- .../approved-plans-v1_4/q72.sf100/explain.txt | 404 +++++---- .../q72.sf100/simplified.txt | 216 ++--- .../approved-plans-v2_7/q24.sf100/explain.txt | 104 +-- .../q24.sf100/simplified.txt | 69 +- .../approved-plans-v2_7/q47.sf100/explain.txt | 235 ++--- .../q47.sf100/simplified.txt | 169 ++-- .../q51a.sf100/explain.txt | 508 +++++------ .../q51a.sf100/simplified.txt | 206 ++--- .../approved-plans-v2_7/q57.sf100/explain.txt | 235 ++--- .../q57.sf100/simplified.txt | 169 ++-- .../approved-plans-v2_7/q64/explain.txt | 838 +++++++++--------- .../approved-plans-v2_7/q64/simplified.txt | 524 +++++------ .../approved-plans-v2_7/q72.sf100/explain.txt | 404 +++++---- .../q72.sf100/simplified.txt | 216 ++--- .../spark/sql/execution/PlannerSuite.scala | 6 +- .../exchange/EnsureRequirementsSuite.scala | 71 +- .../spark/sql/sources/BucketedReadSuite.scala | 3 +- 30 files changed, 3506 insertions(+), 3243 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index 7d30ecd97c3ca..ed360bbf1ca4e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.plans.physical import scala.collection.mutable import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, IntegerType} /** @@ -380,7 +381,7 @@ trait ShuffleSpec { /** * Whether this shuffle spec can be used to create partitionings for the other children. */ - def canCreatePartitioning: Boolean = false + def canCreatePartitioning: Boolean /** * Creates a partitioning that can be used to re-partition the other side with the given @@ -412,6 +413,11 @@ case class RangeShuffleSpec( numPartitions: Int, distribution: ClusteredDistribution) extends ShuffleSpec { + // `RangePartitioning` is not compatible with any other partitioning since it can't guarantee + // data are co-partitioned for all the children, as range boundaries are randomly sampled. We + // can't let `RangeShuffleSpec` to create a partitioning. 
+ override def canCreatePartitioning: Boolean = false + override def isCompatibleWith(other: ShuffleSpec): Boolean = other match { case SinglePartitionShuffleSpec => numPartitions == 1 case ShuffleSpecCollection(specs) => specs.exists(isCompatibleWith) @@ -424,8 +430,19 @@ case class RangeShuffleSpec( case class HashShuffleSpec( partitioning: HashPartitioning, distribution: ClusteredDistribution) extends ShuffleSpec { - lazy val hashKeyPositions: Seq[mutable.BitSet] = - createHashKeyPositions(distribution.clustering, partitioning.expressions) + + /** + * A sequence where each element is a set of positions of the hash partition key to the cluster + * keys. For instance, if cluster keys are [a, b, b] and hash partition keys are [a, b], the + * result will be [(0), (1, 2)]. + */ + lazy val hashKeyPositions: Seq[mutable.BitSet] = { + val distKeyToPos = mutable.Map.empty[Expression, mutable.BitSet] + distribution.clustering.zipWithIndex.foreach { case (distKey, distKeyPos) => + distKeyToPos.getOrElseUpdate(distKey.canonicalized, mutable.BitSet.empty).add(distKeyPos) + } + partitioning.expressions.map(k => distKeyToPos(k.canonicalized)) + } override def isCompatibleWith(other: ShuffleSpec): Boolean = other match { case SinglePartitionShuffleSpec => @@ -451,7 +468,20 @@ case class HashShuffleSpec( false } - override def canCreatePartitioning: Boolean = true + override def canCreatePartitioning: Boolean = { + // To avoid potential data skew, we don't allow `HashShuffleSpec` to create partitioning if + // the hash partition keys are not the full join keys (the cluster keys). Then the planner + // will add shuffles with the default partitioning of `ClusteredDistribution`, which uses all + // the join keys. + if (SQLConf.get.getConf(SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION)) { + partitioning.expressions.length == distribution.clustering.length && + partitioning.expressions.zip(distribution.clustering).forall { + case (l, r) => l.semanticEquals(r) + } + } else { + true + } + } override def createPartitioning(clustering: Seq[Expression]): Partitioning = { val exprs = hashKeyPositions.map(v => clustering(v.head)) @@ -459,22 +489,6 @@ case class HashShuffleSpec( } override def numPartitions: Int = partitioning.numPartitions - - /** - * Returns a sequence where each element is a set of positions of the key in `hashKeys` to its - * positions in `requiredClusterKeys`. For instance, if `requiredClusterKeys` is [a, b, b] and - * `hashKeys` is [a, b], the result will be [(0), (1, 2)]. 
- */ - private def createHashKeyPositions( - requiredClusterKeys: Seq[Expression], - hashKeys: Seq[Expression]): Seq[mutable.BitSet] = { - val distKeyToPos = mutable.Map.empty[Expression, mutable.BitSet] - requiredClusterKeys.zipWithIndex.foreach { case (distKey, distKeyPos) => - distKeyToPos.getOrElseUpdate(distKey.canonicalized, mutable.BitSet.empty).add(distKeyPos) - } - - hashKeys.map(k => distKeyToPos(k.canonicalized)) - } } case class ShuffleSpecCollection(specs: Seq[ShuffleSpec]) extends ShuffleSpec { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 252dd5bad30b4..42979a68d8578 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -396,6 +396,17 @@ object SQLConf { .booleanConf .createWithDefault(true) + val REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION = + buildConf("spark.sql.requireAllClusterKeysForCoPartition") + .internal() + .doc("When true, the planner requires all the clustering keys as the hash partition keys " + + "of the children, to eliminate the shuffles for the operator that needs its children to " + + "be co-partitioned, such as JOIN node. This is to avoid data skews which can lead to " + + "significant performance regression if shuffles are eliminated.") + .version("3.3.0") + .booleanConf + .createWithDefault(true) + val RADIX_SORT_ENABLED = buildConf("spark.sql.sort.enableRadixSort") .internal() .doc("When true, enable use of radix sort when possible. Radix sort is much faster but " + diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala index d4d73b363e23d..74ec949fe4470 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala @@ -18,11 +18,12 @@ package org.apache.spark.sql.catalyst import org.apache.spark.SparkFunSuite -/* Implicit conversions */ import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.plans.physical._ +import org.apache.spark.sql.internal.SQLConf -class ShuffleSpecSuite extends SparkFunSuite { +class ShuffleSpecSuite extends SparkFunSuite with SQLHelper { protected def checkCompatible( left: ShuffleSpec, right: ShuffleSpec, @@ -349,12 +350,22 @@ class ShuffleSpecSuite extends SparkFunSuite { test("canCreatePartitioning") { val distribution = ClusteredDistribution(Seq($"a", $"b")) - assert(HashShuffleSpec(HashPartitioning(Seq($"a"), 10), distribution).canCreatePartitioning) + withSQLConf(SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION.key -> "false") { + assert(HashShuffleSpec(HashPartitioning(Seq($"a"), 10), distribution).canCreatePartitioning) + } + withSQLConf(SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION.key -> "true") { + assert(!HashShuffleSpec(HashPartitioning(Seq($"a"), 10), distribution) + .canCreatePartitioning) + assert(HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), distribution) + .canCreatePartitioning) + } assert(SinglePartitionShuffleSpec.canCreatePartitioning) - assert(ShuffleSpecCollection(Seq( + withSQLConf(SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION.key -> "false") { + assert(ShuffleSpecCollection(Seq( HashShuffleSpec(HashPartitioning(Seq($"a"), 10), 
distribution), HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), distribution))) - .canCreatePartitioning) + .canCreatePartitioning) + } assert(!RangeShuffleSpec(10, distribution).canCreatePartitioning) } diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt index 16afa38901107..d61798f6ad06e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt @@ -1,50 +1,53 @@ == Physical Plan == -TakeOrderedAndProject (46) -+- * HashAggregate (45) - +- Exchange (44) - +- * HashAggregate (43) - +- * Project (42) - +- * SortMergeJoin Inner (41) - :- * Project (32) - : +- * SortMergeJoin Inner (31) - : :- * Sort (22) - : : +- * Project (21) - : : +- * SortMergeJoin Inner (20) - : : :- * Sort (14) - : : : +- Exchange (13) - : : : +- * Project (12) - : : : +- * BroadcastHashJoin Inner BuildRight (11) - : : : :- * Project (6) - : : : : +- * BroadcastHashJoin Inner BuildRight (5) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- ReusedExchange (4) - : : : +- BroadcastExchange (10) - : : : +- * Filter (9) - : : : +- * ColumnarToRow (8) - : : : +- Scan parquet default.store (7) - : : +- * Sort (19) - : : +- Exchange (18) - : : +- * Filter (17) - : : +- * ColumnarToRow (16) - : : +- Scan parquet default.item (15) - : +- * Sort (30) - : +- Exchange (29) - : +- * Project (28) - : +- * BroadcastHashJoin Inner BuildRight (27) - : :- * Filter (25) - : : +- * ColumnarToRow (24) - : : +- Scan parquet default.store_returns (23) - : +- ReusedExchange (26) - +- * Sort (40) - +- Exchange (39) - +- * Project (38) - +- * BroadcastHashJoin Inner BuildRight (37) - :- * Filter (35) - : +- * ColumnarToRow (34) - : +- Scan parquet default.catalog_sales (33) - +- ReusedExchange (36) +TakeOrderedAndProject (49) ++- * HashAggregate (48) + +- Exchange (47) + +- * HashAggregate (46) + +- * Project (45) + +- * SortMergeJoin Inner (44) + :- * Sort (35) + : +- Exchange (34) + : +- * Project (33) + : +- * SortMergeJoin Inner (32) + : :- * Sort (23) + : : +- Exchange (22) + : : +- * Project (21) + : : +- * SortMergeJoin Inner (20) + : : :- * Sort (14) + : : : +- Exchange (13) + : : : +- * Project (12) + : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : :- * Project (6) + : : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- ReusedExchange (4) + : : : +- BroadcastExchange (10) + : : : +- * Filter (9) + : : : +- * ColumnarToRow (8) + : : : +- Scan parquet default.store (7) + : : +- * Sort (19) + : : +- Exchange (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet default.item (15) + : +- * Sort (31) + : +- Exchange (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Filter (26) + : : +- * ColumnarToRow (25) + : : +- Scan parquet default.store_returns (24) + : +- ReusedExchange (27) + +- * Sort (43) + +- Exchange (42) + +- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet default.catalog_sales (36) + +- ReusedExchange (39) (1) Scan parquet default.store_sales @@ -62,7 +65,7 @@ Input [6]: [ss_item_sk#1, 
ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) -(4) ReusedExchange [Reuses operator id: 51] +(4) ReusedExchange [Reuses operator id: 54] Output [1]: [d_date_sk#8] (5) BroadcastHashJoin [codegen id : 3] @@ -140,182 +143,194 @@ Join condition: None Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15] Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_sk#13, i_item_id#14, i_item_desc#15] -(22) Sort [codegen id : 7] +(22) Exchange +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15] +Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [id=#17] + +(23) Sort [codegen id : 8] Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15] Arguments: [ss_customer_sk#2 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 -(23) Scan parquet default.store_returns -Output [5]: [sr_item_sk#17, sr_customer_sk#18, sr_ticket_number#19, sr_return_quantity#20, sr_returned_date_sk#21] +(24) Scan parquet default.store_returns +Output [5]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21, sr_returned_date_sk#22] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(sr_returned_date_sk#21), dynamicpruningexpression(sr_returned_date_sk#21 IN dynamicpruning#22)] +PartitionFilters: [isnotnull(sr_returned_date_sk#22), dynamicpruningexpression(sr_returned_date_sk#22 IN dynamicpruning#23)] PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct -(24) ColumnarToRow [codegen id : 9] -Input [5]: [sr_item_sk#17, sr_customer_sk#18, sr_ticket_number#19, sr_return_quantity#20, sr_returned_date_sk#21] +(25) ColumnarToRow [codegen id : 10] +Input [5]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21, sr_returned_date_sk#22] -(25) Filter [codegen id : 9] -Input [5]: [sr_item_sk#17, sr_customer_sk#18, sr_ticket_number#19, sr_return_quantity#20, sr_returned_date_sk#21] -Condition : ((isnotnull(sr_customer_sk#18) AND isnotnull(sr_item_sk#17)) AND isnotnull(sr_ticket_number#19)) +(26) Filter [codegen id : 10] +Input [5]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21, sr_returned_date_sk#22] +Condition : ((isnotnull(sr_customer_sk#19) AND isnotnull(sr_item_sk#18)) AND isnotnull(sr_ticket_number#20)) -(26) ReusedExchange [Reuses operator id: 56] -Output [1]: [d_date_sk#23] +(27) ReusedExchange [Reuses operator id: 59] +Output [1]: [d_date_sk#24] -(27) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [sr_returned_date_sk#21] -Right keys [1]: [d_date_sk#23] +(28) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [sr_returned_date_sk#22] +Right keys [1]: [d_date_sk#24] Join condition: None -(28) Project [codegen id : 9] -Output [4]: [sr_item_sk#17, sr_customer_sk#18, sr_ticket_number#19, sr_return_quantity#20] -Input [6]: [sr_item_sk#17, sr_customer_sk#18, sr_ticket_number#19, sr_return_quantity#20, sr_returned_date_sk#21, d_date_sk#23] +(29) Project [codegen id : 10] +Output [4]: 
[sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21] +Input [6]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21, sr_returned_date_sk#22, d_date_sk#24] -(29) Exchange -Input [4]: [sr_item_sk#17, sr_customer_sk#18, sr_ticket_number#19, sr_return_quantity#20] -Arguments: hashpartitioning(sr_item_sk#17, 5), ENSURE_REQUIREMENTS, [id=#24] +(30) Exchange +Input [4]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21] +Arguments: hashpartitioning(sr_customer_sk#19, sr_item_sk#18, sr_ticket_number#20, 5), ENSURE_REQUIREMENTS, [id=#25] -(30) Sort [codegen id : 10] -Input [4]: [sr_item_sk#17, sr_customer_sk#18, sr_ticket_number#19, sr_return_quantity#20] -Arguments: [sr_customer_sk#18 ASC NULLS FIRST, sr_item_sk#17 ASC NULLS FIRST, sr_ticket_number#19 ASC NULLS FIRST], false, 0 +(31) Sort [codegen id : 11] +Input [4]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21] +Arguments: [sr_customer_sk#19 ASC NULLS FIRST, sr_item_sk#18 ASC NULLS FIRST, sr_ticket_number#20 ASC NULLS FIRST], false, 0 -(31) SortMergeJoin [codegen id : 11] +(32) SortMergeJoin [codegen id : 12] Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] -Right keys [3]: [sr_customer_sk#18, sr_item_sk#17, sr_ticket_number#19] +Right keys [3]: [sr_customer_sk#19, sr_item_sk#18, sr_ticket_number#20] Join condition: None -(32) Project [codegen id : 11] -Output [7]: [ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15, sr_item_sk#17, sr_customer_sk#18, sr_return_quantity#20] -Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15, sr_item_sk#17, sr_customer_sk#18, sr_ticket_number#19, sr_return_quantity#20] +(33) Project [codegen id : 12] +Output [7]: [ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15, sr_item_sk#18, sr_customer_sk#19, sr_return_quantity#21] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15, sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21] + +(34) Exchange +Input [7]: [ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15, sr_item_sk#18, sr_customer_sk#19, sr_return_quantity#21] +Arguments: hashpartitioning(sr_customer_sk#19, sr_item_sk#18, 5), ENSURE_REQUIREMENTS, [id=#26] -(33) Scan parquet default.catalog_sales -Output [4]: [cs_bill_customer_sk#25, cs_item_sk#26, cs_quantity#27, cs_sold_date_sk#28] +(35) Sort [codegen id : 13] +Input [7]: [ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15, sr_item_sk#18, sr_customer_sk#19, sr_return_quantity#21] +Arguments: [sr_customer_sk#19 ASC NULLS FIRST, sr_item_sk#18 ASC NULLS FIRST], false, 0 + +(36) Scan parquet default.catalog_sales +Output [4]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29, cs_sold_date_sk#30] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#28), dynamicpruningexpression(cs_sold_date_sk#28 IN dynamicpruning#22)] +PartitionFilters: [isnotnull(cs_sold_date_sk#30), dynamicpruningexpression(cs_sold_date_sk#30 IN dynamicpruning#23)] PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] ReadSchema: struct -(34) ColumnarToRow [codegen id : 13] -Input [4]: [cs_bill_customer_sk#25, cs_item_sk#26, cs_quantity#27, cs_sold_date_sk#28] +(37) ColumnarToRow [codegen id : 15] +Input [4]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29, cs_sold_date_sk#30] -(35) Filter [codegen id : 13] -Input [4]: 
[cs_bill_customer_sk#25, cs_item_sk#26, cs_quantity#27, cs_sold_date_sk#28] -Condition : (isnotnull(cs_bill_customer_sk#25) AND isnotnull(cs_item_sk#26)) +(38) Filter [codegen id : 15] +Input [4]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29, cs_sold_date_sk#30] +Condition : (isnotnull(cs_bill_customer_sk#27) AND isnotnull(cs_item_sk#28)) -(36) ReusedExchange [Reuses operator id: 56] -Output [1]: [d_date_sk#29] +(39) ReusedExchange [Reuses operator id: 59] +Output [1]: [d_date_sk#31] -(37) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cs_sold_date_sk#28] -Right keys [1]: [d_date_sk#29] +(40) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_sold_date_sk#30] +Right keys [1]: [d_date_sk#31] Join condition: None -(38) Project [codegen id : 13] -Output [3]: [cs_bill_customer_sk#25, cs_item_sk#26, cs_quantity#27] -Input [5]: [cs_bill_customer_sk#25, cs_item_sk#26, cs_quantity#27, cs_sold_date_sk#28, d_date_sk#29] +(41) Project [codegen id : 15] +Output [3]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29] +Input [5]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29, cs_sold_date_sk#30, d_date_sk#31] -(39) Exchange -Input [3]: [cs_bill_customer_sk#25, cs_item_sk#26, cs_quantity#27] -Arguments: hashpartitioning(cs_item_sk#26, 5), ENSURE_REQUIREMENTS, [id=#30] +(42) Exchange +Input [3]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29] +Arguments: hashpartitioning(cs_bill_customer_sk#27, cs_item_sk#28, 5), ENSURE_REQUIREMENTS, [id=#32] -(40) Sort [codegen id : 14] -Input [3]: [cs_bill_customer_sk#25, cs_item_sk#26, cs_quantity#27] -Arguments: [cs_bill_customer_sk#25 ASC NULLS FIRST, cs_item_sk#26 ASC NULLS FIRST], false, 0 +(43) Sort [codegen id : 16] +Input [3]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29] +Arguments: [cs_bill_customer_sk#27 ASC NULLS FIRST, cs_item_sk#28 ASC NULLS FIRST], false, 0 -(41) SortMergeJoin [codegen id : 15] -Left keys [2]: [sr_customer_sk#18, sr_item_sk#17] -Right keys [2]: [cs_bill_customer_sk#25, cs_item_sk#26] +(44) SortMergeJoin [codegen id : 17] +Left keys [2]: [sr_customer_sk#19, sr_item_sk#18] +Right keys [2]: [cs_bill_customer_sk#27, cs_item_sk#28] Join condition: None -(42) Project [codegen id : 15] -Output [6]: [ss_quantity#5, sr_return_quantity#20, cs_quantity#27, s_state#10, i_item_id#14, i_item_desc#15] -Input [10]: [ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15, sr_item_sk#17, sr_customer_sk#18, sr_return_quantity#20, cs_bill_customer_sk#25, cs_item_sk#26, cs_quantity#27] +(45) Project [codegen id : 17] +Output [6]: [ss_quantity#5, sr_return_quantity#21, cs_quantity#29, s_state#10, i_item_id#14, i_item_desc#15] +Input [10]: [ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15, sr_item_sk#18, sr_customer_sk#19, sr_return_quantity#21, cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29] -(43) HashAggregate [codegen id : 15] -Input [6]: [ss_quantity#5, sr_return_quantity#20, cs_quantity#27, s_state#10, i_item_id#14, i_item_desc#15] +(46) HashAggregate [codegen id : 17] +Input [6]: [ss_quantity#5, sr_return_quantity#21, cs_quantity#29, s_state#10, i_item_id#14, i_item_desc#15] Keys [3]: [i_item_id#14, i_item_desc#15, s_state#10] -Functions [9]: [partial_count(ss_quantity#5), partial_avg(ss_quantity#5), partial_stddev_samp(cast(ss_quantity#5 as double)), partial_count(sr_return_quantity#20), partial_avg(sr_return_quantity#20), partial_stddev_samp(cast(sr_return_quantity#20 as double)), partial_count(cs_quantity#27), partial_avg(cs_quantity#27), partial_stddev_samp(cast(cs_quantity#27 as 
double))] -Aggregate Attributes [18]: [count#31, sum#32, count#33, n#34, avg#35, m2#36, count#37, sum#38, count#39, n#40, avg#41, m2#42, count#43, sum#44, count#45, n#46, avg#47, m2#48] -Results [21]: [i_item_id#14, i_item_desc#15, s_state#10, count#49, sum#50, count#51, n#52, avg#53, m2#54, count#55, sum#56, count#57, n#58, avg#59, m2#60, count#61, sum#62, count#63, n#64, avg#65, m2#66] +Functions [9]: [partial_count(ss_quantity#5), partial_avg(ss_quantity#5), partial_stddev_samp(cast(ss_quantity#5 as double)), partial_count(sr_return_quantity#21), partial_avg(sr_return_quantity#21), partial_stddev_samp(cast(sr_return_quantity#21 as double)), partial_count(cs_quantity#29), partial_avg(cs_quantity#29), partial_stddev_samp(cast(cs_quantity#29 as double))] +Aggregate Attributes [18]: [count#33, sum#34, count#35, n#36, avg#37, m2#38, count#39, sum#40, count#41, n#42, avg#43, m2#44, count#45, sum#46, count#47, n#48, avg#49, m2#50] +Results [21]: [i_item_id#14, i_item_desc#15, s_state#10, count#51, sum#52, count#53, n#54, avg#55, m2#56, count#57, sum#58, count#59, n#60, avg#61, m2#62, count#63, sum#64, count#65, n#66, avg#67, m2#68] -(44) Exchange -Input [21]: [i_item_id#14, i_item_desc#15, s_state#10, count#49, sum#50, count#51, n#52, avg#53, m2#54, count#55, sum#56, count#57, n#58, avg#59, m2#60, count#61, sum#62, count#63, n#64, avg#65, m2#66] -Arguments: hashpartitioning(i_item_id#14, i_item_desc#15, s_state#10, 5), ENSURE_REQUIREMENTS, [id=#67] +(47) Exchange +Input [21]: [i_item_id#14, i_item_desc#15, s_state#10, count#51, sum#52, count#53, n#54, avg#55, m2#56, count#57, sum#58, count#59, n#60, avg#61, m2#62, count#63, sum#64, count#65, n#66, avg#67, m2#68] +Arguments: hashpartitioning(i_item_id#14, i_item_desc#15, s_state#10, 5), ENSURE_REQUIREMENTS, [id=#69] -(45) HashAggregate [codegen id : 16] -Input [21]: [i_item_id#14, i_item_desc#15, s_state#10, count#49, sum#50, count#51, n#52, avg#53, m2#54, count#55, sum#56, count#57, n#58, avg#59, m2#60, count#61, sum#62, count#63, n#64, avg#65, m2#66] +(48) HashAggregate [codegen id : 18] +Input [21]: [i_item_id#14, i_item_desc#15, s_state#10, count#51, sum#52, count#53, n#54, avg#55, m2#56, count#57, sum#58, count#59, n#60, avg#61, m2#62, count#63, sum#64, count#65, n#66, avg#67, m2#68] Keys [3]: [i_item_id#14, i_item_desc#15, s_state#10] -Functions [9]: [count(ss_quantity#5), avg(ss_quantity#5), stddev_samp(cast(ss_quantity#5 as double)), count(sr_return_quantity#20), avg(sr_return_quantity#20), stddev_samp(cast(sr_return_quantity#20 as double)), count(cs_quantity#27), avg(cs_quantity#27), stddev_samp(cast(cs_quantity#27 as double))] -Aggregate Attributes [9]: [count(ss_quantity#5)#68, avg(ss_quantity#5)#69, stddev_samp(cast(ss_quantity#5 as double))#70, count(sr_return_quantity#20)#71, avg(sr_return_quantity#20)#72, stddev_samp(cast(sr_return_quantity#20 as double))#73, count(cs_quantity#27)#74, avg(cs_quantity#27)#75, stddev_samp(cast(cs_quantity#27 as double))#76] -Results [15]: [i_item_id#14, i_item_desc#15, s_state#10, count(ss_quantity#5)#68 AS store_sales_quantitycount#77, avg(ss_quantity#5)#69 AS store_sales_quantityave#78, stddev_samp(cast(ss_quantity#5 as double))#70 AS store_sales_quantitystdev#79, (stddev_samp(cast(ss_quantity#5 as double))#70 / avg(ss_quantity#5)#69) AS store_sales_quantitycov#80, count(sr_return_quantity#20)#71 AS as_store_returns_quantitycount#81, avg(sr_return_quantity#20)#72 AS as_store_returns_quantityave#82, stddev_samp(cast(sr_return_quantity#20 as double))#73 AS as_store_returns_quantitystdev#83, 
(stddev_samp(cast(sr_return_quantity#20 as double))#73 / avg(sr_return_quantity#20)#72) AS store_returns_quantitycov#84, count(cs_quantity#27)#74 AS catalog_sales_quantitycount#85, avg(cs_quantity#27)#75 AS catalog_sales_quantityave#86, (stddev_samp(cast(cs_quantity#27 as double))#76 / avg(cs_quantity#27)#75) AS catalog_sales_quantitystdev#87, (stddev_samp(cast(cs_quantity#27 as double))#76 / avg(cs_quantity#27)#75) AS catalog_sales_quantitycov#88] +Functions [9]: [count(ss_quantity#5), avg(ss_quantity#5), stddev_samp(cast(ss_quantity#5 as double)), count(sr_return_quantity#21), avg(sr_return_quantity#21), stddev_samp(cast(sr_return_quantity#21 as double)), count(cs_quantity#29), avg(cs_quantity#29), stddev_samp(cast(cs_quantity#29 as double))] +Aggregate Attributes [9]: [count(ss_quantity#5)#70, avg(ss_quantity#5)#71, stddev_samp(cast(ss_quantity#5 as double))#72, count(sr_return_quantity#21)#73, avg(sr_return_quantity#21)#74, stddev_samp(cast(sr_return_quantity#21 as double))#75, count(cs_quantity#29)#76, avg(cs_quantity#29)#77, stddev_samp(cast(cs_quantity#29 as double))#78] +Results [15]: [i_item_id#14, i_item_desc#15, s_state#10, count(ss_quantity#5)#70 AS store_sales_quantitycount#79, avg(ss_quantity#5)#71 AS store_sales_quantityave#80, stddev_samp(cast(ss_quantity#5 as double))#72 AS store_sales_quantitystdev#81, (stddev_samp(cast(ss_quantity#5 as double))#72 / avg(ss_quantity#5)#71) AS store_sales_quantitycov#82, count(sr_return_quantity#21)#73 AS as_store_returns_quantitycount#83, avg(sr_return_quantity#21)#74 AS as_store_returns_quantityave#84, stddev_samp(cast(sr_return_quantity#21 as double))#75 AS as_store_returns_quantitystdev#85, (stddev_samp(cast(sr_return_quantity#21 as double))#75 / avg(sr_return_quantity#21)#74) AS store_returns_quantitycov#86, count(cs_quantity#29)#76 AS catalog_sales_quantitycount#87, avg(cs_quantity#29)#77 AS catalog_sales_quantityave#88, (stddev_samp(cast(cs_quantity#29 as double))#78 / avg(cs_quantity#29)#77) AS catalog_sales_quantitystdev#89, (stddev_samp(cast(cs_quantity#29 as double))#78 / avg(cs_quantity#29)#77) AS catalog_sales_quantitycov#90] -(46) TakeOrderedAndProject -Input [15]: [i_item_id#14, i_item_desc#15, s_state#10, store_sales_quantitycount#77, store_sales_quantityave#78, store_sales_quantitystdev#79, store_sales_quantitycov#80, as_store_returns_quantitycount#81, as_store_returns_quantityave#82, as_store_returns_quantitystdev#83, store_returns_quantitycov#84, catalog_sales_quantitycount#85, catalog_sales_quantityave#86, catalog_sales_quantitystdev#87, catalog_sales_quantitycov#88] -Arguments: 100, [i_item_id#14 ASC NULLS FIRST, i_item_desc#15 ASC NULLS FIRST, s_state#10 ASC NULLS FIRST], [i_item_id#14, i_item_desc#15, s_state#10, store_sales_quantitycount#77, store_sales_quantityave#78, store_sales_quantitystdev#79, store_sales_quantitycov#80, as_store_returns_quantitycount#81, as_store_returns_quantityave#82, as_store_returns_quantitystdev#83, store_returns_quantitycov#84, catalog_sales_quantitycount#85, catalog_sales_quantityave#86, catalog_sales_quantitystdev#87, catalog_sales_quantitycov#88] +(49) TakeOrderedAndProject +Input [15]: [i_item_id#14, i_item_desc#15, s_state#10, store_sales_quantitycount#79, store_sales_quantityave#80, store_sales_quantitystdev#81, store_sales_quantitycov#82, as_store_returns_quantitycount#83, as_store_returns_quantityave#84, as_store_returns_quantitystdev#85, store_returns_quantitycov#86, catalog_sales_quantitycount#87, catalog_sales_quantityave#88, catalog_sales_quantitystdev#89, 
catalog_sales_quantitycov#90] +Arguments: 100, [i_item_id#14 ASC NULLS FIRST, i_item_desc#15 ASC NULLS FIRST, s_state#10 ASC NULLS FIRST], [i_item_id#14, i_item_desc#15, s_state#10, store_sales_quantitycount#79, store_sales_quantityave#80, store_sales_quantitystdev#81, store_sales_quantitycov#82, as_store_returns_quantitycount#83, as_store_returns_quantityave#84, as_store_returns_quantitystdev#85, store_returns_quantitycov#86, catalog_sales_quantitycount#87, catalog_sales_quantityave#88, catalog_sales_quantitystdev#89, catalog_sales_quantitycov#90] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7 -BroadcastExchange (51) -+- * Project (50) - +- * Filter (49) - +- * ColumnarToRow (48) - +- Scan parquet default.date_dim (47) +BroadcastExchange (54) ++- * Project (53) + +- * Filter (52) + +- * ColumnarToRow (51) + +- Scan parquet default.date_dim (50) -(47) Scan parquet default.date_dim -Output [2]: [d_date_sk#8, d_quarter_name#89] +(50) Scan parquet default.date_dim +Output [2]: [d_date_sk#8, d_quarter_name#91] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)] ReadSchema: struct -(48) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#8, d_quarter_name#89] +(51) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#8, d_quarter_name#91] -(49) Filter [codegen id : 1] -Input [2]: [d_date_sk#8, d_quarter_name#89] -Condition : ((isnotnull(d_quarter_name#89) AND (d_quarter_name#89 = 2001Q1)) AND isnotnull(d_date_sk#8)) +(52) Filter [codegen id : 1] +Input [2]: [d_date_sk#8, d_quarter_name#91] +Condition : ((isnotnull(d_quarter_name#91) AND (d_quarter_name#91 = 2001Q1)) AND isnotnull(d_date_sk#8)) -(50) Project [codegen id : 1] +(53) Project [codegen id : 1] Output [1]: [d_date_sk#8] -Input [2]: [d_date_sk#8, d_quarter_name#89] +Input [2]: [d_date_sk#8, d_quarter_name#91] -(51) BroadcastExchange +(54) BroadcastExchange Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#90] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#92] -Subquery:2 Hosting operator id = 23 Hosting Expression = sr_returned_date_sk#21 IN dynamicpruning#22 -BroadcastExchange (56) -+- * Project (55) - +- * Filter (54) - +- * ColumnarToRow (53) - +- Scan parquet default.date_dim (52) +Subquery:2 Hosting operator id = 24 Hosting Expression = sr_returned_date_sk#22 IN dynamicpruning#23 +BroadcastExchange (59) ++- * Project (58) + +- * Filter (57) + +- * ColumnarToRow (56) + +- Scan parquet default.date_dim (55) -(52) Scan parquet default.date_dim -Output [2]: [d_date_sk#23, d_quarter_name#91] +(55) Scan parquet default.date_dim +Output [2]: [d_date_sk#24, d_quarter_name#93] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] ReadSchema: struct -(53) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#23, d_quarter_name#91] +(56) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#24, d_quarter_name#93] -(54) Filter [codegen id : 1] -Input [2]: [d_date_sk#23, d_quarter_name#91] -Condition : (d_quarter_name#91 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#23)) +(57) Filter [codegen id : 1] +Input [2]: [d_date_sk#24, d_quarter_name#93] +Condition : (d_quarter_name#93 IN (2001Q1,2001Q2,2001Q3) AND 
isnotnull(d_date_sk#24)) -(55) Project [codegen id : 1] -Output [1]: [d_date_sk#23] -Input [2]: [d_date_sk#23, d_quarter_name#91] +(58) Project [codegen id : 1] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_quarter_name#93] -(56) BroadcastExchange -Input [1]: [d_date_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#92] +(59) BroadcastExchange +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#94] -Subquery:3 Hosting operator id = 33 Hosting Expression = cs_sold_date_sk#28 IN dynamicpruning#22 +Subquery:3 Hosting operator id = 36 Hosting Expression = cs_sold_date_sk#30 IN dynamicpruning#23 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt index b00c5da2ef7d0..06c8f7b3912e5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt @@ -1,90 +1,97 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov] - WholeStageCodegen (16) + WholeStageCodegen (18) HashAggregate [i_item_id,i_item_desc,s_state,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] [count(ss_quantity),avg(ss_quantity),stddev_samp(cast(ss_quantity as double)),count(sr_return_quantity),avg(sr_return_quantity),stddev_samp(cast(sr_return_quantity as double)),count(cs_quantity),avg(cs_quantity),stddev_samp(cast(cs_quantity as double)),store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] InputAdapter Exchange [i_item_id,i_item_desc,s_state] #1 - WholeStageCodegen (15) + WholeStageCodegen (17) HashAggregate [i_item_id,i_item_desc,s_state,ss_quantity,sr_return_quantity,cs_quantity] [count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] Project [ss_quantity,sr_return_quantity,cs_quantity,s_state,i_item_id,i_item_desc] SortMergeJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] InputAdapter - WholeStageCodegen (11) - Project [ss_quantity,s_state,i_item_id,i_item_desc,sr_item_sk,sr_customer_sk,sr_return_quantity] - SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] - InputAdapter - WholeStageCodegen (7) - Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_state,i_item_id,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (4) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #2 - WholeStageCodegen (3) - Project 
[ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_quarter_name,d_date_sk] + WholeStageCodegen (13) + Sort [sr_customer_sk,sr_item_sk] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk] #2 + WholeStageCodegen (12) + Project [ss_quantity,s_state,i_item_id,i_item_desc,sr_item_sk,sr_customer_sk,sr_return_quantity] + SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + WholeStageCodegen (8) + Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #3 + WholeStageCodegen (7) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_state,i_item_id,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (3) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_quarter_name] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [s_store_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_quarter_name] + Scan parquet default.store [s_store_sk,s_state] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] InputAdapter - ReusedExchange [d_date_sk] #3 - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] - InputAdapter - WholeStageCodegen (6) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #5 - WholeStageCodegen (5) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] - InputAdapter - WholeStageCodegen (10) - Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] - InputAdapter - Exchange [sr_item_sk] #6 - WholeStageCodegen (9) - Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Filter [sr_customer_sk,sr_item_sk,sr_ticket_number] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] - SubqueryBroadcast [d_date_sk] #2 - BroadcastExchange #7 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_quarter_name,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim 
[d_date_sk,d_quarter_name] - InputAdapter - ReusedExchange [d_date_sk] #7 + Exchange [i_item_sk] #7 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + InputAdapter + WholeStageCodegen (11) + Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 + WholeStageCodegen (10) + Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + SubqueryBroadcast [d_date_sk] #2 + BroadcastExchange #9 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_quarter_name] + InputAdapter + ReusedExchange [d_date_sk] #9 InputAdapter - WholeStageCodegen (14) + WholeStageCodegen (16) Sort [cs_bill_customer_sk,cs_item_sk] InputAdapter - Exchange [cs_item_sk] #8 - WholeStageCodegen (13) + Exchange [cs_bill_customer_sk,cs_item_sk] #10 + WholeStageCodegen (15) Project [cs_bill_customer_sk,cs_item_sk,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Filter [cs_bill_customer_sk,cs_item_sk] @@ -93,4 +100,4 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,s Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] ReusedSubquery [d_date_sk] #2 InputAdapter - ReusedExchange [d_date_sk] #7 + ReusedExchange [d_date_sk] #9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt index cbbf3da55739d..fc55789fab16a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt @@ -1,50 +1,53 @@ == Physical Plan == -TakeOrderedAndProject (46) -+- * HashAggregate (45) - +- Exchange (44) - +- * HashAggregate (43) - +- * Project (42) - +- * SortMergeJoin Inner (41) - :- * Project (32) - : +- * SortMergeJoin Inner (31) - : :- * Sort (22) - : : +- * Project (21) - : : +- * SortMergeJoin Inner (20) - : : :- * Sort (14) - : : : +- Exchange (13) - : : : +- * Project (12) - : : : +- * BroadcastHashJoin Inner BuildRight (11) - : : : :- * Project (6) - : : : : +- * BroadcastHashJoin Inner BuildRight (5) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- ReusedExchange (4) - : : : +- BroadcastExchange (10) - : : : +- * Filter (9) - : : : +- * ColumnarToRow (8) - : : : +- Scan parquet default.store (7) - : : +- * Sort (19) - : : +- Exchange (18) - : : +- * Filter (17) - : : +- * ColumnarToRow (16) - : : +- Scan parquet default.item (15) - : +- * Sort (30) - : +- Exchange (29) - : +- * Project (28) - : +- * BroadcastHashJoin Inner BuildRight (27) - : :- * Filter (25) - : : +- * ColumnarToRow (24) - : : +- Scan parquet default.store_returns (23) - : +- ReusedExchange (26) - +- * Sort (40) - +- Exchange (39) - +- * Project (38) - +- * BroadcastHashJoin Inner BuildRight (37) - :- * Filter (35) - : +- * ColumnarToRow (34) - : +- Scan parquet default.catalog_sales (33) - +- ReusedExchange (36) 
+TakeOrderedAndProject (49) ++- * HashAggregate (48) + +- Exchange (47) + +- * HashAggregate (46) + +- * Project (45) + +- * SortMergeJoin Inner (44) + :- * Sort (35) + : +- Exchange (34) + : +- * Project (33) + : +- * SortMergeJoin Inner (32) + : :- * Sort (23) + : : +- Exchange (22) + : : +- * Project (21) + : : +- * SortMergeJoin Inner (20) + : : :- * Sort (14) + : : : +- Exchange (13) + : : : +- * Project (12) + : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : :- * Project (6) + : : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- ReusedExchange (4) + : : : +- BroadcastExchange (10) + : : : +- * Filter (9) + : : : +- * ColumnarToRow (8) + : : : +- Scan parquet default.store (7) + : : +- * Sort (19) + : : +- Exchange (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet default.item (15) + : +- * Sort (31) + : +- Exchange (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Filter (26) + : : +- * ColumnarToRow (25) + : : +- Scan parquet default.store_returns (24) + : +- ReusedExchange (27) + +- * Sort (43) + +- Exchange (42) + +- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet default.catalog_sales (36) + +- ReusedExchange (39) (1) Scan parquet default.store_sales @@ -62,7 +65,7 @@ Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) -(4) ReusedExchange [Reuses operator id: 51] +(4) ReusedExchange [Reuses operator id: 54] Output [1]: [d_date_sk#8] (5) BroadcastHashJoin [codegen id : 3] @@ -140,182 +143,194 @@ Join condition: None Output [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_sk#14, i_item_id#15, i_item_desc#16] -(22) Sort [codegen id : 7] +(22) Exchange +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] +Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [id=#18] + +(23) Sort [codegen id : 8] Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] Arguments: [ss_customer_sk#2 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 -(23) Scan parquet default.store_returns -Output [5]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_net_loss#21, sr_returned_date_sk#22] +(24) Scan parquet default.store_returns +Output [5]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22, sr_returned_date_sk#23] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(sr_returned_date_sk#22), dynamicpruningexpression(sr_returned_date_sk#22 IN dynamicpruning#23)] +PartitionFilters: [isnotnull(sr_returned_date_sk#23), dynamicpruningexpression(sr_returned_date_sk#23 IN dynamicpruning#24)] PushedFilters: [IsNotNull(sr_customer_sk), 
IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct -(24) ColumnarToRow [codegen id : 9] -Input [5]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_net_loss#21, sr_returned_date_sk#22] +(25) ColumnarToRow [codegen id : 10] +Input [5]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22, sr_returned_date_sk#23] -(25) Filter [codegen id : 9] -Input [5]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_net_loss#21, sr_returned_date_sk#22] -Condition : ((isnotnull(sr_customer_sk#19) AND isnotnull(sr_item_sk#18)) AND isnotnull(sr_ticket_number#20)) +(26) Filter [codegen id : 10] +Input [5]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22, sr_returned_date_sk#23] +Condition : ((isnotnull(sr_customer_sk#20) AND isnotnull(sr_item_sk#19)) AND isnotnull(sr_ticket_number#21)) -(26) ReusedExchange [Reuses operator id: 56] -Output [1]: [d_date_sk#24] +(27) ReusedExchange [Reuses operator id: 59] +Output [1]: [d_date_sk#25] -(27) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [sr_returned_date_sk#22] -Right keys [1]: [d_date_sk#24] +(28) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [sr_returned_date_sk#23] +Right keys [1]: [d_date_sk#25] Join condition: None -(28) Project [codegen id : 9] -Output [4]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_net_loss#21] -Input [6]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_net_loss#21, sr_returned_date_sk#22, d_date_sk#24] +(29) Project [codegen id : 10] +Output [4]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22] +Input [6]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22, sr_returned_date_sk#23, d_date_sk#25] -(29) Exchange -Input [4]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_net_loss#21] -Arguments: hashpartitioning(sr_item_sk#18, 5), ENSURE_REQUIREMENTS, [id=#25] +(30) Exchange +Input [4]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22] +Arguments: hashpartitioning(sr_customer_sk#20, sr_item_sk#19, sr_ticket_number#21, 5), ENSURE_REQUIREMENTS, [id=#26] -(30) Sort [codegen id : 10] -Input [4]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_net_loss#21] -Arguments: [sr_customer_sk#19 ASC NULLS FIRST, sr_item_sk#18 ASC NULLS FIRST, sr_ticket_number#20 ASC NULLS FIRST], false, 0 +(31) Sort [codegen id : 11] +Input [4]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22] +Arguments: [sr_customer_sk#20 ASC NULLS FIRST, sr_item_sk#19 ASC NULLS FIRST, sr_ticket_number#21 ASC NULLS FIRST], false, 0 -(31) SortMergeJoin [codegen id : 11] +(32) SortMergeJoin [codegen id : 12] Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] -Right keys [3]: [sr_customer_sk#19, sr_item_sk#18, sr_ticket_number#20] +Right keys [3]: [sr_customer_sk#20, sr_item_sk#19, sr_ticket_number#21] Join condition: None -(32) Project [codegen id : 11] -Output [8]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#18, sr_customer_sk#19, sr_net_loss#21] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_net_loss#21] +(33) Project [codegen id : 12] +Output [8]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_net_loss#22] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, 
ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22] + +(34) Exchange +Input [8]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_net_loss#22] +Arguments: hashpartitioning(sr_customer_sk#20, sr_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#27] + +(35) Sort [codegen id : 13] +Input [8]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_net_loss#22] +Arguments: [sr_customer_sk#20 ASC NULLS FIRST, sr_item_sk#19 ASC NULLS FIRST], false, 0 -(33) Scan parquet default.catalog_sales -Output [4]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_net_profit#28, cs_sold_date_sk#29] +(36) Scan parquet default.catalog_sales +Output [4]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30, cs_sold_date_sk#31] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#29), dynamicpruningexpression(cs_sold_date_sk#29 IN dynamicpruning#23)] +PartitionFilters: [isnotnull(cs_sold_date_sk#31), dynamicpruningexpression(cs_sold_date_sk#31 IN dynamicpruning#24)] PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] ReadSchema: struct -(34) ColumnarToRow [codegen id : 13] -Input [4]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_net_profit#28, cs_sold_date_sk#29] +(37) ColumnarToRow [codegen id : 15] +Input [4]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30, cs_sold_date_sk#31] -(35) Filter [codegen id : 13] -Input [4]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_net_profit#28, cs_sold_date_sk#29] -Condition : (isnotnull(cs_bill_customer_sk#26) AND isnotnull(cs_item_sk#27)) +(38) Filter [codegen id : 15] +Input [4]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30, cs_sold_date_sk#31] +Condition : (isnotnull(cs_bill_customer_sk#28) AND isnotnull(cs_item_sk#29)) -(36) ReusedExchange [Reuses operator id: 56] -Output [1]: [d_date_sk#30] +(39) ReusedExchange [Reuses operator id: 59] +Output [1]: [d_date_sk#32] -(37) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cs_sold_date_sk#29] -Right keys [1]: [d_date_sk#30] +(40) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_sold_date_sk#31] +Right keys [1]: [d_date_sk#32] Join condition: None -(38) Project [codegen id : 13] -Output [3]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_net_profit#28] -Input [5]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_net_profit#28, cs_sold_date_sk#29, d_date_sk#30] +(41) Project [codegen id : 15] +Output [3]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30] +Input [5]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30, cs_sold_date_sk#31, d_date_sk#32] -(39) Exchange -Input [3]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_net_profit#28] -Arguments: hashpartitioning(cs_item_sk#27, 5), ENSURE_REQUIREMENTS, [id=#31] +(42) Exchange +Input [3]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30] +Arguments: hashpartitioning(cs_bill_customer_sk#28, cs_item_sk#29, 5), ENSURE_REQUIREMENTS, [id=#33] -(40) Sort [codegen id : 14] -Input [3]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_net_profit#28] -Arguments: [cs_bill_customer_sk#26 ASC NULLS FIRST, cs_item_sk#27 ASC NULLS FIRST], false, 0 +(43) Sort [codegen id : 16] +Input [3]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30] +Arguments: [cs_bill_customer_sk#28 ASC NULLS FIRST, cs_item_sk#29 ASC NULLS FIRST], false, 0 -(41) SortMergeJoin [codegen id : 15] -Left keys [2]: 
[sr_customer_sk#19, sr_item_sk#18] -Right keys [2]: [cs_bill_customer_sk#26, cs_item_sk#27] +(44) SortMergeJoin [codegen id : 17] +Left keys [2]: [sr_customer_sk#20, sr_item_sk#19] +Right keys [2]: [cs_bill_customer_sk#28, cs_item_sk#29] Join condition: None -(42) Project [codegen id : 15] -Output [7]: [ss_net_profit#5, sr_net_loss#21, cs_net_profit#28, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] -Input [11]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#18, sr_customer_sk#19, sr_net_loss#21, cs_bill_customer_sk#26, cs_item_sk#27, cs_net_profit#28] +(45) Project [codegen id : 17] +Output [7]: [ss_net_profit#5, sr_net_loss#22, cs_net_profit#30, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] +Input [11]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_net_loss#22, cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30] -(43) HashAggregate [codegen id : 15] -Input [7]: [ss_net_profit#5, sr_net_loss#21, cs_net_profit#28, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] +(46) HashAggregate [codegen id : 17] +Input [7]: [ss_net_profit#5, sr_net_loss#22, cs_net_profit#30, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] Keys [4]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11] -Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(sr_net_loss#21)), partial_sum(UnscaledValue(cs_net_profit#28))] -Aggregate Attributes [3]: [sum#32, sum#33, sum#34] -Results [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#35, sum#36, sum#37] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(sr_net_loss#22)), partial_sum(UnscaledValue(cs_net_profit#30))] +Aggregate Attributes [3]: [sum#34, sum#35, sum#36] +Results [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#37, sum#38, sum#39] -(44) Exchange -Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#35, sum#36, sum#37] -Arguments: hashpartitioning(i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, 5), ENSURE_REQUIREMENTS, [id=#38] +(47) Exchange +Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#37, sum#38, sum#39] +Arguments: hashpartitioning(i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, 5), ENSURE_REQUIREMENTS, [id=#40] -(45) HashAggregate [codegen id : 16] -Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#35, sum#36, sum#37] +(48) HashAggregate [codegen id : 18] +Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#37, sum#38, sum#39] Keys [4]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11] -Functions [3]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(sr_net_loss#21)), sum(UnscaledValue(cs_net_profit#28))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#5))#39, sum(UnscaledValue(sr_net_loss#21))#40, sum(UnscaledValue(cs_net_profit#28))#41] -Results [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#39,17,2) AS store_sales_profit#42, MakeDecimal(sum(UnscaledValue(sr_net_loss#21))#40,17,2) AS store_returns_loss#43, MakeDecimal(sum(UnscaledValue(cs_net_profit#28))#41,17,2) AS catalog_sales_profit#44] +Functions [3]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(sr_net_loss#22)), sum(UnscaledValue(cs_net_profit#30))] 
+Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#5))#41, sum(UnscaledValue(sr_net_loss#22))#42, sum(UnscaledValue(cs_net_profit#30))#43] +Results [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#41,17,2) AS store_sales_profit#44, MakeDecimal(sum(UnscaledValue(sr_net_loss#22))#42,17,2) AS store_returns_loss#45, MakeDecimal(sum(UnscaledValue(cs_net_profit#30))#43,17,2) AS catalog_sales_profit#46] -(46) TakeOrderedAndProject -Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, store_sales_profit#42, store_returns_loss#43, catalog_sales_profit#44] -Arguments: 100, [i_item_id#15 ASC NULLS FIRST, i_item_desc#16 ASC NULLS FIRST, s_store_id#10 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST], [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, store_sales_profit#42, store_returns_loss#43, catalog_sales_profit#44] +(49) TakeOrderedAndProject +Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, store_sales_profit#44, store_returns_loss#45, catalog_sales_profit#46] +Arguments: 100, [i_item_id#15 ASC NULLS FIRST, i_item_desc#16 ASC NULLS FIRST, s_store_id#10 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST], [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, store_sales_profit#44, store_returns_loss#45, catalog_sales_profit#46] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7 -BroadcastExchange (51) -+- * Project (50) - +- * Filter (49) - +- * ColumnarToRow (48) - +- Scan parquet default.date_dim (47) +BroadcastExchange (54) ++- * Project (53) + +- * Filter (52) + +- * ColumnarToRow (51) + +- Scan parquet default.date_dim (50) -(47) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#45, d_moy#46] +(50) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_year#47, d_moy#48] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(48) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#45, d_moy#46] +(51) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#47, d_moy#48] -(49) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#45, d_moy#46] -Condition : ((((isnotnull(d_moy#46) AND isnotnull(d_year#45)) AND (d_moy#46 = 4)) AND (d_year#45 = 2001)) AND isnotnull(d_date_sk#8)) +(52) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#47, d_moy#48] +Condition : ((((isnotnull(d_moy#48) AND isnotnull(d_year#47)) AND (d_moy#48 = 4)) AND (d_year#47 = 2001)) AND isnotnull(d_date_sk#8)) -(50) Project [codegen id : 1] +(53) Project [codegen id : 1] Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#45, d_moy#46] +Input [3]: [d_date_sk#8, d_year#47, d_moy#48] -(51) BroadcastExchange +(54) BroadcastExchange Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#47] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] -Subquery:2 Hosting operator id = 23 Hosting Expression = sr_returned_date_sk#22 IN dynamicpruning#23 -BroadcastExchange (56) -+- * Project (55) - +- * Filter (54) - +- * ColumnarToRow (53) - +- Scan parquet default.date_dim (52) +Subquery:2 Hosting operator id = 24 Hosting Expression = sr_returned_date_sk#23 IN dynamicpruning#24 +BroadcastExchange (59) ++- * Project (58) 
+ +- * Filter (57) + +- * ColumnarToRow (56) + +- Scan parquet default.date_dim (55) -(52) Scan parquet default.date_dim -Output [3]: [d_date_sk#24, d_year#48, d_moy#49] +(55) Scan parquet default.date_dim +Output [3]: [d_date_sk#25, d_year#50, d_moy#51] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(53) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#24, d_year#48, d_moy#49] +(56) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#25, d_year#50, d_moy#51] -(54) Filter [codegen id : 1] -Input [3]: [d_date_sk#24, d_year#48, d_moy#49] -Condition : (((((isnotnull(d_moy#49) AND isnotnull(d_year#48)) AND (d_moy#49 >= 4)) AND (d_moy#49 <= 10)) AND (d_year#48 = 2001)) AND isnotnull(d_date_sk#24)) +(57) Filter [codegen id : 1] +Input [3]: [d_date_sk#25, d_year#50, d_moy#51] +Condition : (((((isnotnull(d_moy#51) AND isnotnull(d_year#50)) AND (d_moy#51 >= 4)) AND (d_moy#51 <= 10)) AND (d_year#50 = 2001)) AND isnotnull(d_date_sk#25)) -(55) Project [codegen id : 1] -Output [1]: [d_date_sk#24] -Input [3]: [d_date_sk#24, d_year#48, d_moy#49] +(58) Project [codegen id : 1] +Output [1]: [d_date_sk#25] +Input [3]: [d_date_sk#25, d_year#50, d_moy#51] -(56) BroadcastExchange -Input [1]: [d_date_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] +(59) BroadcastExchange +Input [1]: [d_date_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] -Subquery:3 Hosting operator id = 33 Hosting Expression = cs_sold_date_sk#29 IN dynamicpruning#23 +Subquery:3 Hosting operator id = 36 Hosting Expression = cs_sold_date_sk#31 IN dynamicpruning#24 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt index 0b106ced5504d..23d7e84027b2e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt @@ -1,90 +1,97 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_profit,store_returns_loss,catalog_sales_profit] - WholeStageCodegen (16) + WholeStageCodegen (18) HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(cs_net_profit)),store_sales_profit,store_returns_loss,catalog_sales_profit,sum,sum,sum] InputAdapter Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 - WholeStageCodegen (15) + WholeStageCodegen (17) HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_net_profit,sr_net_loss,cs_net_profit] [sum,sum,sum,sum,sum,sum] Project [ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] SortMergeJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] InputAdapter - WholeStageCodegen (11) - Project [ss_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc,sr_item_sk,sr_customer_sk,sr_net_loss] - SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] - InputAdapter - WholeStageCodegen (7) - Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] - Project 
[ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (4) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #2 - WholeStageCodegen (3) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_profit,s_store_id,s_store_name] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] + WholeStageCodegen (13) + Sort [sr_customer_sk,sr_item_sk] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk] #2 + WholeStageCodegen (12) + Project [ss_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc,sr_item_sk,sr_customer_sk,sr_net_loss] + SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + WholeStageCodegen (8) + Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #3 + WholeStageCodegen (7) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (3) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_profit,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [s_store_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Scan parquet default.store [s_store_sk,s_store_id,s_store_name] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] InputAdapter - ReusedExchange [d_date_sk] #3 - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id,s_store_name] - InputAdapter - WholeStageCodegen (6) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #5 - WholeStageCodegen (5) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] - InputAdapter - WholeStageCodegen (10) - Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] - InputAdapter - Exchange [sr_item_sk] #6 - WholeStageCodegen (9) - Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Filter 
[sr_customer_sk,sr_item_sk,sr_ticket_number] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] - SubqueryBroadcast [d_date_sk] #2 - BroadcastExchange #7 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - ReusedExchange [d_date_sk] #7 + Exchange [i_item_sk] #7 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + InputAdapter + WholeStageCodegen (11) + Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 + WholeStageCodegen (10) + Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] + SubqueryBroadcast [d_date_sk] #2 + BroadcastExchange #9 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + ReusedExchange [d_date_sk] #9 InputAdapter - WholeStageCodegen (14) + WholeStageCodegen (16) Sort [cs_bill_customer_sk,cs_item_sk] InputAdapter - Exchange [cs_item_sk] #8 - WholeStageCodegen (13) + Exchange [cs_bill_customer_sk,cs_item_sk] #10 + WholeStageCodegen (15) Project [cs_bill_customer_sk,cs_item_sk,cs_net_profit] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Filter [cs_bill_customer_sk,cs_item_sk] @@ -93,4 +100,4 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] ReusedSubquery [d_date_sk] #2 InputAdapter - ReusedExchange [d_date_sk] #7 + ReusedExchange [d_date_sk] #9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt index e9857b76bc9e8..221439075d24d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt @@ -1,50 +1,53 @@ == Physical Plan == -TakeOrderedAndProject (46) -+- * HashAggregate (45) - +- Exchange (44) - +- * HashAggregate (43) - +- * Project (42) - +- * SortMergeJoin Inner (41) - :- * Project (32) - : +- * SortMergeJoin Inner (31) - : :- * Sort (22) - : : +- * Project (21) - : : +- * SortMergeJoin Inner (20) - : : :- * Sort (14) - : : : +- Exchange (13) - : : : +- * Project (12) - : : : +- * BroadcastHashJoin Inner BuildRight (11) - : : : :- * Project (6) - : : : : +- * BroadcastHashJoin Inner BuildRight (5) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- ReusedExchange (4) - : : : +- BroadcastExchange (10) - : : : +- * Filter (9) - : : : +- * ColumnarToRow (8) - : : : +- Scan parquet default.store (7) - : : +- * Sort (19) - : : +- Exchange (18) - : : +- * Filter (17) - : : +- * ColumnarToRow (16) - : : +- Scan parquet default.item (15) - : +- * Sort (30) - : +- Exchange (29) - : +- * Project (28) - : +- * BroadcastHashJoin 
Inner BuildRight (27) - : :- * Filter (25) - : : +- * ColumnarToRow (24) - : : +- Scan parquet default.store_returns (23) - : +- ReusedExchange (26) - +- * Sort (40) - +- Exchange (39) - +- * Project (38) - +- * BroadcastHashJoin Inner BuildRight (37) - :- * Filter (35) - : +- * ColumnarToRow (34) - : +- Scan parquet default.catalog_sales (33) - +- ReusedExchange (36) +TakeOrderedAndProject (49) ++- * HashAggregate (48) + +- Exchange (47) + +- * HashAggregate (46) + +- * Project (45) + +- * SortMergeJoin Inner (44) + :- * Sort (35) + : +- Exchange (34) + : +- * Project (33) + : +- * SortMergeJoin Inner (32) + : :- * Sort (23) + : : +- Exchange (22) + : : +- * Project (21) + : : +- * SortMergeJoin Inner (20) + : : :- * Sort (14) + : : : +- Exchange (13) + : : : +- * Project (12) + : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : :- * Project (6) + : : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- ReusedExchange (4) + : : : +- BroadcastExchange (10) + : : : +- * Filter (9) + : : : +- * ColumnarToRow (8) + : : : +- Scan parquet default.store (7) + : : +- * Sort (19) + : : +- Exchange (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet default.item (15) + : +- * Sort (31) + : +- Exchange (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Filter (26) + : : +- * ColumnarToRow (25) + : : +- Scan parquet default.store_returns (24) + : +- ReusedExchange (27) + +- * Sort (43) + +- Exchange (42) + +- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet default.catalog_sales (36) + +- ReusedExchange (39) (1) Scan parquet default.store_sales @@ -62,7 +65,7 @@ Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) -(4) ReusedExchange [Reuses operator id: 51] +(4) ReusedExchange [Reuses operator id: 54] Output [1]: [d_date_sk#8] (5) BroadcastHashJoin [codegen id : 3] @@ -140,210 +143,222 @@ Join condition: None Output [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_sk#14, i_item_id#15, i_item_desc#16] -(22) Sort [codegen id : 7] +(22) Exchange +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] +Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [id=#18] + +(23) Sort [codegen id : 8] Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] Arguments: [ss_customer_sk#2 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 -(23) Scan parquet default.store_returns -Output [5]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21, sr_returned_date_sk#22] +(24) Scan parquet default.store_returns +Output [5]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, 
sr_return_quantity#22, sr_returned_date_sk#23] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(sr_returned_date_sk#22), dynamicpruningexpression(sr_returned_date_sk#22 IN dynamicpruning#23)] +PartitionFilters: [isnotnull(sr_returned_date_sk#23), dynamicpruningexpression(sr_returned_date_sk#23 IN dynamicpruning#24)] PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct -(24) ColumnarToRow [codegen id : 9] -Input [5]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21, sr_returned_date_sk#22] +(25) ColumnarToRow [codegen id : 10] +Input [5]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22, sr_returned_date_sk#23] -(25) Filter [codegen id : 9] -Input [5]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21, sr_returned_date_sk#22] -Condition : ((isnotnull(sr_customer_sk#19) AND isnotnull(sr_item_sk#18)) AND isnotnull(sr_ticket_number#20)) +(26) Filter [codegen id : 10] +Input [5]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22, sr_returned_date_sk#23] +Condition : ((isnotnull(sr_customer_sk#20) AND isnotnull(sr_item_sk#19)) AND isnotnull(sr_ticket_number#21)) -(26) ReusedExchange [Reuses operator id: 56] -Output [1]: [d_date_sk#24] +(27) ReusedExchange [Reuses operator id: 59] +Output [1]: [d_date_sk#25] -(27) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [sr_returned_date_sk#22] -Right keys [1]: [d_date_sk#24] +(28) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [sr_returned_date_sk#23] +Right keys [1]: [d_date_sk#25] Join condition: None -(28) Project [codegen id : 9] -Output [4]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21] -Input [6]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21, sr_returned_date_sk#22, d_date_sk#24] +(29) Project [codegen id : 10] +Output [4]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22] +Input [6]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22, sr_returned_date_sk#23, d_date_sk#25] -(29) Exchange -Input [4]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21] -Arguments: hashpartitioning(sr_item_sk#18, 5), ENSURE_REQUIREMENTS, [id=#25] +(30) Exchange +Input [4]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22] +Arguments: hashpartitioning(sr_customer_sk#20, sr_item_sk#19, sr_ticket_number#21, 5), ENSURE_REQUIREMENTS, [id=#26] -(30) Sort [codegen id : 10] -Input [4]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21] -Arguments: [sr_customer_sk#19 ASC NULLS FIRST, sr_item_sk#18 ASC NULLS FIRST, sr_ticket_number#20 ASC NULLS FIRST], false, 0 +(31) Sort [codegen id : 11] +Input [4]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22] +Arguments: [sr_customer_sk#20 ASC NULLS FIRST, sr_item_sk#19 ASC NULLS FIRST, sr_ticket_number#21 ASC NULLS FIRST], false, 0 -(31) SortMergeJoin [codegen id : 11] +(32) SortMergeJoin [codegen id : 12] Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] -Right keys [3]: [sr_customer_sk#19, sr_item_sk#18, sr_ticket_number#20] +Right keys [3]: [sr_customer_sk#20, sr_item_sk#19, sr_ticket_number#21] Join condition: None -(32) Project [codegen id : 11] -Output [8]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#18, sr_customer_sk#19, 
sr_return_quantity#21] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21] +(33) Project [codegen id : 12] +Output [8]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_return_quantity#22] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22] + +(34) Exchange +Input [8]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_return_quantity#22] +Arguments: hashpartitioning(sr_customer_sk#20, sr_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#27] -(33) Scan parquet default.catalog_sales -Output [4]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_quantity#28, cs_sold_date_sk#29] +(35) Sort [codegen id : 13] +Input [8]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_return_quantity#22] +Arguments: [sr_customer_sk#20 ASC NULLS FIRST, sr_item_sk#19 ASC NULLS FIRST], false, 0 + +(36) Scan parquet default.catalog_sales +Output [4]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30, cs_sold_date_sk#31] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#29), dynamicpruningexpression(cs_sold_date_sk#29 IN dynamicpruning#30)] +PartitionFilters: [isnotnull(cs_sold_date_sk#31), dynamicpruningexpression(cs_sold_date_sk#31 IN dynamicpruning#32)] PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] ReadSchema: struct -(34) ColumnarToRow [codegen id : 13] -Input [4]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_quantity#28, cs_sold_date_sk#29] +(37) ColumnarToRow [codegen id : 15] +Input [4]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30, cs_sold_date_sk#31] -(35) Filter [codegen id : 13] -Input [4]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_quantity#28, cs_sold_date_sk#29] -Condition : (isnotnull(cs_bill_customer_sk#26) AND isnotnull(cs_item_sk#27)) +(38) Filter [codegen id : 15] +Input [4]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30, cs_sold_date_sk#31] +Condition : (isnotnull(cs_bill_customer_sk#28) AND isnotnull(cs_item_sk#29)) -(36) ReusedExchange [Reuses operator id: 61] -Output [1]: [d_date_sk#31] +(39) ReusedExchange [Reuses operator id: 64] +Output [1]: [d_date_sk#33] -(37) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cs_sold_date_sk#29] -Right keys [1]: [d_date_sk#31] +(40) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_sold_date_sk#31] +Right keys [1]: [d_date_sk#33] Join condition: None -(38) Project [codegen id : 13] -Output [3]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_quantity#28] -Input [5]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_quantity#28, cs_sold_date_sk#29, d_date_sk#31] +(41) Project [codegen id : 15] +Output [3]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30] +Input [5]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30, cs_sold_date_sk#31, d_date_sk#33] -(39) Exchange -Input [3]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_quantity#28] -Arguments: hashpartitioning(cs_item_sk#27, 5), ENSURE_REQUIREMENTS, [id=#32] +(42) Exchange +Input [3]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30] +Arguments: hashpartitioning(cs_bill_customer_sk#28, 
cs_item_sk#29, 5), ENSURE_REQUIREMENTS, [id=#34] -(40) Sort [codegen id : 14] -Input [3]: [cs_bill_customer_sk#26, cs_item_sk#27, cs_quantity#28] -Arguments: [cs_bill_customer_sk#26 ASC NULLS FIRST, cs_item_sk#27 ASC NULLS FIRST], false, 0 +(43) Sort [codegen id : 16] +Input [3]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30] +Arguments: [cs_bill_customer_sk#28 ASC NULLS FIRST, cs_item_sk#29 ASC NULLS FIRST], false, 0 -(41) SortMergeJoin [codegen id : 15] -Left keys [2]: [sr_customer_sk#19, sr_item_sk#18] -Right keys [2]: [cs_bill_customer_sk#26, cs_item_sk#27] +(44) SortMergeJoin [codegen id : 17] +Left keys [2]: [sr_customer_sk#20, sr_item_sk#19] +Right keys [2]: [cs_bill_customer_sk#28, cs_item_sk#29] Join condition: None -(42) Project [codegen id : 15] -Output [7]: [ss_quantity#5, sr_return_quantity#21, cs_quantity#28, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] -Input [11]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#18, sr_customer_sk#19, sr_return_quantity#21, cs_bill_customer_sk#26, cs_item_sk#27, cs_quantity#28] +(45) Project [codegen id : 17] +Output [7]: [ss_quantity#5, sr_return_quantity#22, cs_quantity#30, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] +Input [11]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_return_quantity#22, cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30] -(43) HashAggregate [codegen id : 15] -Input [7]: [ss_quantity#5, sr_return_quantity#21, cs_quantity#28, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] +(46) HashAggregate [codegen id : 17] +Input [7]: [ss_quantity#5, sr_return_quantity#22, cs_quantity#30, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] Keys [4]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11] -Functions [3]: [partial_sum(ss_quantity#5), partial_sum(sr_return_quantity#21), partial_sum(cs_quantity#28)] -Aggregate Attributes [3]: [sum#33, sum#34, sum#35] -Results [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#36, sum#37, sum#38] +Functions [3]: [partial_sum(ss_quantity#5), partial_sum(sr_return_quantity#22), partial_sum(cs_quantity#30)] +Aggregate Attributes [3]: [sum#35, sum#36, sum#37] +Results [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#38, sum#39, sum#40] -(44) Exchange -Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#36, sum#37, sum#38] -Arguments: hashpartitioning(i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, 5), ENSURE_REQUIREMENTS, [id=#39] +(47) Exchange +Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#38, sum#39, sum#40] +Arguments: hashpartitioning(i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, 5), ENSURE_REQUIREMENTS, [id=#41] -(45) HashAggregate [codegen id : 16] -Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#36, sum#37, sum#38] +(48) HashAggregate [codegen id : 18] +Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#38, sum#39, sum#40] Keys [4]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11] -Functions [3]: [sum(ss_quantity#5), sum(sr_return_quantity#21), sum(cs_quantity#28)] -Aggregate Attributes [3]: [sum(ss_quantity#5)#40, sum(sr_return_quantity#21)#41, sum(cs_quantity#28)#42] -Results [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum(ss_quantity#5)#40 AS store_sales_quantity#43, 
sum(sr_return_quantity#21)#41 AS store_returns_quantity#44, sum(cs_quantity#28)#42 AS catalog_sales_quantity#45] +Functions [3]: [sum(ss_quantity#5), sum(sr_return_quantity#22), sum(cs_quantity#30)] +Aggregate Attributes [3]: [sum(ss_quantity#5)#42, sum(sr_return_quantity#22)#43, sum(cs_quantity#30)#44] +Results [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum(ss_quantity#5)#42 AS store_sales_quantity#45, sum(sr_return_quantity#22)#43 AS store_returns_quantity#46, sum(cs_quantity#30)#44 AS catalog_sales_quantity#47] -(46) TakeOrderedAndProject -Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, store_sales_quantity#43, store_returns_quantity#44, catalog_sales_quantity#45] -Arguments: 100, [i_item_id#15 ASC NULLS FIRST, i_item_desc#16 ASC NULLS FIRST, s_store_id#10 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST], [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, store_sales_quantity#43, store_returns_quantity#44, catalog_sales_quantity#45] +(49) TakeOrderedAndProject +Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, store_sales_quantity#45, store_returns_quantity#46, catalog_sales_quantity#47] +Arguments: 100, [i_item_id#15 ASC NULLS FIRST, i_item_desc#16 ASC NULLS FIRST, s_store_id#10 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST], [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, store_sales_quantity#45, store_returns_quantity#46, catalog_sales_quantity#47] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7 -BroadcastExchange (51) -+- * Project (50) - +- * Filter (49) - +- * ColumnarToRow (48) - +- Scan parquet default.date_dim (47) +BroadcastExchange (54) ++- * Project (53) + +- * Filter (52) + +- * ColumnarToRow (51) + +- Scan parquet default.date_dim (50) -(47) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#46, d_moy#47] +(50) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_year#48, d_moy#49] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct -(48) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#46, d_moy#47] +(51) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#48, d_moy#49] -(49) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#46, d_moy#47] -Condition : ((((isnotnull(d_moy#47) AND isnotnull(d_year#46)) AND (d_moy#47 = 9)) AND (d_year#46 = 1999)) AND isnotnull(d_date_sk#8)) +(52) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#48, d_moy#49] +Condition : ((((isnotnull(d_moy#49) AND isnotnull(d_year#48)) AND (d_moy#49 = 9)) AND (d_year#48 = 1999)) AND isnotnull(d_date_sk#8)) -(50) Project [codegen id : 1] +(53) Project [codegen id : 1] Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#46, d_moy#47] +Input [3]: [d_date_sk#8, d_year#48, d_moy#49] -(51) BroadcastExchange +(54) BroadcastExchange Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] -Subquery:2 Hosting operator id = 23 Hosting Expression = sr_returned_date_sk#22 IN dynamicpruning#23 -BroadcastExchange (56) -+- * Project (55) - +- * Filter (54) - +- * ColumnarToRow (53) - +- Scan parquet default.date_dim (52) +Subquery:2 Hosting operator id = 24 
Hosting Expression = sr_returned_date_sk#23 IN dynamicpruning#24 +BroadcastExchange (59) ++- * Project (58) + +- * Filter (57) + +- * ColumnarToRow (56) + +- Scan parquet default.date_dim (55) -(52) Scan parquet default.date_dim -Output [3]: [d_date_sk#24, d_year#49, d_moy#50] +(55) Scan parquet default.date_dim +Output [3]: [d_date_sk#25, d_year#51, d_moy#52] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct -(53) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#24, d_year#49, d_moy#50] +(56) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#25, d_year#51, d_moy#52] -(54) Filter [codegen id : 1] -Input [3]: [d_date_sk#24, d_year#49, d_moy#50] -Condition : (((((isnotnull(d_moy#50) AND isnotnull(d_year#49)) AND (d_moy#50 >= 9)) AND (d_moy#50 <= 12)) AND (d_year#49 = 1999)) AND isnotnull(d_date_sk#24)) +(57) Filter [codegen id : 1] +Input [3]: [d_date_sk#25, d_year#51, d_moy#52] +Condition : (((((isnotnull(d_moy#52) AND isnotnull(d_year#51)) AND (d_moy#52 >= 9)) AND (d_moy#52 <= 12)) AND (d_year#51 = 1999)) AND isnotnull(d_date_sk#25)) -(55) Project [codegen id : 1] -Output [1]: [d_date_sk#24] -Input [3]: [d_date_sk#24, d_year#49, d_moy#50] +(58) Project [codegen id : 1] +Output [1]: [d_date_sk#25] +Input [3]: [d_date_sk#25, d_year#51, d_moy#52] -(56) BroadcastExchange -Input [1]: [d_date_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#51] +(59) BroadcastExchange +Input [1]: [d_date_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] -Subquery:3 Hosting operator id = 33 Hosting Expression = cs_sold_date_sk#29 IN dynamicpruning#30 -BroadcastExchange (61) -+- * Project (60) - +- * Filter (59) - +- * ColumnarToRow (58) - +- Scan parquet default.date_dim (57) +Subquery:3 Hosting operator id = 36 Hosting Expression = cs_sold_date_sk#31 IN dynamicpruning#32 +BroadcastExchange (64) ++- * Project (63) + +- * Filter (62) + +- * ColumnarToRow (61) + +- Scan parquet default.date_dim (60) -(57) Scan parquet default.date_dim -Output [2]: [d_date_sk#31, d_year#52] +(60) Scan parquet default.date_dim +Output [2]: [d_date_sk#33, d_year#54] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] ReadSchema: struct -(58) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#31, d_year#52] +(61) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#33, d_year#54] -(59) Filter [codegen id : 1] -Input [2]: [d_date_sk#31, d_year#52] -Condition : (d_year#52 IN (1999,2000,2001) AND isnotnull(d_date_sk#31)) +(62) Filter [codegen id : 1] +Input [2]: [d_date_sk#33, d_year#54] +Condition : (d_year#54 IN (1999,2000,2001) AND isnotnull(d_date_sk#33)) -(60) Project [codegen id : 1] -Output [1]: [d_date_sk#31] -Input [2]: [d_date_sk#31, d_year#52] +(63) Project [codegen id : 1] +Output [1]: [d_date_sk#33] +Input [2]: [d_date_sk#33, d_year#54] -(61) BroadcastExchange -Input [1]: [d_date_sk#31] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] +(64) BroadcastExchange +Input [1]: [d_date_sk#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#55] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt index 0db54fe759962..5463f3f0a8fd4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt @@ -1,90 +1,97 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_quantity,store_returns_quantity,catalog_sales_quantity] - WholeStageCodegen (16) + WholeStageCodegen (18) HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum] [sum(ss_quantity),sum(sr_return_quantity),sum(cs_quantity),store_sales_quantity,store_returns_quantity,catalog_sales_quantity,sum,sum,sum] InputAdapter Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 - WholeStageCodegen (15) + WholeStageCodegen (17) HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_quantity,sr_return_quantity,cs_quantity] [sum,sum,sum,sum,sum,sum] Project [ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name,i_item_id,i_item_desc] SortMergeJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] InputAdapter - WholeStageCodegen (11) - Project [ss_quantity,s_store_id,s_store_name,i_item_id,i_item_desc,sr_item_sk,sr_customer_sk,sr_return_quantity] - SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] - InputAdapter - WholeStageCodegen (7) - Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_store_id,s_store_name,i_item_id,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (4) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #2 - WholeStageCodegen (3) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_store_id,s_store_name] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] + WholeStageCodegen (13) + Sort [sr_customer_sk,sr_item_sk] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk] #2 + WholeStageCodegen (12) + Project [ss_quantity,s_store_id,s_store_name,i_item_id,i_item_desc,sr_item_sk,sr_customer_sk,sr_return_quantity] + SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + WholeStageCodegen (8) + Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #3 + WholeStageCodegen (7) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_store_id,s_store_name,i_item_id,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (3) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project 
[ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [s_store_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Scan parquet default.store [s_store_sk,s_store_id,s_store_name] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] InputAdapter - ReusedExchange [d_date_sk] #3 - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id,s_store_name] - InputAdapter - WholeStageCodegen (6) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #5 - WholeStageCodegen (5) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] - InputAdapter - WholeStageCodegen (10) - Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] - InputAdapter - Exchange [sr_item_sk] #6 - WholeStageCodegen (9) - Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Filter [sr_customer_sk,sr_item_sk,sr_ticket_number] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] - SubqueryBroadcast [d_date_sk] #2 - BroadcastExchange #7 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - ReusedExchange [d_date_sk] #7 + Exchange [i_item_sk] #7 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + InputAdapter + WholeStageCodegen (11) + Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 + WholeStageCodegen (10) + Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + SubqueryBroadcast [d_date_sk] #2 + BroadcastExchange #9 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + ReusedExchange [d_date_sk] #9 InputAdapter - WholeStageCodegen (14) + WholeStageCodegen (16) Sort [cs_bill_customer_sk,cs_item_sk] InputAdapter - Exchange [cs_item_sk] #8 - WholeStageCodegen (13) + Exchange [cs_bill_customer_sk,cs_item_sk] #10 + WholeStageCodegen (15) Project [cs_bill_customer_sk,cs_item_sk,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Filter [cs_bill_customer_sk,cs_item_sk] @@ -92,7 +99,7 @@ TakeOrderedAndProject 
[i_item_id,i_item_desc,s_store_id,s_store_name,store_sales InputAdapter Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] SubqueryBroadcast [d_date_sk] #3 - BroadcastExchange #9 + BroadcastExchange #11 WholeStageCodegen (1) Project [d_date_sk] Filter [d_year,d_date_sk] @@ -100,4 +107,4 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales InputAdapter Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - ReusedExchange [d_date_sk] #9 + ReusedExchange [d_date_sk] #11 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt index 44a956471b61e..529b9c8282db5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt @@ -1,53 +1,56 @@ == Physical Plan == -TakeOrderedAndProject (49) -+- * Project (48) - +- * SortMergeJoin Inner (47) - :- * Project (41) - : +- * SortMergeJoin Inner (40) - : :- * Sort (32) - : : +- * Project (31) - : : +- * Filter (30) - : : +- Window (29) - : : +- * Filter (28) - : : +- Window (27) - : : +- * Sort (26) - : : +- Exchange (25) - : : +- * HashAggregate (24) - : : +- Exchange (23) - : : +- * HashAggregate (22) - : : +- * Project (21) - : : +- * SortMergeJoin Inner (20) - : : :- * Sort (14) - : : : +- Exchange (13) - : : : +- * Project (12) - : : : +- * BroadcastHashJoin Inner BuildRight (11) - : : : :- * Project (6) - : : : : +- * BroadcastHashJoin Inner BuildRight (5) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- ReusedExchange (4) - : : : +- BroadcastExchange (10) - : : : +- * Filter (9) - : : : +- * ColumnarToRow (8) - : : : +- Scan parquet default.store (7) - : : +- * Sort (19) - : : +- Exchange (18) - : : +- * Filter (17) - : : +- * ColumnarToRow (16) - : : +- Scan parquet default.item (15) - : +- * Sort (39) - : +- * Project (38) - : +- Window (37) - : +- * Sort (36) - : +- Exchange (35) - : +- * HashAggregate (34) - : +- ReusedExchange (33) - +- * Sort (46) - +- * Project (45) - +- Window (44) - +- * Sort (43) - +- ReusedExchange (42) +TakeOrderedAndProject (52) ++- * Project (51) + +- * SortMergeJoin Inner (50) + :- * Project (43) + : +- * SortMergeJoin Inner (42) + : :- * Sort (33) + : : +- Exchange (32) + : : +- * Project (31) + : : +- * Filter (30) + : : +- Window (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * SortMergeJoin Inner (20) + : : :- * Sort (14) + : : : +- Exchange (13) + : : : +- * Project (12) + : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : :- * Project (6) + : : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- ReusedExchange (4) + : : : +- BroadcastExchange (10) + : : : +- * Filter (9) + : : : +- * ColumnarToRow (8) + : : : +- Scan parquet default.store (7) + : : +- * Sort (19) + : : +- Exchange (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet default.item (15) + : +- * Sort (41) + : +- Exchange (40) + : +- * Project (39) + : +- Window (38) + : +- * Sort (37) + : +- Exchange 
(36) + : +- * HashAggregate (35) + : +- ReusedExchange (34) + +- * Sort (49) + +- Exchange (48) + +- * Project (47) + +- Window (46) + +- * Sort (45) + +- ReusedExchange (44) (1) Scan parquet default.store_sales @@ -65,7 +68,7 @@ Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_sales_price#3, ss_sold_date_sk#4] Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_sales_price#3, ss_sold_date_sk#4] Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_store_sk#2)) -(4) ReusedExchange [Reuses operator id: 53] +(4) ReusedExchange [Reuses operator id: 56] Output [3]: [d_date_sk#6, d_year#7, d_moy#8] (5) BroadcastHashJoin [codegen id : 3] @@ -189,106 +192,118 @@ Condition : ((isnotnull(avg_monthly_sales#26) AND (avg_monthly_sales#26 > 0.0000 Output [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] Input [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23, rn#25, avg_monthly_sales#26] -(32) Sort [codegen id : 11] +(32) Exchange +Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] +Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#25, 5), ENSURE_REQUIREMENTS, [id=#27] + +(33) Sort [codegen id : 12] Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] Arguments: [i_category#16 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, rn#25 ASC NULLS FIRST], false, 0 -(33) ReusedExchange [Reuses operator id: 23] -Output [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum#33] +(34) ReusedExchange [Reuses operator id: 23] +Output [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum#34] -(34) HashAggregate [codegen id : 19] -Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum#33] -Keys [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32] -Functions [1]: [sum(UnscaledValue(ss_sales_price#34))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#34))#21] -Results [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, MakeDecimal(sum(UnscaledValue(ss_sales_price#34))#21,17,2) AS sum_sales#22] +(35) HashAggregate [codegen id : 20] +Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum#34] +Keys [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33] +Functions [1]: [sum(UnscaledValue(ss_sales_price#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#35))#21] +Results [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, MakeDecimal(sum(UnscaledValue(ss_sales_price#35))#21,17,2) AS sum_sales#22] -(35) Exchange -Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#22] -Arguments: hashpartitioning(i_category#27, i_brand#28, s_store_name#29, s_company_name#30, 5), ENSURE_REQUIREMENTS, [id=#35] +(36) Exchange +Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22] +Arguments: hashpartitioning(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, 5), ENSURE_REQUIREMENTS, 
[id=#36] -(36) Sort [codegen id : 20] -Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#22] -Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, s_store_name#29 ASC NULLS FIRST, s_company_name#30 ASC NULLS FIRST, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST], false, 0 +(37) Sort [codegen id : 21] +Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22] +Arguments: [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, s_store_name#30 ASC NULLS FIRST, s_company_name#31 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 -(37) Window -Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#22] -Arguments: [rank(d_year#31, d_moy#32) windowspecdefinition(i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#36], [i_category#27, i_brand#28, s_store_name#29, s_company_name#30], [d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] +(38) Window +Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22] +Arguments: [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#28, i_brand#29, s_store_name#30, s_company_name#31], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] -(38) Project [codegen id : 21] -Output [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#22 AS sum_sales#37, rn#36] -Input [8]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#22, rn#36] +(39) Project [codegen id : 22] +Output [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#22 AS sum_sales#38, rn#37] +Input [8]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22, rn#37] -(39) Sort [codegen id : 21] -Input [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#37, rn#36] -Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, s_store_name#29 ASC NULLS FIRST, s_company_name#30 ASC NULLS FIRST, (rn#36 + 1) ASC NULLS FIRST], false, 0 +(40) Exchange +Input [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#38, rn#37] +Arguments: hashpartitioning(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, (rn#37 + 1), 5), ENSURE_REQUIREMENTS, [id=#39] -(40) SortMergeJoin [codegen id : 22] +(41) Sort [codegen id : 23] +Input [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#38, rn#37] +Arguments: [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, s_store_name#30 ASC NULLS FIRST, s_company_name#31 ASC NULLS FIRST, (rn#37 + 1) ASC NULLS FIRST], false, 0 + +(42) SortMergeJoin [codegen id : 24] Left keys [5]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#25] -Right keys [5]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, (rn#36 + 1)] +Right keys [5]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, (rn#37 + 1)] Join condition: None -(41) Project [codegen id : 22] -Output [10]: [i_category#16, i_brand#15, s_store_name#10, 
s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, sum_sales#37] -Input [15]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#37, rn#36] +(43) Project [codegen id : 24] +Output [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, sum_sales#38] +Input [15]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#38, rn#37] + +(44) ReusedExchange [Reuses operator id: 36] +Output [7]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22] -(42) ReusedExchange [Reuses operator id: 35] -Output [7]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#22] +(45) Sort [codegen id : 33] +Input [7]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22] +Arguments: [i_category#40 ASC NULLS FIRST, i_brand#41 ASC NULLS FIRST, s_store_name#42 ASC NULLS FIRST, s_company_name#43 ASC NULLS FIRST, d_year#44 ASC NULLS FIRST, d_moy#45 ASC NULLS FIRST], false, 0 -(43) Sort [codegen id : 31] -Input [7]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#22] -Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, s_store_name#40 ASC NULLS FIRST, s_company_name#41 ASC NULLS FIRST, d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST], false, 0 +(46) Window +Input [7]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22] +Arguments: [rank(d_year#44, d_moy#45) windowspecdefinition(i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44 ASC NULLS FIRST, d_moy#45 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#46], [i_category#40, i_brand#41, s_store_name#42, s_company_name#43], [d_year#44 ASC NULLS FIRST, d_moy#45 ASC NULLS FIRST] -(44) Window -Input [7]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#22] -Arguments: [rank(d_year#42, d_moy#43) windowspecdefinition(i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#44], [i_category#38, i_brand#39, s_store_name#40, s_company_name#41], [d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST] +(47) Project [codegen id : 34] +Output [6]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#22 AS sum_sales#47, rn#46] +Input [8]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22, rn#46] -(45) Project [codegen id : 32] -Output [6]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, sum_sales#22 AS sum_sales#45, rn#44] -Input [8]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#22, rn#44] +(48) Exchange +Input [6]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#47, rn#46] +Arguments: hashpartitioning(i_category#40, i_brand#41, s_store_name#42, s_company_name#43, (rn#46 - 1), 5), ENSURE_REQUIREMENTS, [id=#48] -(46) Sort [codegen id : 32] -Input [6]: 
[i_category#38, i_brand#39, s_store_name#40, s_company_name#41, sum_sales#45, rn#44] -Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, s_store_name#40 ASC NULLS FIRST, s_company_name#41 ASC NULLS FIRST, (rn#44 - 1) ASC NULLS FIRST], false, 0 +(49) Sort [codegen id : 35] +Input [6]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#47, rn#46] +Arguments: [i_category#40 ASC NULLS FIRST, i_brand#41 ASC NULLS FIRST, s_store_name#42 ASC NULLS FIRST, s_company_name#43 ASC NULLS FIRST, (rn#46 - 1) ASC NULLS FIRST], false, 0 -(47) SortMergeJoin [codegen id : 33] +(50) SortMergeJoin [codegen id : 36] Left keys [5]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#25] -Right keys [5]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, (rn#44 - 1)] +Right keys [5]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, (rn#46 - 1)] Join condition: None -(48) Project [codegen id : 33] -Output [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, sum_sales#37 AS psum#46, sum_sales#45 AS nsum#47] -Input [16]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, sum_sales#37, i_category#38, i_brand#39, s_store_name#40, s_company_name#41, sum_sales#45, rn#44] +(51) Project [codegen id : 36] +Output [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, sum_sales#38 AS psum#49, sum_sales#47 AS nsum#50] +Input [16]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, sum_sales#38, i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#47, rn#46] -(49) TakeOrderedAndProject -Input [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#46, nsum#47] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#46, nsum#47] +(52) TakeOrderedAndProject +Input [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 -BroadcastExchange (53) -+- * Filter (52) - +- * ColumnarToRow (51) - +- Scan parquet default.date_dim (50) +BroadcastExchange (56) ++- * Filter (55) + +- * ColumnarToRow (54) + +- Scan parquet default.date_dim (53) -(50) Scan parquet default.date_dim +(53) Scan parquet default.date_dim Output [3]: [d_date_sk#6, d_year#7, d_moy#8] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: 
[Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct -(51) ColumnarToRow [codegen id : 1] +(54) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#6, d_year#7, d_moy#8] -(52) Filter [codegen id : 1] +(55) Filter [codegen id : 1] Input [3]: [d_date_sk#6, d_year#7, d_moy#8] Condition : ((((d_year#7 = 1999) OR ((d_year#7 = 1998) AND (d_moy#8 = 12))) OR ((d_year#7 = 2000) AND (d_moy#8 = 1))) AND isnotnull(d_date_sk#6)) -(53) BroadcastExchange +(56) BroadcastExchange Input [3]: [d_date_sk#6, d_year#7, d_moy#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#51] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/simplified.txt index aa2346cacaf2d..07c75d91ca3cf 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/simplified.txt @@ -1,95 +1,104 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_brand,s_company_name,d_year,d_moy,psum,nsum] - WholeStageCodegen (33) + WholeStageCodegen (36) Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] SortMergeJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] InputAdapter - WholeStageCodegen (22) + WholeStageCodegen (24) Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] SortMergeJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] InputAdapter - WholeStageCodegen (11) + WholeStageCodegen (12) Sort [i_category,i_brand,s_store_name,s_company_name,rn] - Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] - Filter [avg_monthly_sales,sum_sales] - InputAdapter - Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] - WholeStageCodegen (10) - Filter [d_year] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] - WholeStageCodegen (9) - Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] - InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name] #1 - WholeStageCodegen (8) - HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] - InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #2 - WholeStageCodegen (7) - HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,ss_sales_price] [sum,sum] - Project [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (4) - Sort [ss_item_sk] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,rn] #1 + WholeStageCodegen (11) + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] + WholeStageCodegen (10) + Filter 
[d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (9) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #2 + WholeStageCodegen (8) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #3 + WholeStageCodegen (7) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,ss_sales_price] [sum,sum] + Project [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter - Exchange [ss_item_sk] #3 - WholeStageCodegen (3) - Project [ss_item_sk,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_sales_price,d_year,d_moy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_item_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #4 - WholeStageCodegen (1) - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - ReusedExchange [d_date_sk,d_year,d_moy] #4 - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [s_store_sk,s_store_name,s_company_name] - ColumnarToRow + WholeStageCodegen (4) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (3) + Project [ss_item_sk,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_sales_price,d_year,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #5 + WholeStageCodegen (1) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter - Scan parquet default.store [s_store_sk,s_store_name,s_company_name] - InputAdapter - WholeStageCodegen (6) - Sort [i_item_sk] + ReusedExchange [d_date_sk,d_year,d_moy] #5 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [s_store_sk,s_store_name,s_company_name] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name,s_company_name] InputAdapter - Exchange [i_item_sk] #6 - WholeStageCodegen (5) - Filter [i_item_sk,i_category,i_brand] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand,i_category] + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #7 + WholeStageCodegen (5) + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand,i_category] InputAdapter - WholeStageCodegen (21) + WholeStageCodegen (23) Sort [i_category,i_brand,s_store_name,s_company_name,rn] - Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] - WholeStageCodegen (20) - Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] - InputAdapter - Exchange 
[i_category,i_brand,s_store_name,s_company_name] #7 - WholeStageCodegen (19) - HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] - InputAdapter - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,rn] #8 + WholeStageCodegen (22) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (21) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #9 + WholeStageCodegen (20) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #3 InputAdapter - WholeStageCodegen (32) + WholeStageCodegen (35) Sort [i_category,i_brand,s_store_name,s_company_name,rn] - Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] - WholeStageCodegen (31) - Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] - InputAdapter - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #7 + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,rn] #10 + WholeStageCodegen (34) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (33) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt index ad356d44af668..ef8d64cee2c4a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt @@ -1,53 +1,56 @@ == Physical Plan == -TakeOrderedAndProject (49) -+- * Project (48) - +- * SortMergeJoin Inner (47) - :- * Project (41) - : +- * SortMergeJoin Inner (40) - : :- * Sort (32) - : : +- * Project (31) - : : +- * Filter (30) - : : +- Window (29) - : : +- * Filter (28) - : : +- Window (27) - : : +- * Sort (26) - : : +- Exchange (25) - : : +- * HashAggregate (24) - : : +- Exchange (23) - : : +- * HashAggregate (22) - : : +- * Project (21) - : : +- * SortMergeJoin Inner (20) - : : :- * Sort (14) - : : : +- Exchange (13) - : : : +- * Project (12) - : : : +- * BroadcastHashJoin Inner BuildRight (11) - : : : :- * Project (6) - : : : : +- * BroadcastHashJoin Inner BuildRight (5) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.catalog_sales (1) - : : : : +- ReusedExchange (4) - : : : +- BroadcastExchange (10) - : : : +- * Filter (9) - : : : +- * ColumnarToRow (8) - : : : +- Scan parquet default.call_center (7) - : : +- * Sort (19) - : : +- Exchange (18) - : : +- * Filter (17) - : : +- * ColumnarToRow (16) - : : +- Scan parquet default.item (15) - : +- * Sort (39) - : +- * Project (38) - : +- Window 
(37) - : +- * Sort (36) - : +- Exchange (35) - : +- * HashAggregate (34) - : +- ReusedExchange (33) - +- * Sort (46) - +- * Project (45) - +- Window (44) - +- * Sort (43) - +- ReusedExchange (42) +TakeOrderedAndProject (52) ++- * Project (51) + +- * SortMergeJoin Inner (50) + :- * Project (43) + : +- * SortMergeJoin Inner (42) + : :- * Sort (33) + : : +- Exchange (32) + : : +- * Project (31) + : : +- * Filter (30) + : : +- Window (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * SortMergeJoin Inner (20) + : : :- * Sort (14) + : : : +- Exchange (13) + : : : +- * Project (12) + : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : :- * Project (6) + : : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- ReusedExchange (4) + : : : +- BroadcastExchange (10) + : : : +- * Filter (9) + : : : +- * ColumnarToRow (8) + : : : +- Scan parquet default.call_center (7) + : : +- * Sort (19) + : : +- Exchange (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet default.item (15) + : +- * Sort (41) + : +- Exchange (40) + : +- * Project (39) + : +- Window (38) + : +- * Sort (37) + : +- Exchange (36) + : +- * HashAggregate (35) + : +- ReusedExchange (34) + +- * Sort (49) + +- Exchange (48) + +- * Project (47) + +- Window (46) + +- * Sort (45) + +- ReusedExchange (44) (1) Scan parquet default.catalog_sales @@ -65,7 +68,7 @@ Input [4]: [cs_call_center_sk#1, cs_item_sk#2, cs_sales_price#3, cs_sold_date_sk Input [4]: [cs_call_center_sk#1, cs_item_sk#2, cs_sales_price#3, cs_sold_date_sk#4] Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_call_center_sk#1)) -(4) ReusedExchange [Reuses operator id: 53] +(4) ReusedExchange [Reuses operator id: 56] Output [3]: [d_date_sk#6, d_year#7, d_moy#8] (5) BroadcastHashJoin [codegen id : 3] @@ -189,106 +192,118 @@ Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.0000 Output [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] Input [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] -(32) Sort [codegen id : 11] +(32) Exchange +Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] +Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, rn#24, 5), ENSURE_REQUIREMENTS, [id=#26] + +(33) Sort [codegen id : 12] Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] Arguments: [i_category#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, rn#24 ASC NULLS FIRST], false, 0 -(33) ReusedExchange [Reuses operator id: 23] -Output [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum#31] +(34) ReusedExchange [Reuses operator id: 23] +Output [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum#32] -(34) HashAggregate [codegen id : 19] -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum#31] -Keys [5]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30] -Functions [1]: [sum(UnscaledValue(cs_sales_price#32))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#32))#20] -Results [6]: [i_category#26, i_brand#27, 
cc_name#28, d_year#29, d_moy#30, MakeDecimal(sum(UnscaledValue(cs_sales_price#32))#20,17,2) AS sum_sales#21] +(35) HashAggregate [codegen id : 20] +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum#32] +Keys [5]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31] +Functions [1]: [sum(UnscaledValue(cs_sales_price#33))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#33))#20] +Results [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, MakeDecimal(sum(UnscaledValue(cs_sales_price#33))#20,17,2) AS sum_sales#21] -(35) Exchange -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#21] -Arguments: hashpartitioning(i_category#26, i_brand#27, cc_name#28, 5), ENSURE_REQUIREMENTS, [id=#33] +(36) Exchange +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21] +Arguments: hashpartitioning(i_category#27, i_brand#28, cc_name#29, 5), ENSURE_REQUIREMENTS, [id=#34] -(36) Sort [codegen id : 20] -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#21] -Arguments: [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, cc_name#28 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST], false, 0 +(37) Sort [codegen id : 21] +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21] +Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, cc_name#29 ASC NULLS FIRST, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST], false, 0 -(37) Window -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#21] -Arguments: [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#26, i_brand#27, cc_name#28, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#34], [i_category#26, i_brand#27, cc_name#28], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] +(38) Window +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21] +Arguments: [rank(d_year#30, d_moy#31) windowspecdefinition(i_category#27, i_brand#28, cc_name#29, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#27, i_brand#28, cc_name#29], [d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] -(38) Project [codegen id : 21] -Output [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#21 AS sum_sales#35, rn#34] -Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#21, rn#34] +(39) Project [codegen id : 22] +Output [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#21 AS sum_sales#36, rn#35] +Input [7]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21, rn#35] -(39) Sort [codegen id : 21] -Input [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#35, rn#34] -Arguments: [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, cc_name#28 ASC NULLS FIRST, (rn#34 + 1) ASC NULLS FIRST], false, 0 +(40) Exchange +Input [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#36, rn#35] +Arguments: hashpartitioning(i_category#27, i_brand#28, cc_name#29, (rn#35 + 1), 5), ENSURE_REQUIREMENTS, [id=#37] -(40) SortMergeJoin [codegen id : 22] +(41) Sort [codegen id : 23] +Input [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#36, rn#35] +Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, cc_name#29 ASC NULLS FIRST, (rn#35 + 1) ASC NULLS FIRST], false, 0 
+ +(42) SortMergeJoin [codegen id : 24] Left keys [4]: [i_category#15, i_brand#14, cc_name#10, rn#24] -Right keys [4]: [i_category#26, i_brand#27, cc_name#28, (rn#34 + 1)] +Right keys [4]: [i_category#27, i_brand#28, cc_name#29, (rn#35 + 1)] Join condition: None -(41) Project [codegen id : 22] -Output [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#35] -Input [13]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, i_category#26, i_brand#27, cc_name#28, sum_sales#35, rn#34] +(43) Project [codegen id : 24] +Output [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#36] +Input [13]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, i_category#27, i_brand#28, cc_name#29, sum_sales#36, rn#35] + +(44) ReusedExchange [Reuses operator id: 36] +Output [6]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21] -(42) ReusedExchange [Reuses operator id: 35] -Output [6]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#21] +(45) Sort [codegen id : 33] +Input [6]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21] +Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, cc_name#40 ASC NULLS FIRST, d_year#41 ASC NULLS FIRST, d_moy#42 ASC NULLS FIRST], false, 0 -(43) Sort [codegen id : 31] -Input [6]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#21] -Arguments: [i_category#36 ASC NULLS FIRST, i_brand#37 ASC NULLS FIRST, cc_name#38 ASC NULLS FIRST, d_year#39 ASC NULLS FIRST, d_moy#40 ASC NULLS FIRST], false, 0 +(46) Window +Input [6]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21] +Arguments: [rank(d_year#41, d_moy#42) windowspecdefinition(i_category#38, i_brand#39, cc_name#40, d_year#41 ASC NULLS FIRST, d_moy#42 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#43], [i_category#38, i_brand#39, cc_name#40], [d_year#41 ASC NULLS FIRST, d_moy#42 ASC NULLS FIRST] -(44) Window -Input [6]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#21] -Arguments: [rank(d_year#39, d_moy#40) windowspecdefinition(i_category#36, i_brand#37, cc_name#38, d_year#39 ASC NULLS FIRST, d_moy#40 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#41], [i_category#36, i_brand#37, cc_name#38], [d_year#39 ASC NULLS FIRST, d_moy#40 ASC NULLS FIRST] +(47) Project [codegen id : 34] +Output [5]: [i_category#38, i_brand#39, cc_name#40, sum_sales#21 AS sum_sales#44, rn#43] +Input [7]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21, rn#43] -(45) Project [codegen id : 32] -Output [5]: [i_category#36, i_brand#37, cc_name#38, sum_sales#21 AS sum_sales#42, rn#41] -Input [7]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#21, rn#41] +(48) Exchange +Input [5]: [i_category#38, i_brand#39, cc_name#40, sum_sales#44, rn#43] +Arguments: hashpartitioning(i_category#38, i_brand#39, cc_name#40, (rn#43 - 1), 5), ENSURE_REQUIREMENTS, [id=#45] -(46) Sort [codegen id : 32] -Input [5]: [i_category#36, i_brand#37, cc_name#38, sum_sales#42, rn#41] -Arguments: [i_category#36 ASC NULLS FIRST, i_brand#37 ASC NULLS FIRST, cc_name#38 ASC NULLS FIRST, (rn#41 - 1) ASC NULLS FIRST], false, 0 +(49) Sort [codegen id : 35] +Input [5]: [i_category#38, 
i_brand#39, cc_name#40, sum_sales#44, rn#43] +Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, cc_name#40 ASC NULLS FIRST, (rn#43 - 1) ASC NULLS FIRST], false, 0 -(47) SortMergeJoin [codegen id : 33] +(50) SortMergeJoin [codegen id : 36] Left keys [4]: [i_category#15, i_brand#14, cc_name#10, rn#24] -Right keys [4]: [i_category#36, i_brand#37, cc_name#38, (rn#41 - 1)] +Right keys [4]: [i_category#38, i_brand#39, cc_name#40, (rn#43 - 1)] Join condition: None -(48) Project [codegen id : 33] -Output [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, sum_sales#35 AS psum#43, sum_sales#42 AS nsum#44] -Input [14]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#35, i_category#36, i_brand#37, cc_name#38, sum_sales#42, rn#41] +(51) Project [codegen id : 36] +Output [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, sum_sales#36 AS psum#46, sum_sales#44 AS nsum#47] +Input [14]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#36, i_category#38, i_brand#39, cc_name#40, sum_sales#44, rn#43] -(49) TakeOrderedAndProject -Input [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#43, nsum#44] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST], [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#43, nsum#44] +(52) TakeOrderedAndProject +Input [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST], [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = cs_sold_date_sk#4 IN dynamicpruning#5 -BroadcastExchange (53) -+- * Filter (52) - +- * ColumnarToRow (51) - +- Scan parquet default.date_dim (50) +BroadcastExchange (56) ++- * Filter (55) + +- * ColumnarToRow (54) + +- Scan parquet default.date_dim (53) -(50) Scan parquet default.date_dim +(53) Scan parquet default.date_dim Output [3]: [d_date_sk#6, d_year#7, d_moy#8] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct -(51) ColumnarToRow [codegen id : 1] +(54) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#6, d_year#7, d_moy#8] -(52) Filter [codegen id : 1] +(55) Filter [codegen id : 1] Input [3]: [d_date_sk#6, d_year#7, d_moy#8] Condition : ((((d_year#7 = 1999) OR ((d_year#7 = 1998) AND (d_moy#8 = 12))) OR ((d_year#7 = 2000) AND (d_moy#8 = 1))) AND isnotnull(d_date_sk#6)) -(53) BroadcastExchange +(56) BroadcastExchange Input [3]: [d_date_sk#6, d_year#7, d_moy#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#45] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, 
false] as bigint)),false), [id=#48] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/simplified.txt index b488806fe9a07..3bf10f82e6a88 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/simplified.txt @@ -1,95 +1,104 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,cc_name,i_category,i_brand,d_year,d_moy,psum,nsum] - WholeStageCodegen (33) + WholeStageCodegen (36) Project [i_category,i_brand,cc_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] SortMergeJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] InputAdapter - WholeStageCodegen (22) + WholeStageCodegen (24) Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] SortMergeJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] InputAdapter - WholeStageCodegen (11) + WholeStageCodegen (12) Sort [i_category,i_brand,cc_name,rn] - Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] - Filter [avg_monthly_sales,sum_sales] - InputAdapter - Window [_w0,i_category,i_brand,cc_name,d_year] - WholeStageCodegen (10) - Filter [d_year] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,cc_name] - WholeStageCodegen (9) - Sort [i_category,i_brand,cc_name,d_year,d_moy] - InputAdapter - Exchange [i_category,i_brand,cc_name] #1 - WholeStageCodegen (8) - HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] - InputAdapter - Exchange [i_category,i_brand,cc_name,d_year,d_moy] #2 - WholeStageCodegen (7) - HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,cs_sales_price] [sum,sum] - Project [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] - SortMergeJoin [cs_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (4) - Sort [cs_item_sk] + InputAdapter + Exchange [i_category,i_brand,cc_name,rn] #1 + WholeStageCodegen (11) + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,cc_name,d_year] + WholeStageCodegen (10) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (9) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #2 + WholeStageCodegen (8) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,cc_name,d_year,d_moy] #3 + WholeStageCodegen (7) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,cs_sales_price] [sum,sum] + Project [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] + SortMergeJoin [cs_item_sk,i_item_sk] InputAdapter - Exchange [cs_item_sk] #3 - WholeStageCodegen (3) - Project [cs_item_sk,cs_sales_price,d_year,d_moy,cc_name] - BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] - Project [cs_call_center_sk,cs_item_sk,cs_sales_price,d_year,d_moy] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_item_sk,cs_call_center_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #4 - WholeStageCodegen (1) - 
Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - ReusedExchange [d_date_sk,d_year,d_moy] #4 - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [cc_call_center_sk,cc_name] - ColumnarToRow + WholeStageCodegen (4) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (3) + Project [cs_item_sk,cs_sales_price,d_year,d_moy,cc_name] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [cs_call_center_sk,cs_item_sk,cs_sales_price,d_year,d_moy] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #5 + WholeStageCodegen (1) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter - Scan parquet default.call_center [cc_call_center_sk,cc_name] - InputAdapter - WholeStageCodegen (6) - Sort [i_item_sk] + ReusedExchange [d_date_sk,d_year,d_moy] #5 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [cc_call_center_sk,cc_name] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_sk,cc_name] InputAdapter - Exchange [i_item_sk] #6 - WholeStageCodegen (5) - Filter [i_item_sk,i_category,i_brand] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand,i_category] + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #7 + WholeStageCodegen (5) + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand,i_category] InputAdapter - WholeStageCodegen (21) + WholeStageCodegen (23) Sort [i_category,i_brand,cc_name,rn] - Project [i_category,i_brand,cc_name,sum_sales,rn] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,cc_name] - WholeStageCodegen (20) - Sort [i_category,i_brand,cc_name,d_year,d_moy] - InputAdapter - Exchange [i_category,i_brand,cc_name] #7 - WholeStageCodegen (19) - HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] - InputAdapter - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 + InputAdapter + Exchange [i_category,i_brand,cc_name,rn] #8 + WholeStageCodegen (22) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (21) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #9 + WholeStageCodegen (20) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #3 InputAdapter - WholeStageCodegen (32) + WholeStageCodegen (35) Sort [i_category,i_brand,cc_name,rn] - Project [i_category,i_brand,cc_name,sum_sales,rn] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,cc_name] - WholeStageCodegen (31) - Sort [i_category,i_brand,cc_name,d_year,d_moy] - InputAdapter - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #7 + InputAdapter + Exchange [i_category,i_brand,cc_name,rn] #10 + WholeStageCodegen (34) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (33) + 
Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt index d4ecd7a94c66a..c6971f3ea904b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt @@ -1,72 +1,74 @@ == Physical Plan == -TakeOrderedAndProject (68) -+- * HashAggregate (67) - +- Exchange (66) - +- * HashAggregate (65) - +- * Project (64) - +- * SortMergeJoin LeftOuter (63) - :- * Sort (56) - : +- * Project (55) - : +- * BroadcastHashJoin LeftOuter BuildRight (54) - : :- * Project (49) - : : +- * SortMergeJoin Inner (48) - : : :- * Sort (36) - : : : +- * Project (35) - : : : +- * BroadcastHashJoin Inner BuildRight (34) - : : : :- * Project (32) - : : : : +- * SortMergeJoin Inner (31) - : : : : :- * Sort (25) - : : : : : +- Exchange (24) - : : : : : +- * Project (23) - : : : : : +- * BroadcastHashJoin Inner BuildRight (22) - : : : : : :- * Project (17) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : : : :- * Project (10) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : : : :- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : : : +- BroadcastExchange (8) - : : : : : : : +- * Project (7) - : : : : : : : +- * Filter (6) - : : : : : : : +- * ColumnarToRow (5) - : : : : : : : +- Scan parquet default.household_demographics (4) - : : : : : : +- BroadcastExchange (15) - : : : : : : +- * Project (14) - : : : : : : +- * Filter (13) - : : : : : : +- * ColumnarToRow (12) - : : : : : : +- Scan parquet default.customer_demographics (11) - : : : : : +- BroadcastExchange (21) - : : : : : +- * Filter (20) - : : : : : +- * ColumnarToRow (19) - : : : : : +- Scan parquet default.date_dim (18) - : : : : +- * Sort (30) - : : : : +- Exchange (29) - : : : : +- * Filter (28) - : : : : +- * ColumnarToRow (27) - : : : : +- Scan parquet default.item (26) - : : : +- ReusedExchange (33) - : : +- * Sort (47) - : : +- Exchange (46) - : : +- * Project (45) - : : +- * BroadcastHashJoin Inner BuildRight (44) - : : :- * Filter (39) - : : : +- * ColumnarToRow (38) - : : : +- Scan parquet default.inventory (37) - : : +- BroadcastExchange (43) - : : +- * Filter (42) - : : +- * ColumnarToRow (41) - : : +- Scan parquet default.warehouse (40) - : +- BroadcastExchange (53) - : +- * Filter (52) - : +- * ColumnarToRow (51) - : +- Scan parquet default.promotion (50) - +- * Sort (62) - +- Exchange (61) - +- * Project (60) - +- * Filter (59) - +- * ColumnarToRow (58) - +- Scan parquet default.catalog_returns (57) +TakeOrderedAndProject (70) ++- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Project (66) + +- * SortMergeJoin LeftOuter (65) + :- * Sort (58) + : +- Exchange (57) + : +- * Project (56) + : +- * BroadcastHashJoin LeftOuter BuildRight (55) + : :- * Project (50) + : : +- * SortMergeJoin Inner (49) + : : :- * Sort (37) + : : : +- Exchange (36) + : : : +- * Project (35) + : : : +- * BroadcastHashJoin Inner BuildRight (34) + : : : :- * Project (32) + : : : : +- * SortMergeJoin Inner (31) + : : : : :- * Sort (25) + : : : : : +- Exchange (24) + : : : : : +- * Project (23) + : : : : : +- * BroadcastHashJoin Inner 
BuildRight (22) + : : : : : :- * Project (17) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : : :- * Project (10) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : : :- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : : +- BroadcastExchange (8) + : : : : : : : +- * Project (7) + : : : : : : : +- * Filter (6) + : : : : : : : +- * ColumnarToRow (5) + : : : : : : : +- Scan parquet default.household_demographics (4) + : : : : : : +- BroadcastExchange (15) + : : : : : : +- * Project (14) + : : : : : : +- * Filter (13) + : : : : : : +- * ColumnarToRow (12) + : : : : : : +- Scan parquet default.customer_demographics (11) + : : : : : +- BroadcastExchange (21) + : : : : : +- * Filter (20) + : : : : : +- * ColumnarToRow (19) + : : : : : +- Scan parquet default.date_dim (18) + : : : : +- * Sort (30) + : : : : +- Exchange (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet default.item (26) + : : : +- ReusedExchange (33) + : : +- * Sort (48) + : : +- Exchange (47) + : : +- * Project (46) + : : +- * BroadcastHashJoin Inner BuildRight (45) + : : :- * Filter (40) + : : : +- * ColumnarToRow (39) + : : : +- Scan parquet default.inventory (38) + : : +- BroadcastExchange (44) + : : +- * Filter (43) + : : +- * ColumnarToRow (42) + : : +- Scan parquet default.warehouse (41) + : +- BroadcastExchange (54) + : +- * Filter (53) + : +- * ColumnarToRow (52) + : +- Scan parquet default.promotion (51) + +- * Sort (64) + +- Exchange (63) + +- * Project (62) + +- * Filter (61) + +- * ColumnarToRow (60) + +- Scan parquet default.catalog_returns (59) (1) Scan parquet default.catalog_sales @@ -212,7 +214,7 @@ Join condition: None Output [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17, i_item_desc#21] Input [8]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17, i_item_sk#20, i_item_desc#21] -(33) ReusedExchange [Reuses operator id: 79] +(33) ReusedExchange [Reuses operator id: 81] Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] (34) BroadcastHashJoin [codegen id : 10] @@ -224,220 +226,228 @@ Join condition: (d_date#17 > date_add(d_date#24, 5)) Output [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26] Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17, i_item_desc#21, d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] -(36) Sort [codegen id : 10] +(36) Exchange +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26] +Arguments: hashpartitioning(cs_item_sk#4, d_date_sk#26, 5), ENSURE_REQUIREMENTS, [id=#27] + +(37) Sort [codegen id : 11] Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26] Arguments: [cs_item_sk#4 ASC NULLS FIRST, d_date_sk#26 ASC NULLS FIRST], false, 0 -(37) Scan parquet default.inventory -Output [4]: [inv_item_sk#27, inv_warehouse_sk#28, inv_quantity_on_hand#29, inv_date_sk#30] +(38) Scan parquet default.inventory +Output [4]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(inv_date_sk#30), dynamicpruningexpression(true)] +PartitionFilters: [isnotnull(inv_date_sk#31), 
dynamicpruningexpression(true)] PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] ReadSchema: struct -(38) ColumnarToRow [codegen id : 12] -Input [4]: [inv_item_sk#27, inv_warehouse_sk#28, inv_quantity_on_hand#29, inv_date_sk#30] +(39) ColumnarToRow [codegen id : 13] +Input [4]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31] -(39) Filter [codegen id : 12] -Input [4]: [inv_item_sk#27, inv_warehouse_sk#28, inv_quantity_on_hand#29, inv_date_sk#30] -Condition : ((isnotnull(inv_quantity_on_hand#29) AND isnotnull(inv_item_sk#27)) AND isnotnull(inv_warehouse_sk#28)) +(40) Filter [codegen id : 13] +Input [4]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31] +Condition : ((isnotnull(inv_quantity_on_hand#30) AND isnotnull(inv_item_sk#28)) AND isnotnull(inv_warehouse_sk#29)) -(40) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] +(41) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#32, w_warehouse_name#33] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct -(41) ColumnarToRow [codegen id : 11] -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] +(42) ColumnarToRow [codegen id : 12] +Input [2]: [w_warehouse_sk#32, w_warehouse_name#33] -(42) Filter [codegen id : 11] -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] -Condition : isnotnull(w_warehouse_sk#31) +(43) Filter [codegen id : 12] +Input [2]: [w_warehouse_sk#32, w_warehouse_name#33] +Condition : isnotnull(w_warehouse_sk#32) -(43) BroadcastExchange -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] +(44) BroadcastExchange +Input [2]: [w_warehouse_sk#32, w_warehouse_name#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#34] -(44) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [inv_warehouse_sk#28] -Right keys [1]: [w_warehouse_sk#31] +(45) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [inv_warehouse_sk#29] +Right keys [1]: [w_warehouse_sk#32] Join condition: None -(45) Project [codegen id : 12] -Output [4]: [inv_item_sk#27, inv_quantity_on_hand#29, inv_date_sk#30, w_warehouse_name#32] -Input [6]: [inv_item_sk#27, inv_warehouse_sk#28, inv_quantity_on_hand#29, inv_date_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +(46) Project [codegen id : 13] +Output [4]: [inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] +Input [6]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_sk#32, w_warehouse_name#33] -(46) Exchange -Input [4]: [inv_item_sk#27, inv_quantity_on_hand#29, inv_date_sk#30, w_warehouse_name#32] -Arguments: hashpartitioning(inv_item_sk#27, 5), ENSURE_REQUIREMENTS, [id=#34] +(47) Exchange +Input [4]: [inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] +Arguments: hashpartitioning(inv_item_sk#28, inv_date_sk#31, 5), ENSURE_REQUIREMENTS, [id=#35] -(47) Sort [codegen id : 13] -Input [4]: [inv_item_sk#27, inv_quantity_on_hand#29, inv_date_sk#30, w_warehouse_name#32] -Arguments: [inv_item_sk#27 ASC NULLS FIRST, inv_date_sk#30 ASC NULLS FIRST], false, 0 +(48) Sort [codegen id : 14] +Input [4]: [inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] +Arguments: [inv_item_sk#28 ASC NULLS FIRST, inv_date_sk#31 ASC NULLS 
FIRST], false, 0 -(48) SortMergeJoin [codegen id : 15] +(49) SortMergeJoin [codegen id : 16] Left keys [2]: [cs_item_sk#4, d_date_sk#26] -Right keys [2]: [inv_item_sk#27, inv_date_sk#30] -Join condition: (inv_quantity_on_hand#29 < cs_quantity#7) +Right keys [2]: [inv_item_sk#28, inv_date_sk#31] +Join condition: (inv_quantity_on_hand#30 < cs_quantity#7) -(49) Project [codegen id : 15] -Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#32, i_item_desc#21, d_week_seq#25] -Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26, inv_item_sk#27, inv_quantity_on_hand#29, inv_date_sk#30, w_warehouse_name#32] +(50) Project [codegen id : 16] +Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] +Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26, inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] -(50) Scan parquet default.promotion -Output [1]: [p_promo_sk#35] +(51) Scan parquet default.promotion +Output [1]: [p_promo_sk#36] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [IsNotNull(p_promo_sk)] ReadSchema: struct -(51) ColumnarToRow [codegen id : 14] -Input [1]: [p_promo_sk#35] +(52) ColumnarToRow [codegen id : 15] +Input [1]: [p_promo_sk#36] -(52) Filter [codegen id : 14] -Input [1]: [p_promo_sk#35] -Condition : isnotnull(p_promo_sk#35) +(53) Filter [codegen id : 15] +Input [1]: [p_promo_sk#36] +Condition : isnotnull(p_promo_sk#36) -(53) BroadcastExchange -Input [1]: [p_promo_sk#35] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] +(54) BroadcastExchange +Input [1]: [p_promo_sk#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] -(54) BroadcastHashJoin [codegen id : 15] +(55) BroadcastHashJoin [codegen id : 16] Left keys [1]: [cs_promo_sk#5] -Right keys [1]: [p_promo_sk#35] +Right keys [1]: [p_promo_sk#36] Join condition: None -(55) Project [codegen id : 15] -Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#32, i_item_desc#21, d_week_seq#25] -Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#32, i_item_desc#21, d_week_seq#25, p_promo_sk#35] +(56) Project [codegen id : 16] +Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25, p_promo_sk#36] + +(57) Exchange +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [id=#38] -(56) Sort [codegen id : 15] -Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#32, i_item_desc#21, d_week_seq#25] +(58) Sort [codegen id : 17] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] Arguments: [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST], false, 0 -(57) Scan parquet default.catalog_returns -Output [3]: [cr_item_sk#37, cr_order_number#38, cr_returned_date_sk#39] +(59) Scan parquet default.catalog_returns +Output [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: 
[IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] ReadSchema: struct -(58) ColumnarToRow [codegen id : 16] -Input [3]: [cr_item_sk#37, cr_order_number#38, cr_returned_date_sk#39] +(60) ColumnarToRow [codegen id : 18] +Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] -(59) Filter [codegen id : 16] -Input [3]: [cr_item_sk#37, cr_order_number#38, cr_returned_date_sk#39] -Condition : (isnotnull(cr_item_sk#37) AND isnotnull(cr_order_number#38)) +(61) Filter [codegen id : 18] +Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Condition : (isnotnull(cr_item_sk#39) AND isnotnull(cr_order_number#40)) -(60) Project [codegen id : 16] -Output [2]: [cr_item_sk#37, cr_order_number#38] -Input [3]: [cr_item_sk#37, cr_order_number#38, cr_returned_date_sk#39] +(62) Project [codegen id : 18] +Output [2]: [cr_item_sk#39, cr_order_number#40] +Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] -(61) Exchange -Input [2]: [cr_item_sk#37, cr_order_number#38] -Arguments: hashpartitioning(cr_item_sk#37, 5), ENSURE_REQUIREMENTS, [id=#40] +(63) Exchange +Input [2]: [cr_item_sk#39, cr_order_number#40] +Arguments: hashpartitioning(cr_item_sk#39, cr_order_number#40, 5), ENSURE_REQUIREMENTS, [id=#42] -(62) Sort [codegen id : 17] -Input [2]: [cr_item_sk#37, cr_order_number#38] -Arguments: [cr_item_sk#37 ASC NULLS FIRST, cr_order_number#38 ASC NULLS FIRST], false, 0 +(64) Sort [codegen id : 19] +Input [2]: [cr_item_sk#39, cr_order_number#40] +Arguments: [cr_item_sk#39 ASC NULLS FIRST, cr_order_number#40 ASC NULLS FIRST], false, 0 -(63) SortMergeJoin [codegen id : 18] +(65) SortMergeJoin [codegen id : 20] Left keys [2]: [cs_item_sk#4, cs_order_number#6] -Right keys [2]: [cr_item_sk#37, cr_order_number#38] +Right keys [2]: [cr_item_sk#39, cr_order_number#40] Join condition: None -(64) Project [codegen id : 18] -Output [3]: [w_warehouse_name#32, i_item_desc#21, d_week_seq#25] -Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#32, i_item_desc#21, d_week_seq#25, cr_item_sk#37, cr_order_number#38] +(66) Project [codegen id : 20] +Output [3]: [w_warehouse_name#33, i_item_desc#21, d_week_seq#25] +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25, cr_item_sk#39, cr_order_number#40] -(65) HashAggregate [codegen id : 18] -Input [3]: [w_warehouse_name#32, i_item_desc#21, d_week_seq#25] -Keys [3]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25] +(67) HashAggregate [codegen id : 20] +Input [3]: [w_warehouse_name#33, i_item_desc#21, d_week_seq#25] +Keys [3]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#41] -Results [4]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25, count#42] +Aggregate Attributes [1]: [count#43] +Results [4]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count#44] -(66) Exchange -Input [4]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25, count#42] -Arguments: hashpartitioning(i_item_desc#21, w_warehouse_name#32, d_week_seq#25, 5), ENSURE_REQUIREMENTS, [id=#43] +(68) Exchange +Input [4]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count#44] +Arguments: hashpartitioning(i_item_desc#21, w_warehouse_name#33, d_week_seq#25, 5), ENSURE_REQUIREMENTS, [id=#45] -(67) HashAggregate [codegen id : 19] -Input [4]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25, count#42] -Keys [3]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25] +(69) HashAggregate [codegen id : 21] +Input [4]: [i_item_desc#21, 
w_warehouse_name#33, d_week_seq#25, count#44] +Keys [3]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#44] -Results [6]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25, count(1)#44 AS no_promo#45, count(1)#44 AS promo#46, count(1)#44 AS total_cnt#47] +Aggregate Attributes [1]: [count(1)#46] +Results [6]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count(1)#46 AS no_promo#47, count(1)#46 AS promo#48, count(1)#46 AS total_cnt#49] -(68) TakeOrderedAndProject -Input [6]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25, no_promo#45, promo#46, total_cnt#47] -Arguments: 100, [total_cnt#47 DESC NULLS LAST, i_item_desc#21 ASC NULLS FIRST, w_warehouse_name#32 ASC NULLS FIRST, d_week_seq#25 ASC NULLS FIRST], [i_item_desc#21, w_warehouse_name#32, d_week_seq#25, no_promo#45, promo#46, total_cnt#47] +(70) TakeOrderedAndProject +Input [6]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, no_promo#47, promo#48, total_cnt#49] +Arguments: 100, [total_cnt#49 DESC NULLS LAST, i_item_desc#21 ASC NULLS FIRST, w_warehouse_name#33 ASC NULLS FIRST, d_week_seq#25 ASC NULLS FIRST], [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, no_promo#47, promo#48, total_cnt#49] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = cs_sold_date_sk#8 IN dynamicpruning#9 -BroadcastExchange (79) -+- * Project (78) - +- * BroadcastHashJoin Inner BuildLeft (77) - :- BroadcastExchange (73) - : +- * Project (72) - : +- * Filter (71) - : +- * ColumnarToRow (70) - : +- Scan parquet default.date_dim (69) - +- * Filter (76) - +- * ColumnarToRow (75) - +- Scan parquet default.date_dim (74) - - -(69) Scan parquet default.date_dim -Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#48] +BroadcastExchange (81) ++- * Project (80) + +- * BroadcastHashJoin Inner BuildLeft (79) + :- BroadcastExchange (75) + : +- * Project (74) + : +- * Filter (73) + : +- * ColumnarToRow (72) + : +- Scan parquet default.date_dim (71) + +- * Filter (78) + +- * ColumnarToRow (77) + +- Scan parquet default.date_dim (76) + + +(71) Scan parquet default.date_dim +Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] ReadSchema: struct -(70) ColumnarToRow [codegen id : 1] -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#48] +(72) ColumnarToRow [codegen id : 1] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] -(71) Filter [codegen id : 1] -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#48] -Condition : ((((isnotnull(d_year#48) AND (d_year#48 = 1999)) AND isnotnull(d_date_sk#23)) AND isnotnull(d_week_seq#25)) AND isnotnull(d_date#24)) +(73) Filter [codegen id : 1] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] +Condition : ((((isnotnull(d_year#50) AND (d_year#50 = 1999)) AND isnotnull(d_date_sk#23)) AND isnotnull(d_week_seq#25)) AND isnotnull(d_date#24)) -(72) Project [codegen id : 1] +(74) Project [codegen id : 1] Output [3]: [d_date_sk#23, d_date#24, d_week_seq#25] -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#48] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] -(73) BroadcastExchange +(75) BroadcastExchange Input [3]: [d_date_sk#23, d_date#24, d_week_seq#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#49] 
+Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#51] -(74) Scan parquet default.date_dim -Output [2]: [d_date_sk#26, d_week_seq#50] +(76) Scan parquet default.date_dim +Output [2]: [d_date_sk#26, d_week_seq#52] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(75) ColumnarToRow -Input [2]: [d_date_sk#26, d_week_seq#50] +(77) ColumnarToRow +Input [2]: [d_date_sk#26, d_week_seq#52] -(76) Filter -Input [2]: [d_date_sk#26, d_week_seq#50] -Condition : (isnotnull(d_week_seq#50) AND isnotnull(d_date_sk#26)) +(78) Filter +Input [2]: [d_date_sk#26, d_week_seq#52] +Condition : (isnotnull(d_week_seq#52) AND isnotnull(d_date_sk#26)) -(77) BroadcastHashJoin [codegen id : 2] +(79) BroadcastHashJoin [codegen id : 2] Left keys [1]: [d_week_seq#25] -Right keys [1]: [d_week_seq#50] +Right keys [1]: [d_week_seq#52] Join condition: None -(78) Project [codegen id : 2] +(80) Project [codegen id : 2] Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] -Input [5]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26, d_week_seq#50] +Input [5]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26, d_week_seq#52] -(79) BroadcastExchange +(81) BroadcastExchange Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#51] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt index d84393b2ff106..e838025a71db8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt @@ -1,126 +1,132 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo] - WholeStageCodegen (19) + WholeStageCodegen (21) HashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count] [count(1),no_promo,promo,total_cnt,count] InputAdapter Exchange [i_item_desc,w_warehouse_name,d_week_seq] #1 - WholeStageCodegen (18) + WholeStageCodegen (20) HashAggregate [i_item_desc,w_warehouse_name,d_week_seq] [count,count] Project [w_warehouse_name,i_item_desc,d_week_seq] SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] InputAdapter - WholeStageCodegen (15) + WholeStageCodegen (17) Sort [cs_item_sk,cs_order_number] - Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] - BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] - SortMergeJoin [cs_item_sk,d_date_sk,inv_item_sk,inv_date_sk,inv_quantity_on_hand,cs_quantity] - InputAdapter - WholeStageCodegen (10) - Sort [cs_item_sk,d_date_sk] - Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_week_seq,d_date_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk,d_date,d_date] - Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk,d_date,i_item_desc] - SortMergeJoin [cs_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (5) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #2 - WholeStageCodegen (4) - Project 
[cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk,d_date] - BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] - BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] - BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] - Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #3 - WholeStageCodegen (2) - Project [d_date_sk,d_date,d_week_seq,d_date_sk] - BroadcastHashJoin [d_week_seq,d_week_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Project [d_date_sk,d_date,d_week_seq] - Filter [d_year,d_date_sk,d_week_seq,d_date] - ColumnarToRow + InputAdapter + Exchange [cs_item_sk,cs_order_number] #2 + WholeStageCodegen (16) + Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + SortMergeJoin [cs_item_sk,d_date_sk,inv_item_sk,inv_date_sk,inv_quantity_on_hand,cs_quantity] + InputAdapter + WholeStageCodegen (11) + Sort [cs_item_sk,d_date_sk] + InputAdapter + Exchange [cs_item_sk,d_date_sk] #3 + WholeStageCodegen (10) + Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_week_seq,d_date_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk,d_date,d_date] + Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk,d_date,i_item_desc] + SortMergeJoin [cs_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (5) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (4) + Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk,d_date] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk,d_date,d_week_seq,d_date_sk] + BroadcastHashJoin [d_week_seq,d_week_seq] InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] - Filter [d_week_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [hd_demo_sk] - Filter [hd_buy_potential,hd_demo_sk] - ColumnarToRow + BroadcastExchange #6 + WholeStageCodegen (1) + Project [d_date_sk,d_date,d_week_seq] + Filter [d_year,d_date_sk,d_week_seq,d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + Filter [d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.date_dim [d_date_sk,d_week_seq] InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Project [cd_demo_sk] - Filter [cd_marital_status,cd_demo_sk] - ColumnarToRow + BroadcastExchange #7 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (3) - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] - InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #8 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - InputAdapter - ReusedExchange [d_date_sk,d_date,d_week_seq,d_date_sk] #3 - InputAdapter - WholeStageCodegen (13) - Sort [inv_item_sk,inv_date_sk] + BroadcastExchange #8 + WholeStageCodegen (2) + Project [cd_demo_sk] + Filter [cd_marital_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (3) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + WholeStageCodegen (7) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #10 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_desc] + InputAdapter + ReusedExchange [d_date_sk,d_date,d_week_seq,d_date_sk] #5 InputAdapter - Exchange [inv_item_sk] #9 - WholeStageCodegen (12) - Project [inv_item_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_name] - BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (11) - Filter [w_warehouse_sk] + WholeStageCodegen (14) + Sort [inv_item_sk,inv_date_sk] + InputAdapter + Exchange [inv_item_sk,inv_date_sk] #11 + WholeStageCodegen (13) + Project [inv_item_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk] ColumnarToRow InputAdapter - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (14) - Filter [p_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.promotion [p_promo_sk] + Scan parquet default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (12) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (15) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk] InputAdapter - WholeStageCodegen (17) + WholeStageCodegen (19) Sort [cr_item_sk,cr_order_number] InputAdapter - Exchange [cr_item_sk] #12 - WholeStageCodegen (16) + Exchange [cr_item_sk,cr_order_number] #14 + WholeStageCodegen (18) 
Project [cr_item_sk,cr_order_number] Filter [cr_item_sk,cr_order_number] ColumnarToRow diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt index 332a0b9220538..c08379b07b397 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt @@ -273,37 +273,34 @@ Arguments: [c_last_name#15 ASC NULLS FIRST, c_first_name#14 ASC NULLS FIRST, s_s ===== Subqueries ===== Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#48, [id=#49] -* HashAggregate (79) -+- Exchange (78) - +- * HashAggregate (77) - +- * HashAggregate (76) - +- Exchange (75) - +- * HashAggregate (74) - +- * Project (73) - +- * SortMergeJoin Inner (72) - :- * Sort (65) - : +- * Project (64) - : +- * SortMergeJoin Inner (63) - : :- * Sort (57) - : : +- Exchange (56) - : : +- * Project (55) - : : +- * BroadcastHashJoin Inner BuildLeft (54) - : : :- ReusedExchange (49) - : : +- * Project (53) - : : +- * Filter (52) - : : +- * ColumnarToRow (51) - : : +- Scan parquet default.store_sales (50) - : +- * Sort (62) - : +- Exchange (61) - : +- * Filter (60) - : +- * ColumnarToRow (59) - : +- Scan parquet default.item (58) - +- * Sort (71) - +- Exchange (70) - +- * Project (69) - +- * Filter (68) - +- * ColumnarToRow (67) - +- Scan parquet default.store_returns (66) +* HashAggregate (76) ++- Exchange (75) + +- * HashAggregate (74) + +- * HashAggregate (73) + +- Exchange (72) + +- * HashAggregate (71) + +- * Project (70) + +- * SortMergeJoin Inner (69) + :- * Sort (66) + : +- Exchange (65) + : +- * Project (64) + : +- * SortMergeJoin Inner (63) + : :- * Sort (57) + : : +- Exchange (56) + : : +- * Project (55) + : : +- * BroadcastHashJoin Inner BuildLeft (54) + : : :- ReusedExchange (49) + : : +- * Project (53) + : : +- * Filter (52) + : : +- * ColumnarToRow (51) + : : +- Scan parquet default.store_sales (50) + : +- * Sort (62) + : +- Exchange (61) + : +- * Filter (60) + : +- * ColumnarToRow (59) + : +- Scan parquet default.item (58) + +- * Sort (68) + +- ReusedExchange (67) (49) ReusedExchange [Reuses operator id: 17] @@ -375,75 +372,60 @@ Join condition: None Output [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] Input [14]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_item_sk#24, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] -(65) Sort [codegen id : 8] +(65) Exchange Input [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] -Arguments: [ss_ticket_number#21 ASC NULLS FIRST, ss_item_sk#18 ASC NULLS FIRST], false, 0 - -(66) Scan parquet default.store_returns -Output [3]: [sr_item_sk#32, sr_ticket_number#33, sr_returned_date_sk#34] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] -ReadSchema: struct - -(67) ColumnarToRow [codegen id : 9] -Input [3]: [sr_item_sk#32, sr_ticket_number#33, sr_returned_date_sk#34] +Arguments: 
hashpartitioning(ss_ticket_number#21, ss_item_sk#18, 5), ENSURE_REQUIREMENTS, [id=#53] -(68) Filter [codegen id : 9] -Input [3]: [sr_item_sk#32, sr_ticket_number#33, sr_returned_date_sk#34] -Condition : (isnotnull(sr_ticket_number#33) AND isnotnull(sr_item_sk#32)) +(66) Sort [codegen id : 9] +Input [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] +Arguments: [ss_ticket_number#21 ASC NULLS FIRST, ss_item_sk#18 ASC NULLS FIRST], false, 0 -(69) Project [codegen id : 9] +(67) ReusedExchange [Reuses operator id: 36] Output [2]: [sr_item_sk#32, sr_ticket_number#33] -Input [3]: [sr_item_sk#32, sr_ticket_number#33, sr_returned_date_sk#34] -(70) Exchange -Input [2]: [sr_item_sk#32, sr_ticket_number#33] -Arguments: hashpartitioning(sr_item_sk#32, 5), ENSURE_REQUIREMENTS, [id=#53] - -(71) Sort [codegen id : 10] +(68) Sort [codegen id : 11] Input [2]: [sr_item_sk#32, sr_ticket_number#33] Arguments: [sr_ticket_number#33 ASC NULLS FIRST, sr_item_sk#32 ASC NULLS FIRST], false, 0 -(72) SortMergeJoin [codegen id : 11] +(69) SortMergeJoin [codegen id : 12] Left keys [2]: [ss_ticket_number#21, ss_item_sk#18] Right keys [2]: [sr_ticket_number#33, sr_item_sk#32] Join condition: None -(73) Project [codegen id : 11] +(70) Project [codegen id : 12] Output [11]: [ss_net_paid#22, s_store_name#2, s_state#4, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29, c_first_name#14, c_last_name#15, ca_state#8] Input [15]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29, sr_item_sk#32, sr_ticket_number#33] -(74) HashAggregate [codegen id : 11] +(71) HashAggregate [codegen id : 12] Input [11]: [ss_net_paid#22, s_store_name#2, s_state#4, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29, c_first_name#14, c_last_name#15, ca_state#8] Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26] Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#22))] Aggregate Attributes [1]: [sum#54] Results [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26, sum#55] -(75) Exchange +(72) Exchange Input [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26, sum#55] Arguments: hashpartitioning(c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26, 5), ENSURE_REQUIREMENTS, [id=#56] -(76) HashAggregate [codegen id : 12] +(73) HashAggregate [codegen id : 13] Input [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26, sum#55] Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26] Functions [1]: [sum(UnscaledValue(ss_net_paid#22))] Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#22))#39] Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#22))#39,17,2) AS netpaid#40] -(77) HashAggregate [codegen id : 12] +(74) HashAggregate [codegen 
id : 13] Input [1]: [netpaid#40] Keys: [] Functions [1]: [partial_avg(netpaid#40)] Aggregate Attributes [2]: [sum#57, count#58] Results [2]: [sum#59, count#60] -(78) Exchange +(75) Exchange Input [2]: [sum#59, count#60] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#61] -(79) HashAggregate [codegen id : 13] +(76) HashAggregate [codegen id : 14] Input [2]: [sum#59, count#60] Keys: [] Functions [1]: [avg(netpaid#40)] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/simplified.txt index d12b734269651..4beebcbbe52ef 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/simplified.txt @@ -5,60 +5,57 @@ WholeStageCodegen (12) WholeStageCodegen (11) Filter [paid] Subquery #1 - WholeStageCodegen (13) + WholeStageCodegen (14) HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] InputAdapter Exchange #10 - WholeStageCodegen (12) + WholeStageCodegen (13) HashAggregate [netpaid] [sum,count,sum,count] HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] InputAdapter Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #11 - WholeStageCodegen (11) + WholeStageCodegen (12) HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] InputAdapter - WholeStageCodegen (8) + WholeStageCodegen (9) Sort [ss_ticket_number,ss_item_sk] - Project [s_store_name,s_state,ca_state,c_first_name,c_last_name,ss_item_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (5) - Sort [ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #12 + WholeStageCodegen (8) + Project [s_store_name,s_state,ca_state,c_first_name,c_last_name,ss_item_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter - Exchange [ss_item_sk] #12 - WholeStageCodegen (4) - Project [s_store_name,s_state,ca_state,c_first_name,c_last_name,ss_item_sk,ss_ticket_number,ss_net_paid] - BroadcastHashJoin [s_store_sk,c_customer_sk,ss_store_sk,ss_customer_sk] - InputAdapter - ReusedExchange [s_store_sk,s_store_name,s_state,ca_state,c_customer_sk,c_first_name,c_last_name] #5 - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] - Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - ColumnarToRow + WholeStageCodegen (5) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #13 + WholeStageCodegen (4) + Project [s_store_name,s_state,ca_state,c_first_name,c_last_name,ss_item_sk,ss_ticket_number,ss_net_paid] + BroadcastHashJoin [s_store_sk,c_customer_sk,ss_store_sk,ss_customer_sk] InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] - InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] + 
ReusedExchange [s_store_sk,s_store_name,s_state,ca_state,c_customer_sk,c_first_name,c_last_name] #5 + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] InputAdapter - Exchange [i_item_sk] #13 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + WholeStageCodegen (7) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #14 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] InputAdapter - WholeStageCodegen (10) + WholeStageCodegen (11) Sort [sr_ticket_number,sr_item_sk] InputAdapter - Exchange [sr_item_sk] #14 - WholeStageCodegen (9) - Project [sr_item_sk,sr_ticket_number] - Filter [sr_ticket_number,sr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + ReusedExchange [sr_item_sk,sr_ticket_number] #9 HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] InputAdapter Exchange [c_last_name,c_first_name,s_store_name] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt index 51b2f051403e6..4566f30b27d04 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt @@ -1,53 +1,56 @@ == Physical Plan == -TakeOrderedAndProject (49) -+- * Project (48) - +- * SortMergeJoin Inner (47) - :- * Project (41) - : +- * SortMergeJoin Inner (40) - : :- * Sort (32) - : : +- * Project (31) - : : +- * Filter (30) - : : +- Window (29) - : : +- * Filter (28) - : : +- Window (27) - : : +- * Sort (26) - : : +- Exchange (25) - : : +- * HashAggregate (24) - : : +- Exchange (23) - : : +- * HashAggregate (22) - : : +- * Project (21) - : : +- * SortMergeJoin Inner (20) - : : :- * Sort (14) - : : : +- Exchange (13) - : : : +- * Project (12) - : : : +- * BroadcastHashJoin Inner BuildRight (11) - : : : :- * Project (6) - : : : : +- * BroadcastHashJoin Inner BuildRight (5) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- ReusedExchange (4) - : : : +- BroadcastExchange (10) - : : : +- * Filter (9) - : : : +- * ColumnarToRow (8) - : : : +- Scan parquet default.store (7) - : : +- * Sort (19) - : : +- Exchange (18) - : : +- * Filter (17) - : : +- * ColumnarToRow (16) - : : +- Scan parquet default.item (15) - : +- * Sort (39) - : +- * Project (38) - : +- Window (37) - : +- * Sort (36) - : +- Exchange (35) - : +- * HashAggregate (34) - : +- ReusedExchange (33) - +- * Sort (46) - +- * Project (45) - +- Window (44) - +- * Sort (43) - +- ReusedExchange (42) +TakeOrderedAndProject (52) ++- * Project (51) + +- * SortMergeJoin Inner (50) + :- * Project (43) + : +- * SortMergeJoin Inner (42) + : :- * Sort (33) + : : +- Exchange (32) + : : +- * Project (31) + : : +- * Filter (30) + : : +- Window (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : 
: +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * SortMergeJoin Inner (20) + : : :- * Sort (14) + : : : +- Exchange (13) + : : : +- * Project (12) + : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : :- * Project (6) + : : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- ReusedExchange (4) + : : : +- BroadcastExchange (10) + : : : +- * Filter (9) + : : : +- * ColumnarToRow (8) + : : : +- Scan parquet default.store (7) + : : +- * Sort (19) + : : +- Exchange (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet default.item (15) + : +- * Sort (41) + : +- Exchange (40) + : +- * Project (39) + : +- Window (38) + : +- * Sort (37) + : +- Exchange (36) + : +- * HashAggregate (35) + : +- ReusedExchange (34) + +- * Sort (49) + +- Exchange (48) + +- * Project (47) + +- Window (46) + +- * Sort (45) + +- ReusedExchange (44) (1) Scan parquet default.store_sales @@ -65,7 +68,7 @@ Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_sales_price#3, ss_sold_date_sk#4] Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_sales_price#3, ss_sold_date_sk#4] Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_store_sk#2)) -(4) ReusedExchange [Reuses operator id: 53] +(4) ReusedExchange [Reuses operator id: 56] Output [3]: [d_date_sk#6, d_year#7, d_moy#8] (5) BroadcastHashJoin [codegen id : 3] @@ -189,106 +192,118 @@ Condition : ((isnotnull(avg_monthly_sales#26) AND (avg_monthly_sales#26 > 0.0000 Output [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] Input [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23, rn#25, avg_monthly_sales#26] -(32) Sort [codegen id : 11] +(32) Exchange +Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] +Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#25, 5), ENSURE_REQUIREMENTS, [id=#27] + +(33) Sort [codegen id : 12] Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] Arguments: [i_category#16 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, rn#25 ASC NULLS FIRST], false, 0 -(33) ReusedExchange [Reuses operator id: 23] -Output [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum#33] +(34) ReusedExchange [Reuses operator id: 23] +Output [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum#34] -(34) HashAggregate [codegen id : 19] -Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum#33] -Keys [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32] -Functions [1]: [sum(UnscaledValue(ss_sales_price#34))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#34))#21] -Results [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, MakeDecimal(sum(UnscaledValue(ss_sales_price#34))#21,17,2) AS sum_sales#22] +(35) HashAggregate [codegen id : 20] +Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum#34] +Keys [6]: 
[i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33] +Functions [1]: [sum(UnscaledValue(ss_sales_price#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#35))#21] +Results [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, MakeDecimal(sum(UnscaledValue(ss_sales_price#35))#21,17,2) AS sum_sales#22] -(35) Exchange -Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#22] -Arguments: hashpartitioning(i_category#27, i_brand#28, s_store_name#29, s_company_name#30, 5), ENSURE_REQUIREMENTS, [id=#35] +(36) Exchange +Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22] +Arguments: hashpartitioning(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, 5), ENSURE_REQUIREMENTS, [id=#36] -(36) Sort [codegen id : 20] -Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#22] -Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, s_store_name#29 ASC NULLS FIRST, s_company_name#30 ASC NULLS FIRST, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST], false, 0 +(37) Sort [codegen id : 21] +Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22] +Arguments: [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, s_store_name#30 ASC NULLS FIRST, s_company_name#31 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 -(37) Window -Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#22] -Arguments: [rank(d_year#31, d_moy#32) windowspecdefinition(i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#36], [i_category#27, i_brand#28, s_store_name#29, s_company_name#30], [d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] +(38) Window +Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22] +Arguments: [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#28, i_brand#29, s_store_name#30, s_company_name#31], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] -(38) Project [codegen id : 21] -Output [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#22 AS sum_sales#37, rn#36] -Input [8]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#22, rn#36] +(39) Project [codegen id : 22] +Output [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#22 AS sum_sales#38, rn#37] +Input [8]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22, rn#37] -(39) Sort [codegen id : 21] -Input [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#37, rn#36] -Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, s_store_name#29 ASC NULLS FIRST, s_company_name#30 ASC NULLS FIRST, (rn#36 + 1) ASC NULLS FIRST], false, 0 +(40) Exchange +Input [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#38, rn#37] +Arguments: 
hashpartitioning(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, (rn#37 + 1), 5), ENSURE_REQUIREMENTS, [id=#39] -(40) SortMergeJoin [codegen id : 22] +(41) Sort [codegen id : 23] +Input [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#38, rn#37] +Arguments: [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, s_store_name#30 ASC NULLS FIRST, s_company_name#31 ASC NULLS FIRST, (rn#37 + 1) ASC NULLS FIRST], false, 0 + +(42) SortMergeJoin [codegen id : 24] Left keys [5]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#25] -Right keys [5]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, (rn#36 + 1)] +Right keys [5]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, (rn#37 + 1)] Join condition: None -(41) Project [codegen id : 22] -Output [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, sum_sales#37] -Input [15]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#37, rn#36] +(43) Project [codegen id : 24] +Output [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, sum_sales#38] +Input [15]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#38, rn#37] + +(44) ReusedExchange [Reuses operator id: 36] +Output [7]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22] -(42) ReusedExchange [Reuses operator id: 35] -Output [7]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#22] +(45) Sort [codegen id : 33] +Input [7]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22] +Arguments: [i_category#40 ASC NULLS FIRST, i_brand#41 ASC NULLS FIRST, s_store_name#42 ASC NULLS FIRST, s_company_name#43 ASC NULLS FIRST, d_year#44 ASC NULLS FIRST, d_moy#45 ASC NULLS FIRST], false, 0 -(43) Sort [codegen id : 31] -Input [7]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#22] -Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, s_store_name#40 ASC NULLS FIRST, s_company_name#41 ASC NULLS FIRST, d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST], false, 0 +(46) Window +Input [7]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22] +Arguments: [rank(d_year#44, d_moy#45) windowspecdefinition(i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44 ASC NULLS FIRST, d_moy#45 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#46], [i_category#40, i_brand#41, s_store_name#42, s_company_name#43], [d_year#44 ASC NULLS FIRST, d_moy#45 ASC NULLS FIRST] -(44) Window -Input [7]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#22] -Arguments: [rank(d_year#42, d_moy#43) windowspecdefinition(i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#44], [i_category#38, i_brand#39, s_store_name#40, 
s_company_name#41], [d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST] +(47) Project [codegen id : 34] +Output [6]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#22 AS sum_sales#47, rn#46] +Input [8]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22, rn#46] -(45) Project [codegen id : 32] -Output [6]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, sum_sales#22 AS sum_sales#45, rn#44] -Input [8]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#22, rn#44] +(48) Exchange +Input [6]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#47, rn#46] +Arguments: hashpartitioning(i_category#40, i_brand#41, s_store_name#42, s_company_name#43, (rn#46 - 1), 5), ENSURE_REQUIREMENTS, [id=#48] -(46) Sort [codegen id : 32] -Input [6]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, sum_sales#45, rn#44] -Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, s_store_name#40 ASC NULLS FIRST, s_company_name#41 ASC NULLS FIRST, (rn#44 - 1) ASC NULLS FIRST], false, 0 +(49) Sort [codegen id : 35] +Input [6]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#47, rn#46] +Arguments: [i_category#40 ASC NULLS FIRST, i_brand#41 ASC NULLS FIRST, s_store_name#42 ASC NULLS FIRST, s_company_name#43 ASC NULLS FIRST, (rn#46 - 1) ASC NULLS FIRST], false, 0 -(47) SortMergeJoin [codegen id : 33] +(50) SortMergeJoin [codegen id : 36] Left keys [5]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#25] -Right keys [5]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, (rn#44 - 1)] +Right keys [5]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, (rn#46 - 1)] Join condition: None -(48) Project [codegen id : 33] -Output [7]: [i_category#16, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, sum_sales#37 AS psum#46, sum_sales#45 AS nsum#47] -Input [16]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, sum_sales#37, i_category#38, i_brand#39, s_store_name#40, s_company_name#41, sum_sales#45, rn#44] +(51) Project [codegen id : 36] +Output [7]: [i_category#16, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, sum_sales#38 AS psum#49, sum_sales#47 AS nsum#50] +Input [16]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, sum_sales#38, i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#47, rn#46] -(49) TakeOrderedAndProject -Input [7]: [i_category#16, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#46, nsum#47] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], [i_category#16, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#46, nsum#47] +(52) TakeOrderedAndProject +Input [7]: [i_category#16, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], [i_category#16, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] ===== Subqueries ===== 
Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 -BroadcastExchange (53) -+- * Filter (52) - +- * ColumnarToRow (51) - +- Scan parquet default.date_dim (50) +BroadcastExchange (56) ++- * Filter (55) + +- * ColumnarToRow (54) + +- Scan parquet default.date_dim (53) -(50) Scan parquet default.date_dim +(53) Scan parquet default.date_dim Output [3]: [d_date_sk#6, d_year#7, d_moy#8] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct -(51) ColumnarToRow [codegen id : 1] +(54) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#6, d_year#7, d_moy#8] -(52) Filter [codegen id : 1] +(55) Filter [codegen id : 1] Input [3]: [d_date_sk#6, d_year#7, d_moy#8] Condition : ((((d_year#7 = 1999) OR ((d_year#7 = 1998) AND (d_moy#8 = 12))) OR ((d_year#7 = 2000) AND (d_moy#8 = 1))) AND isnotnull(d_date_sk#6)) -(53) BroadcastExchange +(56) BroadcastExchange Input [3]: [d_date_sk#6, d_year#7, d_moy#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#51] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/simplified.txt index 65bcf10a8518b..5f64a22717270 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/simplified.txt @@ -1,95 +1,104 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_moy,i_category,d_year,psum,nsum] - WholeStageCodegen (33) + WholeStageCodegen (36) Project [i_category,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] SortMergeJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] InputAdapter - WholeStageCodegen (22) + WholeStageCodegen (24) Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] SortMergeJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] InputAdapter - WholeStageCodegen (11) + WholeStageCodegen (12) Sort [i_category,i_brand,s_store_name,s_company_name,rn] - Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] - Filter [avg_monthly_sales,sum_sales] - InputAdapter - Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] - WholeStageCodegen (10) - Filter [d_year] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] - WholeStageCodegen (9) - Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] - InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name] #1 - WholeStageCodegen (8) - HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] - InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #2 - WholeStageCodegen (7) - HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,ss_sales_price] [sum,sum] - Project [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] - SortMergeJoin 
[ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (4) - Sort [ss_item_sk] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,rn] #1 + WholeStageCodegen (11) + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] + WholeStageCodegen (10) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (9) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #2 + WholeStageCodegen (8) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #3 + WholeStageCodegen (7) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,ss_sales_price] [sum,sum] + Project [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter - Exchange [ss_item_sk] #3 - WholeStageCodegen (3) - Project [ss_item_sk,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_sales_price,d_year,d_moy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_item_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #4 - WholeStageCodegen (1) - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - ReusedExchange [d_date_sk,d_year,d_moy] #4 - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [s_store_sk,s_store_name,s_company_name] - ColumnarToRow + WholeStageCodegen (4) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (3) + Project [ss_item_sk,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_sales_price,d_year,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #5 + WholeStageCodegen (1) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter - Scan parquet default.store [s_store_sk,s_store_name,s_company_name] - InputAdapter - WholeStageCodegen (6) - Sort [i_item_sk] + ReusedExchange [d_date_sk,d_year,d_moy] #5 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [s_store_sk,s_store_name,s_company_name] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name,s_company_name] InputAdapter - Exchange [i_item_sk] #6 - WholeStageCodegen (5) - Filter [i_item_sk,i_category,i_brand] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand,i_category] + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #7 + WholeStageCodegen (5) + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan 
parquet default.item [i_item_sk,i_brand,i_category] InputAdapter - WholeStageCodegen (21) + WholeStageCodegen (23) Sort [i_category,i_brand,s_store_name,s_company_name,rn] - Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] - WholeStageCodegen (20) - Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] - InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name] #7 - WholeStageCodegen (19) - HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] - InputAdapter - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,rn] #8 + WholeStageCodegen (22) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (21) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #9 + WholeStageCodegen (20) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #3 InputAdapter - WholeStageCodegen (32) + WholeStageCodegen (35) Sort [i_category,i_brand,s_store_name,s_company_name,rn] - Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] - WholeStageCodegen (31) - Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] - InputAdapter - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #7 + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,rn] #10 + WholeStageCodegen (34) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (33) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/explain.txt index e3d76bfea8c2c..64111eef627d2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/explain.txt @@ -1,72 +1,74 @@ == Physical Plan == -TakeOrderedAndProject (68) -+- * Filter (67) - +- * HashAggregate (66) - +- * HashAggregate (65) - +- * Project (64) - +- * SortMergeJoin Inner (63) - :- Window (58) - : +- * Sort (57) - : +- Exchange (56) - : +- * Project (55) - : +- * Filter (54) - : +- * SortMergeJoin FullOuter (53) - : :- * Sort (26) - : : +- * HashAggregate (25) - : : +- * HashAggregate (24) - : : +- * Project (23) - : : +- * SortMergeJoin Inner (22) - : : :- * Sort (15) - : : : +- Exchange (14) - : : : +- * Project (13) - : : : +- Window (12) - : : : +- * Sort (11) - : : : +- Exchange (10) - : : : +- * HashAggregate (9) - : : : +- Exchange (8) - : : : +- * HashAggregate (7) - : : : +- * Project 
(6) - : : : +- * BroadcastHashJoin Inner BuildRight (5) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- ReusedExchange (4) - : : +- * Sort (21) - : : +- Exchange (20) - : : +- * Project (19) - : : +- Window (18) - : : +- * Sort (17) - : : +- ReusedExchange (16) - : +- * Sort (52) - : +- * HashAggregate (51) - : +- * HashAggregate (50) - : +- * Project (49) - : +- * SortMergeJoin Inner (48) - : :- * Sort (41) - : : +- Exchange (40) - : : +- * Project (39) - : : +- Window (38) - : : +- * Sort (37) - : : +- Exchange (36) - : : +- * HashAggregate (35) - : : +- Exchange (34) - : : +- * HashAggregate (33) - : : +- * Project (32) - : : +- * BroadcastHashJoin Inner BuildRight (31) - : : :- * Filter (29) - : : : +- * ColumnarToRow (28) - : : : +- Scan parquet default.store_sales (27) - : : +- ReusedExchange (30) - : +- * Sort (47) - : +- Exchange (46) - : +- * Project (45) - : +- Window (44) - : +- * Sort (43) - : +- ReusedExchange (42) - +- * Project (62) - +- Window (61) - +- * Sort (60) - +- ReusedExchange (59) +TakeOrderedAndProject (70) ++- * Filter (69) + +- * HashAggregate (68) + +- * HashAggregate (67) + +- * Project (66) + +- * SortMergeJoin Inner (65) + :- Window (60) + : +- * Sort (59) + : +- Exchange (58) + : +- * Project (57) + : +- * Filter (56) + : +- * SortMergeJoin FullOuter (55) + : :- * Sort (27) + : : +- Exchange (26) + : : +- * HashAggregate (25) + : : +- * HashAggregate (24) + : : +- * Project (23) + : : +- * SortMergeJoin Inner (22) + : : :- * Sort (15) + : : : +- Exchange (14) + : : : +- * Project (13) + : : : +- Window (12) + : : : +- * Sort (11) + : : : +- Exchange (10) + : : : +- * HashAggregate (9) + : : : +- Exchange (8) + : : : +- * HashAggregate (7) + : : : +- * Project (6) + : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- ReusedExchange (4) + : : +- * Sort (21) + : : +- Exchange (20) + : : +- * Project (19) + : : +- Window (18) + : : +- * Sort (17) + : : +- ReusedExchange (16) + : +- * Sort (54) + : +- Exchange (53) + : +- * HashAggregate (52) + : +- * HashAggregate (51) + : +- * Project (50) + : +- * SortMergeJoin Inner (49) + : :- * Sort (42) + : : +- Exchange (41) + : : +- * Project (40) + : : +- Window (39) + : : +- * Sort (38) + : : +- Exchange (37) + : : +- * HashAggregate (36) + : : +- Exchange (35) + : : +- * HashAggregate (34) + : : +- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.store_sales (28) + : : +- ReusedExchange (31) + : +- * Sort (48) + : +- Exchange (47) + : +- * Project (46) + : +- Window (45) + : +- * Sort (44) + : +- ReusedExchange (43) + +- * Project (64) + +- Window (63) + +- * Sort (62) + +- ReusedExchange (61) (1) Scan parquet default.web_sales @@ -84,7 +86,7 @@ Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] Condition : isnotnull(ws_item_sk#1) -(4) ReusedExchange [Reuses operator id: 73] +(4) ReusedExchange [Reuses operator id: 75] Output [2]: [d_date_sk#5, d_date#6] (5) BroadcastHashJoin [codegen id : 2] @@ -184,232 +186,240 @@ Functions [1]: [sum(sumws#20)] Aggregate Attributes [1]: [sum(sumws#20)#26] Results [3]: [item_sk#11, d_date#6, sum(sumws#20)#26 AS cume_sales#27] -(26) Sort [codegen id : 13] +(26) Exchange +Input [3]: [item_sk#11, d_date#6, cume_sales#27] 
+Arguments: hashpartitioning(item_sk#11, d_date#6, 5), ENSURE_REQUIREMENTS, [id=#28] + +(27) Sort [codegen id : 14] Input [3]: [item_sk#11, d_date#6, cume_sales#27] Arguments: [item_sk#11 ASC NULLS FIRST, d_date#6 ASC NULLS FIRST], false, 0 -(27) Scan parquet default.store_sales -Output [3]: [ss_item_sk#28, ss_sales_price#29, ss_sold_date_sk#30] +(28) Scan parquet default.store_sales +Output [3]: [ss_item_sk#29, ss_sales_price#30, ss_sold_date_sk#31] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#30), dynamicpruningexpression(ss_sold_date_sk#30 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(ss_sold_date_sk#31), dynamicpruningexpression(ss_sold_date_sk#31 IN dynamicpruning#4)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct -(28) ColumnarToRow [codegen id : 15] -Input [3]: [ss_item_sk#28, ss_sales_price#29, ss_sold_date_sk#30] +(29) ColumnarToRow [codegen id : 16] +Input [3]: [ss_item_sk#29, ss_sales_price#30, ss_sold_date_sk#31] -(29) Filter [codegen id : 15] -Input [3]: [ss_item_sk#28, ss_sales_price#29, ss_sold_date_sk#30] -Condition : isnotnull(ss_item_sk#28) +(30) Filter [codegen id : 16] +Input [3]: [ss_item_sk#29, ss_sales_price#30, ss_sold_date_sk#31] +Condition : isnotnull(ss_item_sk#29) -(30) ReusedExchange [Reuses operator id: 73] -Output [2]: [d_date_sk#31, d_date#32] +(31) ReusedExchange [Reuses operator id: 75] +Output [2]: [d_date_sk#32, d_date#33] -(31) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [ss_sold_date_sk#30] -Right keys [1]: [d_date_sk#31] +(32) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [ss_sold_date_sk#31] +Right keys [1]: [d_date_sk#32] Join condition: None -(32) Project [codegen id : 15] -Output [3]: [ss_item_sk#28, ss_sales_price#29, d_date#32] -Input [5]: [ss_item_sk#28, ss_sales_price#29, ss_sold_date_sk#30, d_date_sk#31, d_date#32] - -(33) HashAggregate [codegen id : 15] -Input [3]: [ss_item_sk#28, ss_sales_price#29, d_date#32] -Keys [2]: [ss_item_sk#28, d_date#32] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#29))] -Aggregate Attributes [1]: [sum#33] -Results [3]: [ss_item_sk#28, d_date#32, sum#34] - -(34) Exchange -Input [3]: [ss_item_sk#28, d_date#32, sum#34] -Arguments: hashpartitioning(ss_item_sk#28, d_date#32, 5), ENSURE_REQUIREMENTS, [id=#35] - -(35) HashAggregate [codegen id : 16] -Input [3]: [ss_item_sk#28, d_date#32, sum#34] -Keys [2]: [ss_item_sk#28, d_date#32] -Functions [1]: [sum(UnscaledValue(ss_sales_price#29))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#29))#36] -Results [4]: [ss_item_sk#28 AS item_sk#37, d_date#32, MakeDecimal(sum(UnscaledValue(ss_sales_price#29))#36,17,2) AS sumss#38, ss_item_sk#28] - -(36) Exchange -Input [4]: [item_sk#37, d_date#32, sumss#38, ss_item_sk#28] -Arguments: hashpartitioning(ss_item_sk#28, 5), ENSURE_REQUIREMENTS, [id=#39] - -(37) Sort [codegen id : 17] -Input [4]: [item_sk#37, d_date#32, sumss#38, ss_item_sk#28] -Arguments: [ss_item_sk#28 ASC NULLS FIRST, d_date#32 ASC NULLS FIRST], false, 0 - -(38) Window -Input [4]: [item_sk#37, d_date#32, sumss#38, ss_item_sk#28] -Arguments: [row_number() windowspecdefinition(ss_item_sk#28, d_date#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#40], [ss_item_sk#28], [d_date#32 ASC NULLS FIRST] - -(39) Project [codegen id : 18] -Output [4]: [item_sk#37, d_date#32, sumss#38, rk#40] -Input [5]: [item_sk#37, d_date#32, sumss#38, ss_item_sk#28, rk#40] - -(40) Exchange -Input [4]: [item_sk#37, d_date#32, sumss#38, rk#40] 
-Arguments: hashpartitioning(item_sk#37, 5), ENSURE_REQUIREMENTS, [id=#41] - -(41) Sort [codegen id : 19] -Input [4]: [item_sk#37, d_date#32, sumss#38, rk#40] -Arguments: [item_sk#37 ASC NULLS FIRST], false, 0 - -(42) ReusedExchange [Reuses operator id: 36] -Output [4]: [item_sk#37, d_date#42, sumss#38, ss_item_sk#43] - -(43) Sort [codegen id : 23] -Input [4]: [item_sk#37, d_date#42, sumss#38, ss_item_sk#43] -Arguments: [ss_item_sk#43 ASC NULLS FIRST, d_date#42 ASC NULLS FIRST], false, 0 - -(44) Window -Input [4]: [item_sk#37, d_date#42, sumss#38, ss_item_sk#43] -Arguments: [row_number() windowspecdefinition(ss_item_sk#43, d_date#42 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#44], [ss_item_sk#43], [d_date#42 ASC NULLS FIRST] - -(45) Project [codegen id : 24] -Output [3]: [item_sk#37 AS item_sk#45, sumss#38 AS sumss#46, rk#44] -Input [5]: [item_sk#37, d_date#42, sumss#38, ss_item_sk#43, rk#44] - -(46) Exchange -Input [3]: [item_sk#45, sumss#46, rk#44] -Arguments: hashpartitioning(item_sk#45, 5), ENSURE_REQUIREMENTS, [id=#47] - -(47) Sort [codegen id : 25] -Input [3]: [item_sk#45, sumss#46, rk#44] -Arguments: [item_sk#45 ASC NULLS FIRST], false, 0 - -(48) SortMergeJoin [codegen id : 26] -Left keys [1]: [item_sk#37] -Right keys [1]: [item_sk#45] -Join condition: (rk#40 >= rk#44) - -(49) Project [codegen id : 26] -Output [4]: [item_sk#37, d_date#32, sumss#38, sumss#46] -Input [7]: [item_sk#37, d_date#32, sumss#38, rk#40, item_sk#45, sumss#46, rk#44] - -(50) HashAggregate [codegen id : 26] -Input [4]: [item_sk#37, d_date#32, sumss#38, sumss#46] -Keys [3]: [item_sk#37, d_date#32, sumss#38] -Functions [1]: [partial_sum(sumss#46)] -Aggregate Attributes [2]: [sum#48, isEmpty#49] -Results [5]: [item_sk#37, d_date#32, sumss#38, sum#50, isEmpty#51] - -(51) HashAggregate [codegen id : 26] -Input [5]: [item_sk#37, d_date#32, sumss#38, sum#50, isEmpty#51] -Keys [3]: [item_sk#37, d_date#32, sumss#38] -Functions [1]: [sum(sumss#46)] -Aggregate Attributes [1]: [sum(sumss#46)#52] -Results [3]: [item_sk#37, d_date#32, sum(sumss#46)#52 AS cume_sales#53] - -(52) Sort [codegen id : 26] -Input [3]: [item_sk#37, d_date#32, cume_sales#53] -Arguments: [item_sk#37 ASC NULLS FIRST, d_date#32 ASC NULLS FIRST], false, 0 - -(53) SortMergeJoin [codegen id : 27] +(33) Project [codegen id : 16] +Output [3]: [ss_item_sk#29, ss_sales_price#30, d_date#33] +Input [5]: [ss_item_sk#29, ss_sales_price#30, ss_sold_date_sk#31, d_date_sk#32, d_date#33] + +(34) HashAggregate [codegen id : 16] +Input [3]: [ss_item_sk#29, ss_sales_price#30, d_date#33] +Keys [2]: [ss_item_sk#29, d_date#33] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#30))] +Aggregate Attributes [1]: [sum#34] +Results [3]: [ss_item_sk#29, d_date#33, sum#35] + +(35) Exchange +Input [3]: [ss_item_sk#29, d_date#33, sum#35] +Arguments: hashpartitioning(ss_item_sk#29, d_date#33, 5), ENSURE_REQUIREMENTS, [id=#36] + +(36) HashAggregate [codegen id : 17] +Input [3]: [ss_item_sk#29, d_date#33, sum#35] +Keys [2]: [ss_item_sk#29, d_date#33] +Functions [1]: [sum(UnscaledValue(ss_sales_price#30))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#30))#37] +Results [4]: [ss_item_sk#29 AS item_sk#38, d_date#33, MakeDecimal(sum(UnscaledValue(ss_sales_price#30))#37,17,2) AS sumss#39, ss_item_sk#29] + +(37) Exchange +Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#29] +Arguments: hashpartitioning(ss_item_sk#29, 5), ENSURE_REQUIREMENTS, [id=#40] + +(38) Sort [codegen id : 18] +Input [4]: [item_sk#38, d_date#33, 
sumss#39, ss_item_sk#29] +Arguments: [ss_item_sk#29 ASC NULLS FIRST, d_date#33 ASC NULLS FIRST], false, 0 + +(39) Window +Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#29] +Arguments: [row_number() windowspecdefinition(ss_item_sk#29, d_date#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#41], [ss_item_sk#29], [d_date#33 ASC NULLS FIRST] + +(40) Project [codegen id : 19] +Output [4]: [item_sk#38, d_date#33, sumss#39, rk#41] +Input [5]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#29, rk#41] + +(41) Exchange +Input [4]: [item_sk#38, d_date#33, sumss#39, rk#41] +Arguments: hashpartitioning(item_sk#38, 5), ENSURE_REQUIREMENTS, [id=#42] + +(42) Sort [codegen id : 20] +Input [4]: [item_sk#38, d_date#33, sumss#39, rk#41] +Arguments: [item_sk#38 ASC NULLS FIRST], false, 0 + +(43) ReusedExchange [Reuses operator id: 37] +Output [4]: [item_sk#38, d_date#43, sumss#39, ss_item_sk#44] + +(44) Sort [codegen id : 24] +Input [4]: [item_sk#38, d_date#43, sumss#39, ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST, d_date#43 ASC NULLS FIRST], false, 0 + +(45) Window +Input [4]: [item_sk#38, d_date#43, sumss#39, ss_item_sk#44] +Arguments: [row_number() windowspecdefinition(ss_item_sk#44, d_date#43 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#45], [ss_item_sk#44], [d_date#43 ASC NULLS FIRST] + +(46) Project [codegen id : 25] +Output [3]: [item_sk#38 AS item_sk#46, sumss#39 AS sumss#47, rk#45] +Input [5]: [item_sk#38, d_date#43, sumss#39, ss_item_sk#44, rk#45] + +(47) Exchange +Input [3]: [item_sk#46, sumss#47, rk#45] +Arguments: hashpartitioning(item_sk#46, 5), ENSURE_REQUIREMENTS, [id=#48] + +(48) Sort [codegen id : 26] +Input [3]: [item_sk#46, sumss#47, rk#45] +Arguments: [item_sk#46 ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin [codegen id : 27] +Left keys [1]: [item_sk#38] +Right keys [1]: [item_sk#46] +Join condition: (rk#41 >= rk#45) + +(50) Project [codegen id : 27] +Output [4]: [item_sk#38, d_date#33, sumss#39, sumss#47] +Input [7]: [item_sk#38, d_date#33, sumss#39, rk#41, item_sk#46, sumss#47, rk#45] + +(51) HashAggregate [codegen id : 27] +Input [4]: [item_sk#38, d_date#33, sumss#39, sumss#47] +Keys [3]: [item_sk#38, d_date#33, sumss#39] +Functions [1]: [partial_sum(sumss#47)] +Aggregate Attributes [2]: [sum#49, isEmpty#50] +Results [5]: [item_sk#38, d_date#33, sumss#39, sum#51, isEmpty#52] + +(52) HashAggregate [codegen id : 27] +Input [5]: [item_sk#38, d_date#33, sumss#39, sum#51, isEmpty#52] +Keys [3]: [item_sk#38, d_date#33, sumss#39] +Functions [1]: [sum(sumss#47)] +Aggregate Attributes [1]: [sum(sumss#47)#53] +Results [3]: [item_sk#38, d_date#33, sum(sumss#47)#53 AS cume_sales#54] + +(53) Exchange +Input [3]: [item_sk#38, d_date#33, cume_sales#54] +Arguments: hashpartitioning(item_sk#38, d_date#33, 5), ENSURE_REQUIREMENTS, [id=#55] + +(54) Sort [codegen id : 28] +Input [3]: [item_sk#38, d_date#33, cume_sales#54] +Arguments: [item_sk#38 ASC NULLS FIRST, d_date#33 ASC NULLS FIRST], false, 0 + +(55) SortMergeJoin [codegen id : 29] Left keys [2]: [item_sk#11, d_date#6] -Right keys [2]: [item_sk#37, d_date#32] +Right keys [2]: [item_sk#38, d_date#33] Join condition: None -(54) Filter [codegen id : 27] -Input [6]: [item_sk#11, d_date#6, cume_sales#27, item_sk#37, d_date#32, cume_sales#53] -Condition : isnotnull(CASE WHEN isnotnull(item_sk#11) THEN item_sk#11 ELSE item_sk#37 END) +(56) Filter [codegen id : 29] +Input [6]: [item_sk#11, d_date#6, cume_sales#27, item_sk#38, d_date#33, 
cume_sales#54] +Condition : isnotnull(CASE WHEN isnotnull(item_sk#11) THEN item_sk#11 ELSE item_sk#38 END) -(55) Project [codegen id : 27] -Output [4]: [CASE WHEN isnotnull(item_sk#11) THEN item_sk#11 ELSE item_sk#37 END AS item_sk#54, CASE WHEN isnotnull(d_date#6) THEN d_date#6 ELSE d_date#32 END AS d_date#55, cume_sales#27 AS web_sales#56, cume_sales#53 AS store_sales#57] -Input [6]: [item_sk#11, d_date#6, cume_sales#27, item_sk#37, d_date#32, cume_sales#53] +(57) Project [codegen id : 29] +Output [4]: [CASE WHEN isnotnull(item_sk#11) THEN item_sk#11 ELSE item_sk#38 END AS item_sk#56, CASE WHEN isnotnull(d_date#6) THEN d_date#6 ELSE d_date#33 END AS d_date#57, cume_sales#27 AS web_sales#58, cume_sales#54 AS store_sales#59] +Input [6]: [item_sk#11, d_date#6, cume_sales#27, item_sk#38, d_date#33, cume_sales#54] -(56) Exchange -Input [4]: [item_sk#54, d_date#55, web_sales#56, store_sales#57] -Arguments: hashpartitioning(item_sk#54, 5), ENSURE_REQUIREMENTS, [id=#58] +(58) Exchange +Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] +Arguments: hashpartitioning(item_sk#56, 5), ENSURE_REQUIREMENTS, [id=#60] -(57) Sort [codegen id : 28] -Input [4]: [item_sk#54, d_date#55, web_sales#56, store_sales#57] -Arguments: [item_sk#54 ASC NULLS FIRST, d_date#55 ASC NULLS FIRST], false, 0 +(59) Sort [codegen id : 30] +Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] +Arguments: [item_sk#56 ASC NULLS FIRST, d_date#57 ASC NULLS FIRST], false, 0 -(58) Window -Input [4]: [item_sk#54, d_date#55, web_sales#56, store_sales#57] -Arguments: [row_number() windowspecdefinition(item_sk#54, d_date#55 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#59], [item_sk#54], [d_date#55 ASC NULLS FIRST] +(60) Window +Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] +Arguments: [row_number() windowspecdefinition(item_sk#56, d_date#57 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#61], [item_sk#56], [d_date#57 ASC NULLS FIRST] -(59) ReusedExchange [Reuses operator id: 56] -Output [4]: [item_sk#54, d_date#55, web_sales#56, store_sales#57] +(61) ReusedExchange [Reuses operator id: 58] +Output [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -(60) Sort [codegen id : 56] -Input [4]: [item_sk#54, d_date#55, web_sales#56, store_sales#57] -Arguments: [item_sk#54 ASC NULLS FIRST, d_date#55 ASC NULLS FIRST], false, 0 +(62) Sort [codegen id : 60] +Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] +Arguments: [item_sk#56 ASC NULLS FIRST, d_date#57 ASC NULLS FIRST], false, 0 -(61) Window -Input [4]: [item_sk#54, d_date#55, web_sales#56, store_sales#57] -Arguments: [row_number() windowspecdefinition(item_sk#54, d_date#55 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#60], [item_sk#54], [d_date#55 ASC NULLS FIRST] +(63) Window +Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] +Arguments: [row_number() windowspecdefinition(item_sk#56, d_date#57 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#62], [item_sk#56], [d_date#57 ASC NULLS FIRST] -(62) Project [codegen id : 57] -Output [4]: [item_sk#54 AS item_sk#61, web_sales#56 AS web_sales#62, store_sales#57 AS store_sales#63, rk#60] -Input [5]: [item_sk#54, d_date#55, web_sales#56, store_sales#57, rk#60] +(64) Project [codegen id : 61] +Output [4]: [item_sk#56 AS item_sk#63, web_sales#58 AS web_sales#64, store_sales#59 AS 
store_sales#65, rk#62] +Input [5]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, rk#62] -(63) SortMergeJoin [codegen id : 58] -Left keys [1]: [item_sk#54] -Right keys [1]: [item_sk#61] -Join condition: (rk#59 >= rk#60) +(65) SortMergeJoin [codegen id : 62] +Left keys [1]: [item_sk#56] +Right keys [1]: [item_sk#63] +Join condition: (rk#61 >= rk#62) -(64) Project [codegen id : 58] -Output [6]: [item_sk#54, d_date#55, web_sales#56, store_sales#57, web_sales#62, store_sales#63] -Input [9]: [item_sk#54, d_date#55, web_sales#56, store_sales#57, rk#59, item_sk#61, web_sales#62, store_sales#63, rk#60] +(66) Project [codegen id : 62] +Output [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_sales#64, store_sales#65] +Input [9]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, rk#61, item_sk#63, web_sales#64, store_sales#65, rk#62] -(65) HashAggregate [codegen id : 58] -Input [6]: [item_sk#54, d_date#55, web_sales#56, store_sales#57, web_sales#62, store_sales#63] -Keys [4]: [item_sk#54, d_date#55, web_sales#56, store_sales#57] -Functions [2]: [partial_max(web_sales#62), partial_max(store_sales#63)] -Aggregate Attributes [2]: [max#64, max#65] -Results [6]: [item_sk#54, d_date#55, web_sales#56, store_sales#57, max#66, max#67] +(67) HashAggregate [codegen id : 62] +Input [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_sales#64, store_sales#65] +Keys [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] +Functions [2]: [partial_max(web_sales#64), partial_max(store_sales#65)] +Aggregate Attributes [2]: [max#66, max#67] +Results [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, max#68, max#69] -(66) HashAggregate [codegen id : 58] -Input [6]: [item_sk#54, d_date#55, web_sales#56, store_sales#57, max#66, max#67] -Keys [4]: [item_sk#54, d_date#55, web_sales#56, store_sales#57] -Functions [2]: [max(web_sales#62), max(store_sales#63)] -Aggregate Attributes [2]: [max(web_sales#62)#68, max(store_sales#63)#69] -Results [6]: [item_sk#54, d_date#55, web_sales#56, store_sales#57, max(web_sales#62)#68 AS web_cumulative#70, max(store_sales#63)#69 AS store_cumulative#71] +(68) HashAggregate [codegen id : 62] +Input [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, max#68, max#69] +Keys [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] +Functions [2]: [max(web_sales#64), max(store_sales#65)] +Aggregate Attributes [2]: [max(web_sales#64)#70, max(store_sales#65)#71] +Results [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, max(web_sales#64)#70 AS web_cumulative#72, max(store_sales#65)#71 AS store_cumulative#73] -(67) Filter [codegen id : 58] -Input [6]: [item_sk#54, d_date#55, web_sales#56, store_sales#57, web_cumulative#70, store_cumulative#71] -Condition : ((isnotnull(web_cumulative#70) AND isnotnull(store_cumulative#71)) AND (web_cumulative#70 > store_cumulative#71)) +(69) Filter [codegen id : 62] +Input [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_cumulative#72, store_cumulative#73] +Condition : ((isnotnull(web_cumulative#72) AND isnotnull(store_cumulative#73)) AND (web_cumulative#72 > store_cumulative#73)) -(68) TakeOrderedAndProject -Input [6]: [item_sk#54, d_date#55, web_sales#56, store_sales#57, web_cumulative#70, store_cumulative#71] -Arguments: 100, [item_sk#54 ASC NULLS FIRST, d_date#55 ASC NULLS FIRST], [item_sk#54, d_date#55, web_sales#56, store_sales#57, web_cumulative#70, store_cumulative#71] +(70) TakeOrderedAndProject +Input [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_cumulative#72, 
store_cumulative#73] +Arguments: 100, [item_sk#56 ASC NULLS FIRST, d_date#57 ASC NULLS FIRST], [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_cumulative#72, store_cumulative#73] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = ws_sold_date_sk#3 IN dynamicpruning#4 -BroadcastExchange (73) -+- * Project (72) - +- * Filter (71) - +- * ColumnarToRow (70) - +- Scan parquet default.date_dim (69) +BroadcastExchange (75) ++- * Project (74) + +- * Filter (73) + +- * ColumnarToRow (72) + +- Scan parquet default.date_dim (71) -(69) Scan parquet default.date_dim -Output [3]: [d_date_sk#5, d_date#6, d_month_seq#72] +(71) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_date#6, d_month_seq#74] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] ReadSchema: struct -(70) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#72] +(72) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#74] -(71) Filter [codegen id : 1] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#72] -Condition : (((isnotnull(d_month_seq#72) AND (d_month_seq#72 >= 1212)) AND (d_month_seq#72 <= 1223)) AND isnotnull(d_date_sk#5)) +(73) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#74] +Condition : (((isnotnull(d_month_seq#74) AND (d_month_seq#74 >= 1212)) AND (d_month_seq#74 <= 1223)) AND isnotnull(d_date_sk#5)) -(72) Project [codegen id : 1] +(74) Project [codegen id : 1] Output [2]: [d_date_sk#5, d_date#6] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#72] +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#74] -(73) BroadcastExchange +(75) BroadcastExchange Input [2]: [d_date_sk#5, d_date#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#73] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#75] -Subquery:2 Hosting operator id = 27 Hosting Expression = ss_sold_date_sk#30 IN dynamicpruning#4 +Subquery:2 Hosting operator id = 28 Hosting Expression = ss_sold_date_sk#31 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/simplified.txt index b1d245a9ffc43..1a89b7c72a169 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/simplified.txt @@ -1,5 +1,5 @@ TakeOrderedAndProject [item_sk,d_date,web_sales,store_sales,web_cumulative,store_cumulative] - WholeStageCodegen (58) + WholeStageCodegen (62) Filter [web_cumulative,store_cumulative] HashAggregate [item_sk,d_date,web_sales,store_sales,max,max] [max(web_sales),max(store_sales),web_cumulative,store_cumulative,max,max] HashAggregate [item_sk,d_date,web_sales,store_sales,web_sales,store_sales] [max,max,max,max] @@ -7,123 +7,129 @@ TakeOrderedAndProject [item_sk,d_date,web_sales,store_sales,web_cumulative,store SortMergeJoin [item_sk,item_sk,rk,rk] InputAdapter Window [item_sk,d_date] - WholeStageCodegen (28) + WholeStageCodegen (30) Sort [item_sk,d_date] InputAdapter Exchange [item_sk] #1 - WholeStageCodegen (27) + WholeStageCodegen (29) Project [item_sk,item_sk,d_date,d_date,cume_sales,cume_sales] Filter [item_sk,item_sk] 
SortMergeJoin [item_sk,d_date,item_sk,d_date] InputAdapter - WholeStageCodegen (13) + WholeStageCodegen (14) Sort [item_sk,d_date] - HashAggregate [item_sk,d_date,sumws,sum,isEmpty] [sum(sumws),cume_sales,sum,isEmpty] - HashAggregate [item_sk,d_date,sumws,sumws] [sum,isEmpty,sum,isEmpty] - Project [item_sk,d_date,sumws,sumws] - SortMergeJoin [item_sk,item_sk,rk,rk] - InputAdapter - WholeStageCodegen (6) - Sort [item_sk] + InputAdapter + Exchange [item_sk,d_date] #2 + WholeStageCodegen (13) + HashAggregate [item_sk,d_date,sumws,sum,isEmpty] [sum(sumws),cume_sales,sum,isEmpty] + HashAggregate [item_sk,d_date,sumws,sumws] [sum,isEmpty,sum,isEmpty] + Project [item_sk,d_date,sumws,sumws] + SortMergeJoin [item_sk,item_sk,rk,rk] InputAdapter - Exchange [item_sk] #2 - WholeStageCodegen (5) - Project [item_sk,d_date,sumws,rk] - InputAdapter - Window [ws_item_sk,d_date] - WholeStageCodegen (4) - Sort [ws_item_sk,d_date] - InputAdapter - Exchange [ws_item_sk] #3 - WholeStageCodegen (3) - HashAggregate [ws_item_sk,d_date,sum] [sum(UnscaledValue(ws_sales_price)),item_sk,sumws,sum] - InputAdapter - Exchange [ws_item_sk,d_date] #4 - WholeStageCodegen (2) - HashAggregate [ws_item_sk,d_date,ws_sales_price] [sum,sum] - Project [ws_item_sk,ws_sales_price,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - ReusedExchange [d_date_sk,d_date] #5 - InputAdapter - WholeStageCodegen (12) - Sort [item_sk] + WholeStageCodegen (6) + Sort [item_sk] + InputAdapter + Exchange [item_sk] #3 + WholeStageCodegen (5) + Project [item_sk,d_date,sumws,rk] + InputAdapter + Window [ws_item_sk,d_date] + WholeStageCodegen (4) + Sort [ws_item_sk,d_date] + InputAdapter + Exchange [ws_item_sk] #4 + WholeStageCodegen (3) + HashAggregate [ws_item_sk,d_date,sum] [sum(UnscaledValue(ws_sales_price)),item_sk,sumws,sum] + InputAdapter + Exchange [ws_item_sk,d_date] #5 + WholeStageCodegen (2) + HashAggregate [ws_item_sk,d_date,ws_sales_price] [sum,sum] + Project [ws_item_sk,ws_sales_price,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #6 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + ReusedExchange [d_date_sk,d_date] #6 InputAdapter - Exchange [item_sk] #6 - WholeStageCodegen (11) - Project [item_sk,sumws,rk] - InputAdapter - Window [ws_item_sk,d_date] - WholeStageCodegen (10) - Sort [ws_item_sk,d_date] - InputAdapter - ReusedExchange [item_sk,d_date,sumws,ws_item_sk] #3 + WholeStageCodegen (12) + Sort [item_sk] + InputAdapter + Exchange [item_sk] #7 + WholeStageCodegen (11) + Project [item_sk,sumws,rk] + InputAdapter + Window [ws_item_sk,d_date] + WholeStageCodegen (10) + Sort [ws_item_sk,d_date] + InputAdapter + ReusedExchange [item_sk,d_date,sumws,ws_item_sk] #4 InputAdapter - WholeStageCodegen (26) + WholeStageCodegen (28) Sort [item_sk,d_date] - HashAggregate [item_sk,d_date,sumss,sum,isEmpty] 
[sum(sumss),cume_sales,sum,isEmpty] - HashAggregate [item_sk,d_date,sumss,sumss] [sum,isEmpty,sum,isEmpty] - Project [item_sk,d_date,sumss,sumss] - SortMergeJoin [item_sk,item_sk,rk,rk] - InputAdapter - WholeStageCodegen (19) - Sort [item_sk] + InputAdapter + Exchange [item_sk,d_date] #8 + WholeStageCodegen (27) + HashAggregate [item_sk,d_date,sumss,sum,isEmpty] [sum(sumss),cume_sales,sum,isEmpty] + HashAggregate [item_sk,d_date,sumss,sumss] [sum,isEmpty,sum,isEmpty] + Project [item_sk,d_date,sumss,sumss] + SortMergeJoin [item_sk,item_sk,rk,rk] InputAdapter - Exchange [item_sk] #7 - WholeStageCodegen (18) - Project [item_sk,d_date,sumss,rk] - InputAdapter - Window [ss_item_sk,d_date] - WholeStageCodegen (17) - Sort [ss_item_sk,d_date] - InputAdapter - Exchange [ss_item_sk] #8 - WholeStageCodegen (16) - HashAggregate [ss_item_sk,d_date,sum] [sum(UnscaledValue(ss_sales_price)),item_sk,sumss,sum] - InputAdapter - Exchange [ss_item_sk,d_date] #9 - WholeStageCodegen (15) - HashAggregate [ss_item_sk,d_date,ss_sales_price] [sum,sum] - Project [ss_item_sk,ss_sales_price,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] - ReusedSubquery [d_date_sk] #1 - InputAdapter - ReusedExchange [d_date_sk,d_date] #5 - InputAdapter - WholeStageCodegen (25) - Sort [item_sk] + WholeStageCodegen (20) + Sort [item_sk] + InputAdapter + Exchange [item_sk] #9 + WholeStageCodegen (19) + Project [item_sk,d_date,sumss,rk] + InputAdapter + Window [ss_item_sk,d_date] + WholeStageCodegen (18) + Sort [ss_item_sk,d_date] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen (17) + HashAggregate [ss_item_sk,d_date,sum] [sum(UnscaledValue(ss_sales_price)),item_sk,sumss,sum] + InputAdapter + Exchange [ss_item_sk,d_date] #11 + WholeStageCodegen (16) + HashAggregate [ss_item_sk,d_date,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_sales_price,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [d_date_sk,d_date] #6 InputAdapter - Exchange [item_sk] #10 - WholeStageCodegen (24) - Project [item_sk,sumss,rk] - InputAdapter - Window [ss_item_sk,d_date] - WholeStageCodegen (23) - Sort [ss_item_sk,d_date] - InputAdapter - ReusedExchange [item_sk,d_date,sumss,ss_item_sk] #8 + WholeStageCodegen (26) + Sort [item_sk] + InputAdapter + Exchange [item_sk] #12 + WholeStageCodegen (25) + Project [item_sk,sumss,rk] + InputAdapter + Window [ss_item_sk,d_date] + WholeStageCodegen (24) + Sort [ss_item_sk,d_date] + InputAdapter + ReusedExchange [item_sk,d_date,sumss,ss_item_sk] #10 InputAdapter - WholeStageCodegen (57) + WholeStageCodegen (61) Project [item_sk,web_sales,store_sales,rk] InputAdapter Window [item_sk,d_date] - WholeStageCodegen (56) + WholeStageCodegen (60) Sort [item_sk,d_date] InputAdapter ReusedExchange [item_sk,d_date,web_sales,store_sales] #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt index aa9b899a9308c..d214b321a4791 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt @@ -1,53 +1,56 @@ 
== Physical Plan == -TakeOrderedAndProject (49) -+- * Project (48) - +- * SortMergeJoin Inner (47) - :- * Project (41) - : +- * SortMergeJoin Inner (40) - : :- * Sort (32) - : : +- * Project (31) - : : +- * Filter (30) - : : +- Window (29) - : : +- * Filter (28) - : : +- Window (27) - : : +- * Sort (26) - : : +- Exchange (25) - : : +- * HashAggregate (24) - : : +- Exchange (23) - : : +- * HashAggregate (22) - : : +- * Project (21) - : : +- * SortMergeJoin Inner (20) - : : :- * Sort (14) - : : : +- Exchange (13) - : : : +- * Project (12) - : : : +- * BroadcastHashJoin Inner BuildRight (11) - : : : :- * Project (6) - : : : : +- * BroadcastHashJoin Inner BuildRight (5) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.catalog_sales (1) - : : : : +- ReusedExchange (4) - : : : +- BroadcastExchange (10) - : : : +- * Filter (9) - : : : +- * ColumnarToRow (8) - : : : +- Scan parquet default.call_center (7) - : : +- * Sort (19) - : : +- Exchange (18) - : : +- * Filter (17) - : : +- * ColumnarToRow (16) - : : +- Scan parquet default.item (15) - : +- * Sort (39) - : +- * Project (38) - : +- Window (37) - : +- * Sort (36) - : +- Exchange (35) - : +- * HashAggregate (34) - : +- ReusedExchange (33) - +- * Sort (46) - +- * Project (45) - +- Window (44) - +- * Sort (43) - +- ReusedExchange (42) +TakeOrderedAndProject (52) ++- * Project (51) + +- * SortMergeJoin Inner (50) + :- * Project (43) + : +- * SortMergeJoin Inner (42) + : :- * Sort (33) + : : +- Exchange (32) + : : +- * Project (31) + : : +- * Filter (30) + : : +- Window (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * SortMergeJoin Inner (20) + : : :- * Sort (14) + : : : +- Exchange (13) + : : : +- * Project (12) + : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : :- * Project (6) + : : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- ReusedExchange (4) + : : : +- BroadcastExchange (10) + : : : +- * Filter (9) + : : : +- * ColumnarToRow (8) + : : : +- Scan parquet default.call_center (7) + : : +- * Sort (19) + : : +- Exchange (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet default.item (15) + : +- * Sort (41) + : +- Exchange (40) + : +- * Project (39) + : +- Window (38) + : +- * Sort (37) + : +- Exchange (36) + : +- * HashAggregate (35) + : +- ReusedExchange (34) + +- * Sort (49) + +- Exchange (48) + +- * Project (47) + +- Window (46) + +- * Sort (45) + +- ReusedExchange (44) (1) Scan parquet default.catalog_sales @@ -65,7 +68,7 @@ Input [4]: [cs_call_center_sk#1, cs_item_sk#2, cs_sales_price#3, cs_sold_date_sk Input [4]: [cs_call_center_sk#1, cs_item_sk#2, cs_sales_price#3, cs_sold_date_sk#4] Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_call_center_sk#1)) -(4) ReusedExchange [Reuses operator id: 53] +(4) ReusedExchange [Reuses operator id: 56] Output [3]: [d_date_sk#6, d_year#7, d_moy#8] (5) BroadcastHashJoin [codegen id : 3] @@ -189,106 +192,118 @@ Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.0000 Output [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] Input [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] -(32) 
Sort [codegen id : 11] +(32) Exchange +Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] +Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, rn#24, 5), ENSURE_REQUIREMENTS, [id=#26] + +(33) Sort [codegen id : 12] Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] Arguments: [i_category#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, rn#24 ASC NULLS FIRST], false, 0 -(33) ReusedExchange [Reuses operator id: 23] -Output [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum#31] +(34) ReusedExchange [Reuses operator id: 23] +Output [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum#32] -(34) HashAggregate [codegen id : 19] -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum#31] -Keys [5]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30] -Functions [1]: [sum(UnscaledValue(cs_sales_price#32))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#32))#20] -Results [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, MakeDecimal(sum(UnscaledValue(cs_sales_price#32))#20,17,2) AS sum_sales#21] +(35) HashAggregate [codegen id : 20] +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum#32] +Keys [5]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31] +Functions [1]: [sum(UnscaledValue(cs_sales_price#33))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#33))#20] +Results [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, MakeDecimal(sum(UnscaledValue(cs_sales_price#33))#20,17,2) AS sum_sales#21] -(35) Exchange -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#21] -Arguments: hashpartitioning(i_category#26, i_brand#27, cc_name#28, 5), ENSURE_REQUIREMENTS, [id=#33] +(36) Exchange +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21] +Arguments: hashpartitioning(i_category#27, i_brand#28, cc_name#29, 5), ENSURE_REQUIREMENTS, [id=#34] -(36) Sort [codegen id : 20] -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#21] -Arguments: [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, cc_name#28 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST], false, 0 +(37) Sort [codegen id : 21] +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21] +Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, cc_name#29 ASC NULLS FIRST, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST], false, 0 -(37) Window -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#21] -Arguments: [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#26, i_brand#27, cc_name#28, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#34], [i_category#26, i_brand#27, cc_name#28], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] +(38) Window +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21] +Arguments: [rank(d_year#30, d_moy#31) windowspecdefinition(i_category#27, i_brand#28, cc_name#29, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#27, i_brand#28, cc_name#29], [d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] 
-(38) Project [codegen id : 21] -Output [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#21 AS sum_sales#35, rn#34] -Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#21, rn#34] +(39) Project [codegen id : 22] +Output [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#21 AS sum_sales#36, rn#35] +Input [7]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21, rn#35] -(39) Sort [codegen id : 21] -Input [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#35, rn#34] -Arguments: [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, cc_name#28 ASC NULLS FIRST, (rn#34 + 1) ASC NULLS FIRST], false, 0 +(40) Exchange +Input [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#36, rn#35] +Arguments: hashpartitioning(i_category#27, i_brand#28, cc_name#29, (rn#35 + 1), 5), ENSURE_REQUIREMENTS, [id=#37] -(40) SortMergeJoin [codegen id : 22] +(41) Sort [codegen id : 23] +Input [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#36, rn#35] +Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, cc_name#29 ASC NULLS FIRST, (rn#35 + 1) ASC NULLS FIRST], false, 0 + +(42) SortMergeJoin [codegen id : 24] Left keys [4]: [i_category#15, i_brand#14, cc_name#10, rn#24] -Right keys [4]: [i_category#26, i_brand#27, cc_name#28, (rn#34 + 1)] +Right keys [4]: [i_category#27, i_brand#28, cc_name#29, (rn#35 + 1)] Join condition: None -(41) Project [codegen id : 22] -Output [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#35] -Input [13]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, i_category#26, i_brand#27, cc_name#28, sum_sales#35, rn#34] +(43) Project [codegen id : 24] +Output [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#36] +Input [13]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, i_category#27, i_brand#28, cc_name#29, sum_sales#36, rn#35] + +(44) ReusedExchange [Reuses operator id: 36] +Output [6]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21] -(42) ReusedExchange [Reuses operator id: 35] -Output [6]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#21] +(45) Sort [codegen id : 33] +Input [6]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21] +Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, cc_name#40 ASC NULLS FIRST, d_year#41 ASC NULLS FIRST, d_moy#42 ASC NULLS FIRST], false, 0 -(43) Sort [codegen id : 31] -Input [6]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#21] -Arguments: [i_category#36 ASC NULLS FIRST, i_brand#37 ASC NULLS FIRST, cc_name#38 ASC NULLS FIRST, d_year#39 ASC NULLS FIRST, d_moy#40 ASC NULLS FIRST], false, 0 +(46) Window +Input [6]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21] +Arguments: [rank(d_year#41, d_moy#42) windowspecdefinition(i_category#38, i_brand#39, cc_name#40, d_year#41 ASC NULLS FIRST, d_moy#42 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#43], [i_category#38, i_brand#39, cc_name#40], [d_year#41 ASC NULLS FIRST, d_moy#42 ASC NULLS FIRST] -(44) Window -Input [6]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#21] -Arguments: [rank(d_year#39, d_moy#40) windowspecdefinition(i_category#36, i_brand#37, cc_name#38, d_year#39 
ASC NULLS FIRST, d_moy#40 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#41], [i_category#36, i_brand#37, cc_name#38], [d_year#39 ASC NULLS FIRST, d_moy#40 ASC NULLS FIRST] +(47) Project [codegen id : 34] +Output [5]: [i_category#38, i_brand#39, cc_name#40, sum_sales#21 AS sum_sales#44, rn#43] +Input [7]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21, rn#43] -(45) Project [codegen id : 32] -Output [5]: [i_category#36, i_brand#37, cc_name#38, sum_sales#21 AS sum_sales#42, rn#41] -Input [7]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#21, rn#41] +(48) Exchange +Input [5]: [i_category#38, i_brand#39, cc_name#40, sum_sales#44, rn#43] +Arguments: hashpartitioning(i_category#38, i_brand#39, cc_name#40, (rn#43 - 1), 5), ENSURE_REQUIREMENTS, [id=#45] -(46) Sort [codegen id : 32] -Input [5]: [i_category#36, i_brand#37, cc_name#38, sum_sales#42, rn#41] -Arguments: [i_category#36 ASC NULLS FIRST, i_brand#37 ASC NULLS FIRST, cc_name#38 ASC NULLS FIRST, (rn#41 - 1) ASC NULLS FIRST], false, 0 +(49) Sort [codegen id : 35] +Input [5]: [i_category#38, i_brand#39, cc_name#40, sum_sales#44, rn#43] +Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, cc_name#40 ASC NULLS FIRST, (rn#43 - 1) ASC NULLS FIRST], false, 0 -(47) SortMergeJoin [codegen id : 33] +(50) SortMergeJoin [codegen id : 36] Left keys [4]: [i_category#15, i_brand#14, cc_name#10, rn#24] -Right keys [4]: [i_category#36, i_brand#37, cc_name#38, (rn#41 - 1)] +Right keys [4]: [i_category#38, i_brand#39, cc_name#40, (rn#43 - 1)] Join condition: None -(48) Project [codegen id : 33] -Output [8]: [i_category#15, i_brand#14, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, sum_sales#35 AS psum#43, sum_sales#42 AS nsum#44] -Input [14]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#35, i_category#36, i_brand#37, cc_name#38, sum_sales#42, rn#41] +(51) Project [codegen id : 36] +Output [8]: [i_category#15, i_brand#14, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, sum_sales#36 AS psum#46, sum_sales#44 AS nsum#47] +Input [14]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#36, i_category#38, i_brand#39, cc_name#40, sum_sales#44, rn#43] -(49) TakeOrderedAndProject -Input [8]: [i_category#15, i_brand#14, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#43, nsum#44] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, d_year#7 ASC NULLS FIRST], [i_category#15, i_brand#14, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#43, nsum#44] +(52) TakeOrderedAndProject +Input [8]: [i_category#15, i_brand#14, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, d_year#7 ASC NULLS FIRST], [i_category#15, i_brand#14, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = cs_sold_date_sk#4 IN dynamicpruning#5 -BroadcastExchange (53) -+- * Filter (52) - +- * ColumnarToRow (51) - +- Scan parquet default.date_dim (50) +BroadcastExchange (56) ++- 
* Filter (55) + +- * ColumnarToRow (54) + +- Scan parquet default.date_dim (53) -(50) Scan parquet default.date_dim +(53) Scan parquet default.date_dim Output [3]: [d_date_sk#6, d_year#7, d_moy#8] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct -(51) ColumnarToRow [codegen id : 1] +(54) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#6, d_year#7, d_moy#8] -(52) Filter [codegen id : 1] +(55) Filter [codegen id : 1] Input [3]: [d_date_sk#6, d_year#7, d_moy#8] Condition : ((((d_year#7 = 1999) OR ((d_year#7 = 1998) AND (d_moy#8 = 12))) OR ((d_year#7 = 2000) AND (d_moy#8 = 1))) AND isnotnull(d_date_sk#6)) -(53) BroadcastExchange +(56) BroadcastExchange Input [3]: [d_date_sk#6, d_year#7, d_moy#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#45] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/simplified.txt index 4389f6035a41b..b464f558bbc1a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/simplified.txt @@ -1,95 +1,104 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_year,i_category,i_brand,d_moy,psum,nsum] - WholeStageCodegen (33) + WholeStageCodegen (36) Project [i_category,i_brand,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] SortMergeJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] InputAdapter - WholeStageCodegen (22) + WholeStageCodegen (24) Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] SortMergeJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] InputAdapter - WholeStageCodegen (11) + WholeStageCodegen (12) Sort [i_category,i_brand,cc_name,rn] - Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] - Filter [avg_monthly_sales,sum_sales] - InputAdapter - Window [_w0,i_category,i_brand,cc_name,d_year] - WholeStageCodegen (10) - Filter [d_year] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,cc_name] - WholeStageCodegen (9) - Sort [i_category,i_brand,cc_name,d_year,d_moy] - InputAdapter - Exchange [i_category,i_brand,cc_name] #1 - WholeStageCodegen (8) - HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] - InputAdapter - Exchange [i_category,i_brand,cc_name,d_year,d_moy] #2 - WholeStageCodegen (7) - HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,cs_sales_price] [sum,sum] - Project [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] - SortMergeJoin [cs_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (4) - Sort [cs_item_sk] + InputAdapter + Exchange [i_category,i_brand,cc_name,rn] #1 + WholeStageCodegen (11) + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,cc_name,d_year] + WholeStageCodegen (10) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (9) + Sort 
[i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #2 + WholeStageCodegen (8) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,cc_name,d_year,d_moy] #3 + WholeStageCodegen (7) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,cs_sales_price] [sum,sum] + Project [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] + SortMergeJoin [cs_item_sk,i_item_sk] InputAdapter - Exchange [cs_item_sk] #3 - WholeStageCodegen (3) - Project [cs_item_sk,cs_sales_price,d_year,d_moy,cc_name] - BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] - Project [cs_call_center_sk,cs_item_sk,cs_sales_price,d_year,d_moy] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_item_sk,cs_call_center_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #4 - WholeStageCodegen (1) - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - ReusedExchange [d_date_sk,d_year,d_moy] #4 - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [cc_call_center_sk,cc_name] - ColumnarToRow + WholeStageCodegen (4) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (3) + Project [cs_item_sk,cs_sales_price,d_year,d_moy,cc_name] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [cs_call_center_sk,cs_item_sk,cs_sales_price,d_year,d_moy] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #5 + WholeStageCodegen (1) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter - Scan parquet default.call_center [cc_call_center_sk,cc_name] - InputAdapter - WholeStageCodegen (6) - Sort [i_item_sk] + ReusedExchange [d_date_sk,d_year,d_moy] #5 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [cc_call_center_sk,cc_name] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_sk,cc_name] InputAdapter - Exchange [i_item_sk] #6 - WholeStageCodegen (5) - Filter [i_item_sk,i_category,i_brand] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand,i_category] + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #7 + WholeStageCodegen (5) + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand,i_category] InputAdapter - WholeStageCodegen (21) + WholeStageCodegen (23) Sort [i_category,i_brand,cc_name,rn] - Project [i_category,i_brand,cc_name,sum_sales,rn] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,cc_name] - WholeStageCodegen (20) - Sort [i_category,i_brand,cc_name,d_year,d_moy] - InputAdapter - Exchange [i_category,i_brand,cc_name] #7 - WholeStageCodegen (19) - HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] - InputAdapter - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 + InputAdapter + Exchange [i_category,i_brand,cc_name,rn] #8 + WholeStageCodegen (22) + 
Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (21) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #9 + WholeStageCodegen (20) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #3 InputAdapter - WholeStageCodegen (32) + WholeStageCodegen (35) Sort [i_category,i_brand,cc_name,rn] - Project [i_category,i_brand,cc_name,sum_sales,rn] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,cc_name] - WholeStageCodegen (31) - Sort [i_category,i_brand,cc_name,d_year,d_moy] - InputAdapter - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #7 + InputAdapter + Exchange [i_category,i_brand,cc_name,rn] #10 + WholeStageCodegen (34) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (33) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt index cfee2290adff9..ddaa34ab4e657 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt @@ -1,185 +1,187 @@ == Physical Plan == -* Sort (181) -+- Exchange (180) - +- * Project (179) - +- * SortMergeJoin Inner (178) - :- * Sort (110) - : +- * HashAggregate (109) - : +- * HashAggregate (108) - : +- * Project (107) - : +- * BroadcastHashJoin Inner BuildRight (106) - : :- * Project (100) - : : +- * BroadcastHashJoin Inner BuildRight (99) - : : :- * Project (97) - : : : +- * BroadcastHashJoin Inner BuildRight (96) - : : : :- * Project (91) - : : : : +- * BroadcastHashJoin Inner BuildRight (90) - : : : : :- * Project (88) - : : : : : +- * BroadcastHashJoin Inner BuildRight (87) - : : : : : :- * Project (82) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (81) - : : : : : : :- * Project (79) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (78) - : : : : : : : :- * Project (73) - : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (72) - : : : : : : : : :- * Project (67) - : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (66) - : : : : : : : : : :- * Project (64) - : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (63) - : : : : : : : : : : :- * Project (58) - : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (57) - : : : : : : : : : : : :- * Project (55) - : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (54) - : : : : : : : : : : : : :- * Project (49) - : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (48) - : : : : : : : : : : : : : :- * Project (43) - : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (42) - : : : : : : : : : : : : : : :- * Project (37) - : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (36) - : : : : : : : : : : : : : : : :- * Project (34) - : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (33) - : : : : : : : : : : : : : : : : :- * Sort (12) - : : : : : : : : : : : : : : : : : +- Exchange (11) - : : : : : : : : : : : : : : : : : +- * Project (10) 
- : : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : : : : : : : : : : : : : : : :- BroadcastExchange (4) - : : : : : : : : : : : : : : : : : : +- * Filter (3) - : : : : : : : : : : : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : : : : : : : : : : : : +- * Project (8) - : : : : : : : : : : : : : : : : : +- * Filter (7) - : : : : : : : : : : : : : : : : : +- * ColumnarToRow (6) - : : : : : : : : : : : : : : : : : +- Scan parquet default.store_returns (5) - : : : : : : : : : : : : : : : : +- * Sort (32) - : : : : : : : : : : : : : : : : +- * Project (31) - : : : : : : : : : : : : : : : : +- * Filter (30) - : : : : : : : : : : : : : : : : +- * HashAggregate (29) - : : : : : : : : : : : : : : : : +- Exchange (28) - : : : : : : : : : : : : : : : : +- * HashAggregate (27) - : : : : : : : : : : : : : : : : +- * Project (26) - : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (25) - : : : : : : : : : : : : : : : : :- * Sort (18) - : : : : : : : : : : : : : : : : : +- Exchange (17) - : : : : : : : : : : : : : : : : : +- * Project (16) - : : : : : : : : : : : : : : : : : +- * Filter (15) - : : : : : : : : : : : : : : : : : +- * ColumnarToRow (14) - : : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_sales (13) - : : : : : : : : : : : : : : : : +- * Sort (24) - : : : : : : : : : : : : : : : : +- Exchange (23) - : : : : : : : : : : : : : : : : +- * Project (22) - : : : : : : : : : : : : : : : : +- * Filter (21) - : : : : : : : : : : : : : : : : +- * ColumnarToRow (20) - : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_returns (19) - : : : : : : : : : : : : : : : +- ReusedExchange (35) - : : : : : : : : : : : : : : +- BroadcastExchange (41) - : : : : : : : : : : : : : : +- * Filter (40) - : : : : : : : : : : : : : : +- * ColumnarToRow (39) - : : : : : : : : : : : : : : +- Scan parquet default.store (38) - : : : : : : : : : : : : : +- BroadcastExchange (47) - : : : : : : : : : : : : : +- * Filter (46) - : : : : : : : : : : : : : +- * ColumnarToRow (45) - : : : : : : : : : : : : : +- Scan parquet default.customer (44) - : : : : : : : : : : : : +- BroadcastExchange (53) - : : : : : : : : : : : : +- * Filter (52) - : : : : : : : : : : : : +- * ColumnarToRow (51) - : : : : : : : : : : : : +- Scan parquet default.date_dim (50) - : : : : : : : : : : : +- ReusedExchange (56) - : : : : : : : : : : +- BroadcastExchange (62) - : : : : : : : : : : +- * Filter (61) - : : : : : : : : : : +- * ColumnarToRow (60) - : : : : : : : : : : +- Scan parquet default.customer_demographics (59) - : : : : : : : : : +- ReusedExchange (65) - : : : : : : : : +- BroadcastExchange (71) - : : : : : : : : +- * Filter (70) - : : : : : : : : +- * ColumnarToRow (69) - : : : : : : : : +- Scan parquet default.promotion (68) - : : : : : : : +- BroadcastExchange (77) - : : : : : : : +- * Filter (76) - : : : : : : : +- * ColumnarToRow (75) - : : : : : : : +- Scan parquet default.household_demographics (74) - : : : : : : +- ReusedExchange (80) - : : : : : +- BroadcastExchange (86) - : : : : : +- * Filter (85) - : : : : : +- * ColumnarToRow (84) - : : : : : +- Scan parquet default.customer_address (83) - : : : : +- ReusedExchange (89) - : : : +- BroadcastExchange (95) - : : : +- * Filter (94) - : : : +- * ColumnarToRow (93) - : : : +- Scan parquet default.income_band (92) - : : +- ReusedExchange (98) - : +- BroadcastExchange (105) - : +- * Project (104) - : +- * Filter (103) - : +- * 
ColumnarToRow (102) - : +- Scan parquet default.item (101) - +- * Sort (177) - +- * HashAggregate (176) - +- * HashAggregate (175) - +- * Project (174) - +- * BroadcastHashJoin Inner BuildRight (173) - :- * Project (171) - : +- * BroadcastHashJoin Inner BuildRight (170) - : :- * Project (168) - : : +- * BroadcastHashJoin Inner BuildRight (167) - : : :- * Project (165) - : : : +- * BroadcastHashJoin Inner BuildRight (164) - : : : :- * Project (162) - : : : : +- * BroadcastHashJoin Inner BuildRight (161) - : : : : :- * Project (159) - : : : : : +- * BroadcastHashJoin Inner BuildRight (158) - : : : : : :- * Project (156) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (155) - : : : : : : :- * Project (153) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (152) - : : : : : : : :- * Project (150) - : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (149) - : : : : : : : : :- * Project (147) - : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (146) - : : : : : : : : : :- * Project (144) - : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (143) - : : : : : : : : : : :- * Project (141) - : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (140) - : : : : : : : : : : : :- * Project (138) - : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (137) - : : : : : : : : : : : : :- * Project (135) - : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (134) - : : : : : : : : : : : : : :- * Project (132) - : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (131) - : : : : : : : : : : : : : : :- * Project (129) - : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (128) - : : : : : : : : : : : : : : : :- * Sort (122) - : : : : : : : : : : : : : : : : +- Exchange (121) - : : : : : : : : : : : : : : : : +- * Project (120) - : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (119) - : : : : : : : : : : : : : : : : :- BroadcastExchange (114) - : : : : : : : : : : : : : : : : : +- * Filter (113) - : : : : : : : : : : : : : : : : : +- * ColumnarToRow (112) - : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (111) - : : : : : : : : : : : : : : : : +- * Project (118) - : : : : : : : : : : : : : : : : +- * Filter (117) - : : : : : : : : : : : : : : : : +- * ColumnarToRow (116) - : : : : : : : : : : : : : : : : +- Scan parquet default.store_returns (115) - : : : : : : : : : : : : : : : +- * Sort (127) - : : : : : : : : : : : : : : : +- * Project (126) - : : : : : : : : : : : : : : : +- * Filter (125) - : : : : : : : : : : : : : : : +- * HashAggregate (124) - : : : : : : : : : : : : : : : +- ReusedExchange (123) - : : : : : : : : : : : : : : +- ReusedExchange (130) - : : : : : : : : : : : : : +- ReusedExchange (133) - : : : : : : : : : : : : +- ReusedExchange (136) - : : : : : : : : : : : +- ReusedExchange (139) - : : : : : : : : : : +- ReusedExchange (142) - : : : : : : : : : +- ReusedExchange (145) - : : : : : : : : +- ReusedExchange (148) - : : : : : : : +- ReusedExchange (151) - : : : : : : +- ReusedExchange (154) - : : : : : +- ReusedExchange (157) - : : : : +- ReusedExchange (160) - : : : +- ReusedExchange (163) - : : +- ReusedExchange (166) - : +- ReusedExchange (169) - +- ReusedExchange (172) +* Sort (183) ++- Exchange (182) + +- * Project (181) + +- * SortMergeJoin Inner (180) + :- * Sort (111) + : +- Exchange (110) + : +- * HashAggregate (109) + : +- * HashAggregate (108) + : +- * Project (107) + : +- * BroadcastHashJoin Inner BuildRight (106) + : :- * Project (100) + : : +- * 
BroadcastHashJoin Inner BuildRight (99) + : : :- * Project (97) + : : : +- * BroadcastHashJoin Inner BuildRight (96) + : : : :- * Project (91) + : : : : +- * BroadcastHashJoin Inner BuildRight (90) + : : : : :- * Project (88) + : : : : : +- * BroadcastHashJoin Inner BuildRight (87) + : : : : : :- * Project (82) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (81) + : : : : : : :- * Project (79) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (78) + : : : : : : : :- * Project (73) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (72) + : : : : : : : : :- * Project (67) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (66) + : : : : : : : : : :- * Project (64) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (63) + : : : : : : : : : : :- * Project (58) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (57) + : : : : : : : : : : : :- * Project (55) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (54) + : : : : : : : : : : : : :- * Project (49) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (48) + : : : : : : : : : : : : : :- * Project (43) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (42) + : : : : : : : : : : : : : : :- * Project (37) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : : : : : : : : : : : : : : :- * Project (34) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (33) + : : : : : : : : : : : : : : : : :- * Sort (12) + : : : : : : : : : : : : : : : : : +- Exchange (11) + : : : : : : : : : : : : : : : : : +- * Project (10) + : : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : : : : : : : : : : : : : : : :- BroadcastExchange (4) + : : : : : : : : : : : : : : : : : : +- * Filter (3) + : : : : : : : : : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : : : : : : : : : : : : +- * Project (8) + : : : : : : : : : : : : : : : : : +- * Filter (7) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (6) + : : : : : : : : : : : : : : : : : +- Scan parquet default.store_returns (5) + : : : : : : : : : : : : : : : : +- * Sort (32) + : : : : : : : : : : : : : : : : +- * Project (31) + : : : : : : : : : : : : : : : : +- * Filter (30) + : : : : : : : : : : : : : : : : +- * HashAggregate (29) + : : : : : : : : : : : : : : : : +- Exchange (28) + : : : : : : : : : : : : : : : : +- * HashAggregate (27) + : : : : : : : : : : : : : : : : +- * Project (26) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (25) + : : : : : : : : : : : : : : : : :- * Sort (18) + : : : : : : : : : : : : : : : : : +- Exchange (17) + : : : : : : : : : : : : : : : : : +- * Project (16) + : : : : : : : : : : : : : : : : : +- * Filter (15) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (14) + : : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_sales (13) + : : : : : : : : : : : : : : : : +- * Sort (24) + : : : : : : : : : : : : : : : : +- Exchange (23) + : : : : : : : : : : : : : : : : +- * Project (22) + : : : : : : : : : : : : : : : : +- * Filter (21) + : : : : : : : : : : : : : : : : +- * ColumnarToRow (20) + : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_returns (19) + : : : : : : : : : : : : : : : +- ReusedExchange (35) + : : : : : : : : : : : : : : +- BroadcastExchange (41) + : : : : : : : : : : : : : : +- * Filter (40) + : : : : : : : : : : : : : : +- * ColumnarToRow (39) + : : : : : : : : : : 
: : : : +- Scan parquet default.store (38) + : : : : : : : : : : : : : +- BroadcastExchange (47) + : : : : : : : : : : : : : +- * Filter (46) + : : : : : : : : : : : : : +- * ColumnarToRow (45) + : : : : : : : : : : : : : +- Scan parquet default.customer (44) + : : : : : : : : : : : : +- BroadcastExchange (53) + : : : : : : : : : : : : +- * Filter (52) + : : : : : : : : : : : : +- * ColumnarToRow (51) + : : : : : : : : : : : : +- Scan parquet default.date_dim (50) + : : : : : : : : : : : +- ReusedExchange (56) + : : : : : : : : : : +- BroadcastExchange (62) + : : : : : : : : : : +- * Filter (61) + : : : : : : : : : : +- * ColumnarToRow (60) + : : : : : : : : : : +- Scan parquet default.customer_demographics (59) + : : : : : : : : : +- ReusedExchange (65) + : : : : : : : : +- BroadcastExchange (71) + : : : : : : : : +- * Filter (70) + : : : : : : : : +- * ColumnarToRow (69) + : : : : : : : : +- Scan parquet default.promotion (68) + : : : : : : : +- BroadcastExchange (77) + : : : : : : : +- * Filter (76) + : : : : : : : +- * ColumnarToRow (75) + : : : : : : : +- Scan parquet default.household_demographics (74) + : : : : : : +- ReusedExchange (80) + : : : : : +- BroadcastExchange (86) + : : : : : +- * Filter (85) + : : : : : +- * ColumnarToRow (84) + : : : : : +- Scan parquet default.customer_address (83) + : : : : +- ReusedExchange (89) + : : : +- BroadcastExchange (95) + : : : +- * Filter (94) + : : : +- * ColumnarToRow (93) + : : : +- Scan parquet default.income_band (92) + : : +- ReusedExchange (98) + : +- BroadcastExchange (105) + : +- * Project (104) + : +- * Filter (103) + : +- * ColumnarToRow (102) + : +- Scan parquet default.item (101) + +- * Sort (179) + +- Exchange (178) + +- * HashAggregate (177) + +- * HashAggregate (176) + +- * Project (175) + +- * BroadcastHashJoin Inner BuildRight (174) + :- * Project (172) + : +- * BroadcastHashJoin Inner BuildRight (171) + : :- * Project (169) + : : +- * BroadcastHashJoin Inner BuildRight (168) + : : :- * Project (166) + : : : +- * BroadcastHashJoin Inner BuildRight (165) + : : : :- * Project (163) + : : : : +- * BroadcastHashJoin Inner BuildRight (162) + : : : : :- * Project (160) + : : : : : +- * BroadcastHashJoin Inner BuildRight (159) + : : : : : :- * Project (157) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (156) + : : : : : : :- * Project (154) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (153) + : : : : : : : :- * Project (151) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (150) + : : : : : : : : :- * Project (148) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (147) + : : : : : : : : : :- * Project (145) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (144) + : : : : : : : : : : :- * Project (142) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (141) + : : : : : : : : : : : :- * Project (139) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (138) + : : : : : : : : : : : : :- * Project (136) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (135) + : : : : : : : : : : : : : :- * Project (133) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (132) + : : : : : : : : : : : : : : :- * Project (130) + : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (129) + : : : : : : : : : : : : : : : :- * Sort (123) + : : : : : : : : : : : : : : : : +- Exchange (122) + : : : : : : : : : : : : : : : : +- * Project (121) + : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (120) + : : : : : : : : : : 
: : : : : : :- BroadcastExchange (115) + : : : : : : : : : : : : : : : : : +- * Filter (114) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (113) + : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (112) + : : : : : : : : : : : : : : : : +- * Project (119) + : : : : : : : : : : : : : : : : +- * Filter (118) + : : : : : : : : : : : : : : : : +- * ColumnarToRow (117) + : : : : : : : : : : : : : : : : +- Scan parquet default.store_returns (116) + : : : : : : : : : : : : : : : +- * Sort (128) + : : : : : : : : : : : : : : : +- * Project (127) + : : : : : : : : : : : : : : : +- * Filter (126) + : : : : : : : : : : : : : : : +- * HashAggregate (125) + : : : : : : : : : : : : : : : +- ReusedExchange (124) + : : : : : : : : : : : : : : +- ReusedExchange (131) + : : : : : : : : : : : : : +- ReusedExchange (134) + : : : : : : : : : : : : +- ReusedExchange (137) + : : : : : : : : : : : +- ReusedExchange (140) + : : : : : : : : : : +- ReusedExchange (143) + : : : : : : : : : +- ReusedExchange (146) + : : : : : : : : +- ReusedExchange (149) + : : : : : : : +- ReusedExchange (152) + : : : : : : +- ReusedExchange (155) + : : : : : +- ReusedExchange (158) + : : : : +- ReusedExchange (161) + : : : +- ReusedExchange (164) + : : +- ReusedExchange (167) + : +- ReusedExchange (170) + +- ReusedExchange (173) (1) Scan parquet default.store_sales @@ -336,7 +338,7 @@ Join condition: None Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#19] -(35) ReusedExchange [Reuses operator id: 185] +(35) ReusedExchange [Reuses operator id: 187] Output [2]: [d_date_sk#42, d_year#43] (36) BroadcastHashJoin [codegen id : 25] @@ -669,360 +671,368 @@ Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#9)), sum(UnscaledV Aggregate Attributes [4]: [count(1)#99, sum(UnscaledValue(ss_wholesale_cost#9))#100, sum(UnscaledValue(ss_list_price#10))#101, sum(UnscaledValue(ss_coupon_amt#11))#102] Results [17]: [i_product_name#89 AS product_name#103, i_item_sk#86 AS item_sk#104, s_store_name#45 AS store_name#105, s_zip#46 AS store_zip#106, ca_street_number#73 AS b_street_number#107, ca_street_name#74 AS b_streen_name#108, ca_city#75 AS b_city#109, ca_zip#76 AS b_zip#110, ca_street_number#79 AS c_street_number#111, ca_street_name#80 AS c_street_name#112, ca_city#81 AS c_city#113, ca_zip#82 AS c_zip#114, d_year#43 AS syear#115, count(1)#99 AS cnt#116, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#9))#100,17,2) AS s1#117, MakeDecimal(sum(UnscaledValue(ss_list_price#10))#101,17,2) AS s2#118, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#11))#102,17,2) AS s3#119] -(110) Sort [codegen id : 25] +(110) Exchange +Input [17]: [product_name#103, item_sk#104, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119] +Arguments: hashpartitioning(item_sk#104, store_name#105, store_zip#106, 5), ENSURE_REQUIREMENTS, [id=#120] + +(111) Sort [codegen id : 26] Input [17]: [product_name#103, item_sk#104, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, 
c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119] Arguments: [item_sk#104 ASC NULLS FIRST, store_name#105 ASC NULLS FIRST, store_zip#106 ASC NULLS FIRST], false, 0 -(111) Scan parquet default.store_sales -Output [12]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_store_sk#125, ss_promo_sk#126, ss_ticket_number#127, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, ss_sold_date_sk#131] +(112) Scan parquet default.store_sales +Output [12]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_ticket_number#128, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#131), dynamicpruningexpression(ss_sold_date_sk#131 IN dynamicpruning#132)] +PartitionFilters: [isnotnull(ss_sold_date_sk#132), dynamicpruningexpression(ss_sold_date_sk#132 IN dynamicpruning#133)] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] ReadSchema: struct -(112) ColumnarToRow [codegen id : 26] -Input [12]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_store_sk#125, ss_promo_sk#126, ss_ticket_number#127, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, ss_sold_date_sk#131] +(113) ColumnarToRow [codegen id : 27] +Input [12]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_ticket_number#128, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] -(113) Filter [codegen id : 26] -Input [12]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_store_sk#125, ss_promo_sk#126, ss_ticket_number#127, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, ss_sold_date_sk#131] -Condition : (((((((isnotnull(ss_item_sk#120) AND isnotnull(ss_ticket_number#127)) AND isnotnull(ss_store_sk#125)) AND isnotnull(ss_customer_sk#121)) AND isnotnull(ss_cdemo_sk#122)) AND isnotnull(ss_promo_sk#126)) AND isnotnull(ss_hdemo_sk#123)) AND isnotnull(ss_addr_sk#124)) +(114) Filter [codegen id : 27] +Input [12]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_ticket_number#128, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] +Condition : (((((((isnotnull(ss_item_sk#121) AND isnotnull(ss_ticket_number#128)) AND isnotnull(ss_store_sk#126)) AND isnotnull(ss_customer_sk#122)) AND isnotnull(ss_cdemo_sk#123)) AND isnotnull(ss_promo_sk#127)) AND isnotnull(ss_hdemo_sk#124)) AND isnotnull(ss_addr_sk#125)) -(114) BroadcastExchange -Input [12]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_store_sk#125, ss_promo_sk#126, ss_ticket_number#127, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, ss_sold_date_sk#131] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[7, int, false] as bigint) & 4294967295))),false), [id=#133] +(115) BroadcastExchange +Input [12]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_ticket_number#128, ss_wholesale_cost#129, 
ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[7, int, false] as bigint) & 4294967295))),false), [id=#134] -(115) Scan parquet default.store_returns -Output [3]: [sr_item_sk#134, sr_ticket_number#135, sr_returned_date_sk#136] +(116) Scan parquet default.store_returns +Output [3]: [sr_item_sk#135, sr_ticket_number#136, sr_returned_date_sk#137] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct -(116) ColumnarToRow -Input [3]: [sr_item_sk#134, sr_ticket_number#135, sr_returned_date_sk#136] +(117) ColumnarToRow +Input [3]: [sr_item_sk#135, sr_ticket_number#136, sr_returned_date_sk#137] -(117) Filter -Input [3]: [sr_item_sk#134, sr_ticket_number#135, sr_returned_date_sk#136] -Condition : (isnotnull(sr_item_sk#134) AND isnotnull(sr_ticket_number#135)) +(118) Filter +Input [3]: [sr_item_sk#135, sr_ticket_number#136, sr_returned_date_sk#137] +Condition : (isnotnull(sr_item_sk#135) AND isnotnull(sr_ticket_number#136)) -(118) Project -Output [2]: [sr_item_sk#134, sr_ticket_number#135] -Input [3]: [sr_item_sk#134, sr_ticket_number#135, sr_returned_date_sk#136] +(119) Project +Output [2]: [sr_item_sk#135, sr_ticket_number#136] +Input [3]: [sr_item_sk#135, sr_ticket_number#136, sr_returned_date_sk#137] -(119) BroadcastHashJoin [codegen id : 27] -Left keys [2]: [ss_item_sk#120, ss_ticket_number#127] -Right keys [2]: [sr_item_sk#134, sr_ticket_number#135] +(120) BroadcastHashJoin [codegen id : 28] +Left keys [2]: [ss_item_sk#121, ss_ticket_number#128] +Right keys [2]: [sr_item_sk#135, sr_ticket_number#136] Join condition: None -(120) Project [codegen id : 27] -Output [11]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_store_sk#125, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, ss_sold_date_sk#131] -Input [14]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_store_sk#125, ss_promo_sk#126, ss_ticket_number#127, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, ss_sold_date_sk#131, sr_item_sk#134, sr_ticket_number#135] +(121) Project [codegen id : 28] +Output [11]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] +Input [14]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_ticket_number#128, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132, sr_item_sk#135, sr_ticket_number#136] -(121) Exchange -Input [11]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_store_sk#125, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, ss_sold_date_sk#131] -Arguments: hashpartitioning(ss_item_sk#120, 5), ENSURE_REQUIREMENTS, [id=#137] +(122) Exchange +Input [11]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] +Arguments: hashpartitioning(ss_item_sk#121, 5), ENSURE_REQUIREMENTS, [id=#138] -(122) Sort [codegen id : 28] -Input [11]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, 
ss_hdemo_sk#123, ss_addr_sk#124, ss_store_sk#125, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, ss_sold_date_sk#131] -Arguments: [ss_item_sk#120 ASC NULLS FIRST], false, 0 +(123) Sort [codegen id : 29] +Input [11]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] +Arguments: [ss_item_sk#121 ASC NULLS FIRST], false, 0 -(123) ReusedExchange [Reuses operator id: 28] -Output [4]: [cs_item_sk#138, sum#139, sum#140, isEmpty#141] +(124) ReusedExchange [Reuses operator id: 28] +Output [4]: [cs_item_sk#139, sum#140, sum#141, isEmpty#142] -(124) HashAggregate [codegen id : 34] -Input [4]: [cs_item_sk#138, sum#139, sum#140, isEmpty#141] -Keys [1]: [cs_item_sk#138] -Functions [2]: [sum(UnscaledValue(cs_ext_list_price#142)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#143 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#144 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#145 as decimal(9,2)))), DecimalType(9,2), true))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#142))#38, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#143 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#144 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#145 as decimal(9,2)))), DecimalType(9,2), true))#39] -Results [3]: [cs_item_sk#138, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#142))#38,17,2) AS sale#40, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#143 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#144 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#145 as decimal(9,2)))), DecimalType(9,2), true))#39 AS refund#41] +(125) HashAggregate [codegen id : 35] +Input [4]: [cs_item_sk#139, sum#140, sum#141, isEmpty#142] +Keys [1]: [cs_item_sk#139] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#143)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#144 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#145 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#146 as decimal(9,2)))), DecimalType(9,2), true))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#143))#38, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#144 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#145 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#146 as decimal(9,2)))), DecimalType(9,2), true))#39] +Results [3]: [cs_item_sk#139, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#143))#38,17,2) AS sale#40, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#144 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#145 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#146 as decimal(9,2)))), DecimalType(9,2), true))#39 AS refund#41] -(125) Filter [codegen id : 34] -Input [3]: [cs_item_sk#138, sale#40, refund#41] +(126) Filter [codegen id : 35] +Input [3]: [cs_item_sk#139, 
sale#40, refund#41] Condition : (isnotnull(sale#40) AND (cast(sale#40 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(refund#41)), DecimalType(21,2), true))) -(126) Project [codegen id : 34] -Output [1]: [cs_item_sk#138] -Input [3]: [cs_item_sk#138, sale#40, refund#41] +(127) Project [codegen id : 35] +Output [1]: [cs_item_sk#139] +Input [3]: [cs_item_sk#139, sale#40, refund#41] -(127) Sort [codegen id : 34] -Input [1]: [cs_item_sk#138] -Arguments: [cs_item_sk#138 ASC NULLS FIRST], false, 0 +(128) Sort [codegen id : 35] +Input [1]: [cs_item_sk#139] +Arguments: [cs_item_sk#139 ASC NULLS FIRST], false, 0 -(128) SortMergeJoin [codegen id : 50] -Left keys [1]: [ss_item_sk#120] -Right keys [1]: [cs_item_sk#138] +(129) SortMergeJoin [codegen id : 51] +Left keys [1]: [ss_item_sk#121] +Right keys [1]: [cs_item_sk#139] Join condition: None -(129) Project [codegen id : 50] -Output [11]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_store_sk#125, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, ss_sold_date_sk#131] -Input [12]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_store_sk#125, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, ss_sold_date_sk#131, cs_item_sk#138] +(130) Project [codegen id : 51] +Output [11]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] +Input [12]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132, cs_item_sk#139] -(130) ReusedExchange [Reuses operator id: 189] -Output [2]: [d_date_sk#146, d_year#147] +(131) ReusedExchange [Reuses operator id: 191] +Output [2]: [d_date_sk#147, d_year#148] -(131) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_sold_date_sk#131] -Right keys [1]: [d_date_sk#146] +(132) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_sold_date_sk#132] +Right keys [1]: [d_date_sk#147] Join condition: None -(132) Project [codegen id : 50] -Output [11]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_store_sk#125, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147] -Input [13]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_store_sk#125, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, ss_sold_date_sk#131, d_date_sk#146, d_year#147] +(133) Project [codegen id : 51] +Output [11]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148] +Input [13]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132, d_date_sk#147, d_year#148] -(133) ReusedExchange [Reuses operator id: 41] -Output [3]: [s_store_sk#148, s_store_name#149, s_zip#150] +(134) ReusedExchange [Reuses operator id: 41] +Output [3]: [s_store_sk#149, s_store_name#150, s_zip#151] -(134) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_store_sk#125] -Right keys [1]: [s_store_sk#148] +(135) 
BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_store_sk#126] +Right keys [1]: [s_store_sk#149] Join condition: None -(135) Project [codegen id : 50] -Output [12]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150] -Input [14]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_store_sk#125, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_sk#148, s_store_name#149, s_zip#150] +(136) Project [codegen id : 51] +Output [12]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151] +Input [14]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_sk#149, s_store_name#150, s_zip#151] -(136) ReusedExchange [Reuses operator id: 47] -Output [6]: [c_customer_sk#151, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, c_first_shipto_date_sk#155, c_first_sales_date_sk#156] +(137) ReusedExchange [Reuses operator id: 47] +Output [6]: [c_customer_sk#152, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, c_first_shipto_date_sk#156, c_first_sales_date_sk#157] -(137) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_customer_sk#121] -Right keys [1]: [c_customer_sk#151] +(138) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_customer_sk#122] +Right keys [1]: [c_customer_sk#152] Join condition: None -(138) Project [codegen id : 50] -Output [16]: [ss_item_sk#120, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, c_first_shipto_date_sk#155, c_first_sales_date_sk#156] -Input [18]: [ss_item_sk#120, ss_customer_sk#121, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_customer_sk#151, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, c_first_shipto_date_sk#155, c_first_sales_date_sk#156] +(139) Project [codegen id : 51] +Output [16]: [ss_item_sk#121, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, c_first_shipto_date_sk#156, c_first_sales_date_sk#157] +Input [18]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_customer_sk#152, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, c_first_shipto_date_sk#156, c_first_sales_date_sk#157] -(139) ReusedExchange [Reuses operator id: 53] -Output [2]: [d_date_sk#157, d_year#158] +(140) ReusedExchange [Reuses operator id: 53] +Output [2]: [d_date_sk#158, d_year#159] -(140) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [c_first_sales_date_sk#156] -Right keys [1]: [d_date_sk#157] +(141) 
BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_first_sales_date_sk#157] +Right keys [1]: [d_date_sk#158] Join condition: None -(141) Project [codegen id : 50] -Output [16]: [ss_item_sk#120, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, c_first_shipto_date_sk#155, d_year#158] -Input [18]: [ss_item_sk#120, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, c_first_shipto_date_sk#155, c_first_sales_date_sk#156, d_date_sk#157, d_year#158] +(142) Project [codegen id : 51] +Output [16]: [ss_item_sk#121, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, c_first_shipto_date_sk#156, d_year#159] +Input [18]: [ss_item_sk#121, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, c_first_shipto_date_sk#156, c_first_sales_date_sk#157, d_date_sk#158, d_year#159] -(142) ReusedExchange [Reuses operator id: 53] -Output [2]: [d_date_sk#159, d_year#160] +(143) ReusedExchange [Reuses operator id: 53] +Output [2]: [d_date_sk#160, d_year#161] -(143) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [c_first_shipto_date_sk#155] -Right keys [1]: [d_date_sk#159] +(144) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_first_shipto_date_sk#156] +Right keys [1]: [d_date_sk#160] Join condition: None -(144) Project [codegen id : 50] -Output [16]: [ss_item_sk#120, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160] -Input [18]: [ss_item_sk#120, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, c_first_shipto_date_sk#155, d_year#158, d_date_sk#159, d_year#160] +(145) Project [codegen id : 51] +Output [16]: [ss_item_sk#121, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161] +Input [18]: [ss_item_sk#121, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, c_first_shipto_date_sk#156, d_year#159, d_date_sk#160, d_year#161] -(145) ReusedExchange [Reuses operator id: 62] -Output [2]: [cd_demo_sk#161, cd_marital_status#162] +(146) ReusedExchange [Reuses operator id: 62] +Output [2]: [cd_demo_sk#162, cd_marital_status#163] -(146) BroadcastHashJoin [codegen id : 50] -Left keys [1]: 
[ss_cdemo_sk#122] -Right keys [1]: [cd_demo_sk#161] +(147) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_cdemo_sk#123] +Right keys [1]: [cd_demo_sk#162] Join condition: None -(147) Project [codegen id : 50] -Output [16]: [ss_item_sk#120, ss_hdemo_sk#123, ss_addr_sk#124, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, cd_marital_status#162] -Input [18]: [ss_item_sk#120, ss_cdemo_sk#122, ss_hdemo_sk#123, ss_addr_sk#124, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, cd_demo_sk#161, cd_marital_status#162] +(148) Project [codegen id : 51] +Output [16]: [ss_item_sk#121, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, cd_marital_status#163] +Input [18]: [ss_item_sk#121, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, cd_demo_sk#162, cd_marital_status#163] -(148) ReusedExchange [Reuses operator id: 62] -Output [2]: [cd_demo_sk#163, cd_marital_status#164] +(149) ReusedExchange [Reuses operator id: 62] +Output [2]: [cd_demo_sk#164, cd_marital_status#165] -(149) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [c_current_cdemo_sk#152] -Right keys [1]: [cd_demo_sk#163] -Join condition: NOT (cd_marital_status#162 = cd_marital_status#164) +(150) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_cdemo_sk#153] +Right keys [1]: [cd_demo_sk#164] +Join condition: NOT (cd_marital_status#163 = cd_marital_status#165) -(150) Project [codegen id : 50] -Output [14]: [ss_item_sk#120, ss_hdemo_sk#123, ss_addr_sk#124, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160] -Input [18]: [ss_item_sk#120, ss_hdemo_sk#123, ss_addr_sk#124, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, cd_marital_status#162, cd_demo_sk#163, cd_marital_status#164] +(151) Project [codegen id : 51] +Output [14]: [ss_item_sk#121, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161] +Input [18]: [ss_item_sk#121, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, cd_marital_status#163, cd_demo_sk#164, cd_marital_status#165] -(151) ReusedExchange [Reuses operator id: 71] -Output [1]: [p_promo_sk#165] +(152) ReusedExchange [Reuses operator id: 71] +Output [1]: [p_promo_sk#166] -(152) BroadcastHashJoin [codegen id : 50] -Left keys 
[1]: [ss_promo_sk#126] -Right keys [1]: [p_promo_sk#165] +(153) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_promo_sk#127] +Right keys [1]: [p_promo_sk#166] Join condition: None -(153) Project [codegen id : 50] -Output [13]: [ss_item_sk#120, ss_hdemo_sk#123, ss_addr_sk#124, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160] -Input [15]: [ss_item_sk#120, ss_hdemo_sk#123, ss_addr_sk#124, ss_promo_sk#126, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, p_promo_sk#165] +(154) Project [codegen id : 51] +Output [13]: [ss_item_sk#121, ss_hdemo_sk#124, ss_addr_sk#125, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161] +Input [15]: [ss_item_sk#121, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, p_promo_sk#166] -(154) ReusedExchange [Reuses operator id: 77] -Output [2]: [hd_demo_sk#166, hd_income_band_sk#167] +(155) ReusedExchange [Reuses operator id: 77] +Output [2]: [hd_demo_sk#167, hd_income_band_sk#168] -(155) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_hdemo_sk#123] -Right keys [1]: [hd_demo_sk#166] +(156) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_hdemo_sk#124] +Right keys [1]: [hd_demo_sk#167] Join condition: None -(156) Project [codegen id : 50] -Output [13]: [ss_item_sk#120, ss_addr_sk#124, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#167] -Input [15]: [ss_item_sk#120, ss_hdemo_sk#123, ss_addr_sk#124, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, hd_demo_sk#166, hd_income_band_sk#167] +(157) Project [codegen id : 51] +Output [13]: [ss_item_sk#121, ss_addr_sk#125, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, hd_income_band_sk#168] +Input [15]: [ss_item_sk#121, ss_hdemo_sk#124, ss_addr_sk#125, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, hd_demo_sk#167, hd_income_band_sk#168] -(157) ReusedExchange [Reuses operator id: 77] -Output [2]: [hd_demo_sk#168, hd_income_band_sk#169] +(158) ReusedExchange [Reuses operator id: 77] +Output [2]: [hd_demo_sk#169, hd_income_band_sk#170] -(158) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [c_current_hdemo_sk#153] -Right keys [1]: [hd_demo_sk#168] +(159) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_hdemo_sk#154] +Right keys [1]: [hd_demo_sk#169] Join condition: None -(159) Project [codegen id : 50] -Output [13]: [ss_item_sk#120, ss_addr_sk#124, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#167, hd_income_band_sk#169] -Input 
[15]: [ss_item_sk#120, ss_addr_sk#124, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#167, hd_demo_sk#168, hd_income_band_sk#169] +(160) Project [codegen id : 51] +Output [13]: [ss_item_sk#121, ss_addr_sk#125, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_addr_sk#155, d_year#159, d_year#161, hd_income_band_sk#168, hd_income_band_sk#170] +Input [15]: [ss_item_sk#121, ss_addr_sk#125, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, hd_income_band_sk#168, hd_demo_sk#169, hd_income_band_sk#170] -(160) ReusedExchange [Reuses operator id: 86] -Output [5]: [ca_address_sk#170, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174] +(161) ReusedExchange [Reuses operator id: 86] +Output [5]: [ca_address_sk#171, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175] -(161) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_addr_sk#124] -Right keys [1]: [ca_address_sk#170] +(162) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_addr_sk#125] +Right keys [1]: [ca_address_sk#171] Join condition: None -(162) Project [codegen id : 50] -Output [16]: [ss_item_sk#120, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#167, hd_income_band_sk#169, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174] -Input [18]: [ss_item_sk#120, ss_addr_sk#124, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#167, hd_income_band_sk#169, ca_address_sk#170, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174] +(163) Project [codegen id : 51] +Output [16]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_addr_sk#155, d_year#159, d_year#161, hd_income_band_sk#168, hd_income_band_sk#170, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175] +Input [18]: [ss_item_sk#121, ss_addr_sk#125, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_addr_sk#155, d_year#159, d_year#161, hd_income_band_sk#168, hd_income_band_sk#170, ca_address_sk#171, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175] -(163) ReusedExchange [Reuses operator id: 86] -Output [5]: [ca_address_sk#175, ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179] +(164) ReusedExchange [Reuses operator id: 86] +Output [5]: [ca_address_sk#176, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180] -(164) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [c_current_addr_sk#154] -Right keys [1]: [ca_address_sk#175] +(165) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_addr_sk#155] +Right keys [1]: [ca_address_sk#176] Join condition: None -(165) Project [codegen id : 50] -Output [19]: [ss_item_sk#120, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, d_year#158, d_year#160, hd_income_band_sk#167, hd_income_band_sk#169, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174, 
ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179] -Input [21]: [ss_item_sk#120, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#167, hd_income_band_sk#169, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174, ca_address_sk#175, ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179] +(166) Project [codegen id : 51] +Output [19]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, d_year#159, d_year#161, hd_income_band_sk#168, hd_income_band_sk#170, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180] +Input [21]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_addr_sk#155, d_year#159, d_year#161, hd_income_band_sk#168, hd_income_band_sk#170, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_address_sk#176, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180] -(166) ReusedExchange [Reuses operator id: 95] -Output [1]: [ib_income_band_sk#180] +(167) ReusedExchange [Reuses operator id: 95] +Output [1]: [ib_income_band_sk#181] -(167) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [hd_income_band_sk#167] -Right keys [1]: [ib_income_band_sk#180] +(168) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [hd_income_band_sk#168] +Right keys [1]: [ib_income_band_sk#181] Join condition: None -(168) Project [codegen id : 50] -Output [18]: [ss_item_sk#120, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, d_year#158, d_year#160, hd_income_band_sk#169, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174, ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179] -Input [20]: [ss_item_sk#120, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, d_year#158, d_year#160, hd_income_band_sk#167, hd_income_band_sk#169, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174, ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179, ib_income_band_sk#180] +(169) Project [codegen id : 51] +Output [18]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, d_year#159, d_year#161, hd_income_band_sk#170, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180] +Input [20]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, d_year#159, d_year#161, hd_income_band_sk#168, hd_income_band_sk#170, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, ib_income_band_sk#181] -(169) ReusedExchange [Reuses operator id: 95] -Output [1]: [ib_income_band_sk#181] +(170) ReusedExchange [Reuses operator id: 95] +Output [1]: [ib_income_band_sk#182] -(170) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [hd_income_band_sk#169] -Right keys [1]: [ib_income_band_sk#181] +(171) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [hd_income_band_sk#170] +Right keys [1]: [ib_income_band_sk#182] Join condition: None -(171) Project [codegen id : 50] -Output [17]: [ss_item_sk#120, 
ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, d_year#158, d_year#160, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174, ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179] -Input [19]: [ss_item_sk#120, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, d_year#158, d_year#160, hd_income_band_sk#169, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174, ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179, ib_income_band_sk#181] +(172) Project [codegen id : 51] +Output [17]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, d_year#159, d_year#161, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180] +Input [19]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, d_year#159, d_year#161, hd_income_band_sk#170, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, ib_income_band_sk#182] -(172) ReusedExchange [Reuses operator id: 105] -Output [2]: [i_item_sk#182, i_product_name#183] +(173) ReusedExchange [Reuses operator id: 105] +Output [2]: [i_item_sk#183, i_product_name#184] -(173) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_item_sk#120] -Right keys [1]: [i_item_sk#182] +(174) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_item_sk#121] +Right keys [1]: [i_item_sk#183] Join condition: None -(174) Project [codegen id : 50] -Output [18]: [ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, d_year#158, d_year#160, s_store_name#149, s_zip#150, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174, ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179, i_item_sk#182, i_product_name#183] -Input [19]: [ss_item_sk#120, ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, s_store_name#149, s_zip#150, d_year#158, d_year#160, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174, ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179, i_item_sk#182, i_product_name#183] - -(175) HashAggregate [codegen id : 50] -Input [18]: [ss_wholesale_cost#128, ss_list_price#129, ss_coupon_amt#130, d_year#147, d_year#158, d_year#160, s_store_name#149, s_zip#150, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174, ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179, i_item_sk#182, i_product_name#183] -Keys [15]: [i_product_name#183, i_item_sk#182, s_store_name#149, s_zip#150, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174, ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179, d_year#147, d_year#158, d_year#160] -Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#128)), partial_sum(UnscaledValue(ss_list_price#129)), partial_sum(UnscaledValue(ss_coupon_amt#130))] -Aggregate Attributes [4]: [count#91, sum#184, sum#185, sum#186] -Results [19]: [i_product_name#183, i_item_sk#182, s_store_name#149, s_zip#150, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174, ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179, d_year#147, d_year#158, d_year#160, count#95, sum#187, sum#188, sum#189] - -(176) HashAggregate [codegen id : 50] -Input [19]: 
[i_product_name#183, i_item_sk#182, s_store_name#149, s_zip#150, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174, ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179, d_year#147, d_year#158, d_year#160, count#95, sum#187, sum#188, sum#189] -Keys [15]: [i_product_name#183, i_item_sk#182, s_store_name#149, s_zip#150, ca_street_number#171, ca_street_name#172, ca_city#173, ca_zip#174, ca_street_number#176, ca_street_name#177, ca_city#178, ca_zip#179, d_year#147, d_year#158, d_year#160] -Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#128)), sum(UnscaledValue(ss_list_price#129)), sum(UnscaledValue(ss_coupon_amt#130))] -Aggregate Attributes [4]: [count(1)#99, sum(UnscaledValue(ss_wholesale_cost#128))#100, sum(UnscaledValue(ss_list_price#129))#101, sum(UnscaledValue(ss_coupon_amt#130))#102] -Results [8]: [i_item_sk#182 AS item_sk#190, s_store_name#149 AS store_name#191, s_zip#150 AS store_zip#192, d_year#147 AS syear#193, count(1)#99 AS cnt#194, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#128))#100,17,2) AS s1#195, MakeDecimal(sum(UnscaledValue(ss_list_price#129))#101,17,2) AS s2#196, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#130))#102,17,2) AS s3#197] - -(177) Sort [codegen id : 50] -Input [8]: [item_sk#190, store_name#191, store_zip#192, syear#193, cnt#194, s1#195, s2#196, s3#197] -Arguments: [item_sk#190 ASC NULLS FIRST, store_name#191 ASC NULLS FIRST, store_zip#192 ASC NULLS FIRST], false, 0 - -(178) SortMergeJoin [codegen id : 51] +(175) Project [codegen id : 51] +Output [18]: [ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, d_year#159, d_year#161, s_store_name#150, s_zip#151, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, i_item_sk#183, i_product_name#184] +Input [19]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, d_year#159, d_year#161, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, i_item_sk#183, i_product_name#184] + +(176) HashAggregate [codegen id : 51] +Input [18]: [ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, d_year#159, d_year#161, s_store_name#150, s_zip#151, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, i_item_sk#183, i_product_name#184] +Keys [15]: [i_product_name#184, i_item_sk#183, s_store_name#150, s_zip#151, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, d_year#148, d_year#159, d_year#161] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#129)), partial_sum(UnscaledValue(ss_list_price#130)), partial_sum(UnscaledValue(ss_coupon_amt#131))] +Aggregate Attributes [4]: [count#91, sum#185, sum#186, sum#187] +Results [19]: [i_product_name#184, i_item_sk#183, s_store_name#150, s_zip#151, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, d_year#148, d_year#159, d_year#161, count#95, sum#188, sum#189, sum#190] + +(177) HashAggregate [codegen id : 51] +Input [19]: [i_product_name#184, i_item_sk#183, s_store_name#150, s_zip#151, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, 
d_year#148, d_year#159, d_year#161, count#95, sum#188, sum#189, sum#190] +Keys [15]: [i_product_name#184, i_item_sk#183, s_store_name#150, s_zip#151, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, d_year#148, d_year#159, d_year#161] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#129)), sum(UnscaledValue(ss_list_price#130)), sum(UnscaledValue(ss_coupon_amt#131))] +Aggregate Attributes [4]: [count(1)#99, sum(UnscaledValue(ss_wholesale_cost#129))#100, sum(UnscaledValue(ss_list_price#130))#101, sum(UnscaledValue(ss_coupon_amt#131))#102] +Results [8]: [i_item_sk#183 AS item_sk#191, s_store_name#150 AS store_name#192, s_zip#151 AS store_zip#193, d_year#148 AS syear#194, count(1)#99 AS cnt#195, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#129))#100,17,2) AS s1#196, MakeDecimal(sum(UnscaledValue(ss_list_price#130))#101,17,2) AS s2#197, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#131))#102,17,2) AS s3#198] + +(178) Exchange +Input [8]: [item_sk#191, store_name#192, store_zip#193, syear#194, cnt#195, s1#196, s2#197, s3#198] +Arguments: hashpartitioning(item_sk#191, store_name#192, store_zip#193, 5), ENSURE_REQUIREMENTS, [id=#199] + +(179) Sort [codegen id : 52] +Input [8]: [item_sk#191, store_name#192, store_zip#193, syear#194, cnt#195, s1#196, s2#197, s3#198] +Arguments: [item_sk#191 ASC NULLS FIRST, store_name#192 ASC NULLS FIRST, store_zip#193 ASC NULLS FIRST], false, 0 + +(180) SortMergeJoin [codegen id : 53] Left keys [3]: [item_sk#104, store_name#105, store_zip#106] -Right keys [3]: [item_sk#190, store_name#191, store_zip#192] -Join condition: (cnt#194 <= cnt#116) +Right keys [3]: [item_sk#191, store_name#192, store_zip#193] +Join condition: (cnt#195 <= cnt#116) -(179) Project [codegen id : 51] -Output [21]: [product_name#103, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119, s1#195, s2#196, s3#197, syear#193, cnt#194] -Input [25]: [product_name#103, item_sk#104, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119, item_sk#190, store_name#191, store_zip#192, syear#193, cnt#194, s1#195, s2#196, s3#197] +(181) Project [codegen id : 53] +Output [21]: [product_name#103, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119, s1#196, s2#197, s3#198, syear#194, cnt#195] +Input [25]: [product_name#103, item_sk#104, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119, item_sk#191, store_name#192, store_zip#193, syear#194, cnt#195, s1#196, s2#197, s3#198] -(180) Exchange -Input [21]: [product_name#103, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119, s1#195, s2#196, s3#197, syear#193, cnt#194] -Arguments: rangepartitioning(product_name#103 ASC NULLS FIRST, store_name#105 ASC NULLS FIRST, cnt#194 ASC NULLS FIRST, s1#117 ASC NULLS FIRST, s1#195 ASC NULLS FIRST, 5), 
ENSURE_REQUIREMENTS, [id=#198] +(182) Exchange +Input [21]: [product_name#103, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119, s1#196, s2#197, s3#198, syear#194, cnt#195] +Arguments: rangepartitioning(product_name#103 ASC NULLS FIRST, store_name#105 ASC NULLS FIRST, cnt#195 ASC NULLS FIRST, s1#117 ASC NULLS FIRST, s1#196 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#200] -(181) Sort [codegen id : 52] -Input [21]: [product_name#103, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119, s1#195, s2#196, s3#197, syear#193, cnt#194] -Arguments: [product_name#103 ASC NULLS FIRST, store_name#105 ASC NULLS FIRST, cnt#194 ASC NULLS FIRST, s1#117 ASC NULLS FIRST, s1#195 ASC NULLS FIRST], true, 0 +(183) Sort [codegen id : 54] +Input [21]: [product_name#103, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119, s1#196, s2#197, s3#198, syear#194, cnt#195] +Arguments: [product_name#103 ASC NULLS FIRST, store_name#105 ASC NULLS FIRST, cnt#195 ASC NULLS FIRST, s1#117 ASC NULLS FIRST, s1#196 ASC NULLS FIRST], true, 0 ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#12 IN dynamicpruning#13 -BroadcastExchange (185) -+- * Filter (184) - +- * ColumnarToRow (183) - +- Scan parquet default.date_dim (182) +BroadcastExchange (187) ++- * Filter (186) + +- * ColumnarToRow (185) + +- Scan parquet default.date_dim (184) -(182) Scan parquet default.date_dim +(184) Scan parquet default.date_dim Output [2]: [d_date_sk#42, d_year#43] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct -(183) ColumnarToRow [codegen id : 1] +(185) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#42, d_year#43] -(184) Filter [codegen id : 1] +(186) Filter [codegen id : 1] Input [2]: [d_date_sk#42, d_year#43] Condition : ((isnotnull(d_year#43) AND (d_year#43 = 1999)) AND isnotnull(d_date_sk#42)) -(185) BroadcastExchange +(187) BroadcastExchange Input [2]: [d_date_sk#42, d_year#43] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#199] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#201] -Subquery:2 Hosting operator id = 111 Hosting Expression = ss_sold_date_sk#131 IN dynamicpruning#132 -BroadcastExchange (189) -+- * Filter (188) - +- * ColumnarToRow (187) - +- Scan parquet default.date_dim (186) +Subquery:2 Hosting operator id = 112 Hosting Expression = ss_sold_date_sk#132 IN dynamicpruning#133 +BroadcastExchange (191) ++- * Filter (190) + +- * ColumnarToRow (189) + +- Scan parquet default.date_dim (188) -(186) Scan parquet default.date_dim -Output [2]: [d_date_sk#146, d_year#147] +(188) Scan parquet default.date_dim +Output [2]: [d_date_sk#147, d_year#148] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct -(187) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#146, d_year#147] +(189) ColumnarToRow [codegen id : 
1] +Input [2]: [d_date_sk#147, d_year#148] -(188) Filter [codegen id : 1] -Input [2]: [d_date_sk#146, d_year#147] -Condition : ((isnotnull(d_year#147) AND (d_year#147 = 2000)) AND isnotnull(d_date_sk#146)) +(190) Filter [codegen id : 1] +Input [2]: [d_date_sk#147, d_year#148] +Condition : ((isnotnull(d_year#148) AND (d_year#148 = 2000)) AND isnotnull(d_date_sk#147)) -(189) BroadcastExchange -Input [2]: [d_date_sk#146, d_year#147] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#200] +(191) BroadcastExchange +Input [2]: [d_date_sk#147, d_year#148] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#202] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/simplified.txt index 716aaa2663630..6917f8f6c6e2d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/simplified.txt @@ -1,283 +1,289 @@ -WholeStageCodegen (52) +WholeStageCodegen (54) Sort [product_name,store_name,cnt,s1,s1] InputAdapter Exchange [product_name,store_name,cnt,s1,s1] #1 - WholeStageCodegen (51) + WholeStageCodegen (53) Project [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] SortMergeJoin [item_sk,store_name,store_zip,item_sk,store_name,store_zip,cnt,cnt] InputAdapter - WholeStageCodegen (25) + WholeStageCodegen (26) Sort [item_sk,store_name,store_zip] - HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] - HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] - Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] - BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] - BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project 
[ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] - BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] - BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] - BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] - BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] - SortMergeJoin [ss_item_sk,cs_item_sk] - InputAdapter - WholeStageCodegen (3) - Sort [ss_item_sk] + InputAdapter + Exchange [item_sk,store_name,store_zip] #2 + WholeStageCodegen (25) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + HashAggregate 
[i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project 
[ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SortMergeJoin [ss_item_sk,cs_item_sk] InputAdapter - Exchange [ss_item_sk] #2 - WholeStageCodegen (2) - Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] - BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Filter [ss_item_sk,ss_ticket_number,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #4 - WholeStageCodegen (1) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] - InputAdapter - WholeStageCodegen (9) - Sort [cs_item_sk] - Project [cs_item_sk] - Filter [sale,refund] - HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sale,refund,sum,sum,isEmpty] + WholeStageCodegen (3) + Sort [ss_item_sk] InputAdapter - Exchange [cs_item_sk] #5 - WholeStageCodegen (8) - HashAggregate [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] [sum,sum,isEmpty,sum,sum,isEmpty] - Project [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] - SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - InputAdapter - WholeStageCodegen (5) - Sort [cs_item_sk,cs_order_number] + Exchange [ss_item_sk] #3 + WholeStageCodegen (2) + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_ticket_number,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales 
[ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #5 + WholeStageCodegen (1) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + WholeStageCodegen (9) + Sort [cs_item_sk] + Project [cs_item_sk] + Filter [sale,refund] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sale,refund,sum,sum,isEmpty] + InputAdapter + Exchange [cs_item_sk] #6 + WholeStageCodegen (8) + HashAggregate [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] [sum,sum,isEmpty,sum,sum,isEmpty] + Project [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] InputAdapter - Exchange [cs_item_sk,cs_order_number] #6 - WholeStageCodegen (4) - Project [cs_item_sk,cs_order_number,cs_ext_list_price] - Filter [cs_item_sk,cs_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] - InputAdapter - WholeStageCodegen (7) - Sort [cr_item_sk,cr_order_number] + WholeStageCodegen (5) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #7 + WholeStageCodegen (4) + Project [cs_item_sk,cs_order_number,cs_ext_list_price] + Filter [cs_item_sk,cs_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] InputAdapter - Exchange [cr_item_sk,cr_order_number] #7 - WholeStageCodegen (6) - Project [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] - Filter [cr_item_sk,cr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] - InputAdapter - ReusedExchange [d_date_sk,d_year] #4 - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (11) - Filter [s_store_sk,s_store_name,s_zip] - ColumnarToRow + WholeStageCodegen (7) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #8 + WholeStageCodegen (6) + Project [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] InputAdapter - Scan parquet default.store [s_store_sk,s_store_name,s_zip] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen (12) - Filter [c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] - ColumnarToRow + ReusedExchange [d_date_sk,d_year] #5 InputAdapter - Scan parquet default.customer 
[c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (13) - Filter [d_date_sk] - ColumnarToRow + BroadcastExchange #9 + WholeStageCodegen (11) + Filter [s_store_sk,s_store_name,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name,s_zip] InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - ReusedExchange [d_date_sk,d_year] #10 - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (15) - Filter [cd_demo_sk,cd_marital_status] - ColumnarToRow + BroadcastExchange #10 + WholeStageCodegen (12) + Filter [c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (13) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] - InputAdapter - ReusedExchange [cd_demo_sk,cd_marital_status] #11 - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (17) - Filter [p_promo_sk] - ColumnarToRow + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (15) + Filter [cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] InputAdapter - Scan parquet default.promotion [p_promo_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (18) - Filter [hd_demo_sk,hd_income_band_sk] - ColumnarToRow + ReusedExchange [cd_demo_sk,cd_marital_status] #12 InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] - InputAdapter - ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (20) - Filter [ca_address_sk] - ColumnarToRow + BroadcastExchange #13 + WholeStageCodegen (17) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (18) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] - InputAdapter - ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #14 - InputAdapter - BroadcastExchange #15 - WholeStageCodegen (22) - Filter [ib_income_band_sk] + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (20) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (22) + Filter [ib_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.income_band [ib_income_band_sk] + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (24) + Project [i_item_sk,i_product_name] + Filter 
[i_current_price,i_color,i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.income_band [ib_income_band_sk] - InputAdapter - ReusedExchange [ib_income_band_sk] #15 - InputAdapter - BroadcastExchange #16 - WholeStageCodegen (24) - Project [i_item_sk,i_product_name] - Filter [i_current_price,i_color,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_color,i_product_name] + Scan parquet default.item [i_item_sk,i_current_price,i_color,i_product_name] InputAdapter - WholeStageCodegen (50) + WholeStageCodegen (52) Sort [item_sk,store_name,store_zip] - HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),item_sk,store_name,store_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] - HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] - Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] - BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] - BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] - BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] - BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project 
[ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] - BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] - BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] - SortMergeJoin [ss_item_sk,cs_item_sk] - InputAdapter - WholeStageCodegen (28) - Sort [ss_item_sk] + InputAdapter + Exchange [item_sk,store_name,store_zip] #18 + WholeStageCodegen (51) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),item_sk,store_name,store_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin 
[c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SortMergeJoin [ss_item_sk,cs_item_sk] InputAdapter - Exchange [ss_item_sk] #17 - WholeStageCodegen (27) - Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] - BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - InputAdapter - BroadcastExchange #18 - WholeStageCodegen (26) - Filter [ss_item_sk,ss_ticket_number,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales 
[ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #2 - BroadcastExchange #19 - WholeStageCodegen (1) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] - InputAdapter - WholeStageCodegen (34) - Sort [cs_item_sk] - Project [cs_item_sk] - Filter [sale,refund] - HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sale,refund,sum,sum,isEmpty] + WholeStageCodegen (29) + Sort [ss_item_sk] InputAdapter - ReusedExchange [cs_item_sk,sum,sum,isEmpty] #5 - InputAdapter - ReusedExchange [d_date_sk,d_year] #19 - InputAdapter - ReusedExchange [s_store_sk,s_store_name,s_zip] #8 - InputAdapter - ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #9 - InputAdapter - ReusedExchange [d_date_sk,d_year] #10 - InputAdapter - ReusedExchange [d_date_sk,d_year] #10 - InputAdapter - ReusedExchange [cd_demo_sk,cd_marital_status] #11 - InputAdapter - ReusedExchange [cd_demo_sk,cd_marital_status] #11 - InputAdapter - ReusedExchange [p_promo_sk] #12 - InputAdapter - ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 - InputAdapter - ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 - InputAdapter - ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #14 - InputAdapter - ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #14 - InputAdapter - ReusedExchange [ib_income_band_sk] #15 - InputAdapter - ReusedExchange [ib_income_band_sk] #15 - InputAdapter - ReusedExchange [i_item_sk,i_product_name] #16 + Exchange [ss_item_sk] #19 + WholeStageCodegen (28) + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #20 + WholeStageCodegen (27) + Filter [ss_item_sk,ss_ticket_number,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #2 + BroadcastExchange #21 + WholeStageCodegen (1) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + WholeStageCodegen (35) + Sort [cs_item_sk] + Project [cs_item_sk] + Filter [sale,refund] + HashAggregate [cs_item_sk,sum,sum,isEmpty] 
[sum(UnscaledValue(cs_ext_list_price)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sale,refund,sum,sum,isEmpty] + InputAdapter + ReusedExchange [cs_item_sk,sum,sum,isEmpty] #6 + InputAdapter + ReusedExchange [d_date_sk,d_year] #21 + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_zip] #9 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + ReusedExchange [p_promo_sk] #13 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #17 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt index 42f7488ad66d3..e5e42f2be1366 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt @@ -1,72 +1,74 @@ == Physical Plan == -TakeOrderedAndProject (68) -+- * HashAggregate (67) - +- Exchange (66) - +- * HashAggregate (65) - +- * Project (64) - +- * SortMergeJoin LeftOuter (63) - :- * Sort (56) - : +- * Project (55) - : +- * BroadcastHashJoin LeftOuter BuildRight (54) - : :- * Project (49) - : : +- * SortMergeJoin Inner (48) - : : :- * Sort (36) - : : : +- * Project (35) - : : : +- * BroadcastHashJoin Inner BuildRight (34) - : : : :- * Project (32) - : : : : +- * SortMergeJoin Inner (31) - : : : : :- * Sort (25) - : : : : : +- Exchange (24) - : : : : : +- * Project (23) - : : : : : +- * BroadcastHashJoin Inner BuildRight (22) - : : : : : :- * Project (17) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : : : :- * Project (10) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : : : :- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : : : +- BroadcastExchange (8) - : : : : : : : +- * Project (7) - : : : : : : : +- * Filter (6) - : : : : : : : +- * ColumnarToRow (5) - : : : : : : : +- Scan parquet default.household_demographics (4) - : : : : : : +- BroadcastExchange (15) - : : : : : : +- * Project (14) - : : : : : : +- * Filter (13) - : : : : : : +- * ColumnarToRow (12) - : : : : : : +- Scan parquet default.customer_demographics (11) - : : : : : +- BroadcastExchange (21) - : : : : : +- * Filter (20) - : : : : : +- * ColumnarToRow (19) - : : : : : +- Scan parquet default.date_dim (18) - : : : : +- * Sort (30) - : : : : +- Exchange (29) - : : : : +- * Filter 
(28) - : : : : +- * ColumnarToRow (27) - : : : : +- Scan parquet default.item (26) - : : : +- ReusedExchange (33) - : : +- * Sort (47) - : : +- Exchange (46) - : : +- * Project (45) - : : +- * BroadcastHashJoin Inner BuildRight (44) - : : :- * Filter (39) - : : : +- * ColumnarToRow (38) - : : : +- Scan parquet default.inventory (37) - : : +- BroadcastExchange (43) - : : +- * Filter (42) - : : +- * ColumnarToRow (41) - : : +- Scan parquet default.warehouse (40) - : +- BroadcastExchange (53) - : +- * Filter (52) - : +- * ColumnarToRow (51) - : +- Scan parquet default.promotion (50) - +- * Sort (62) - +- Exchange (61) - +- * Project (60) - +- * Filter (59) - +- * ColumnarToRow (58) - +- Scan parquet default.catalog_returns (57) +TakeOrderedAndProject (70) ++- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Project (66) + +- * SortMergeJoin LeftOuter (65) + :- * Sort (58) + : +- Exchange (57) + : +- * Project (56) + : +- * BroadcastHashJoin LeftOuter BuildRight (55) + : :- * Project (50) + : : +- * SortMergeJoin Inner (49) + : : :- * Sort (37) + : : : +- Exchange (36) + : : : +- * Project (35) + : : : +- * BroadcastHashJoin Inner BuildRight (34) + : : : :- * Project (32) + : : : : +- * SortMergeJoin Inner (31) + : : : : :- * Sort (25) + : : : : : +- Exchange (24) + : : : : : +- * Project (23) + : : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : : :- * Project (17) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : : :- * Project (10) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : : :- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : : +- BroadcastExchange (8) + : : : : : : : +- * Project (7) + : : : : : : : +- * Filter (6) + : : : : : : : +- * ColumnarToRow (5) + : : : : : : : +- Scan parquet default.household_demographics (4) + : : : : : : +- BroadcastExchange (15) + : : : : : : +- * Project (14) + : : : : : : +- * Filter (13) + : : : : : : +- * ColumnarToRow (12) + : : : : : : +- Scan parquet default.customer_demographics (11) + : : : : : +- BroadcastExchange (21) + : : : : : +- * Filter (20) + : : : : : +- * ColumnarToRow (19) + : : : : : +- Scan parquet default.date_dim (18) + : : : : +- * Sort (30) + : : : : +- Exchange (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet default.item (26) + : : : +- ReusedExchange (33) + : : +- * Sort (48) + : : +- Exchange (47) + : : +- * Project (46) + : : +- * BroadcastHashJoin Inner BuildRight (45) + : : :- * Filter (40) + : : : +- * ColumnarToRow (39) + : : : +- Scan parquet default.inventory (38) + : : +- BroadcastExchange (44) + : : +- * Filter (43) + : : +- * ColumnarToRow (42) + : : +- Scan parquet default.warehouse (41) + : +- BroadcastExchange (54) + : +- * Filter (53) + : +- * ColumnarToRow (52) + : +- Scan parquet default.promotion (51) + +- * Sort (64) + +- Exchange (63) + +- * Project (62) + +- * Filter (61) + +- * ColumnarToRow (60) + +- Scan parquet default.catalog_returns (59) (1) Scan parquet default.catalog_sales @@ -212,7 +214,7 @@ Join condition: None Output [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17, i_item_desc#21] Input [8]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17, i_item_sk#20, i_item_desc#21] -(33) ReusedExchange [Reuses operator id: 79] +(33) ReusedExchange [Reuses operator id: 81] Output [4]: [d_date_sk#23, d_date#24, 
d_week_seq#25, d_date_sk#26] (34) BroadcastHashJoin [codegen id : 10] @@ -224,220 +226,228 @@ Join condition: (d_date#17 > date_add(d_date#24, 5)) Output [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26] Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17, i_item_desc#21, d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] -(36) Sort [codegen id : 10] +(36) Exchange +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26] +Arguments: hashpartitioning(cs_item_sk#4, d_date_sk#26, 5), ENSURE_REQUIREMENTS, [id=#27] + +(37) Sort [codegen id : 11] Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26] Arguments: [cs_item_sk#4 ASC NULLS FIRST, d_date_sk#26 ASC NULLS FIRST], false, 0 -(37) Scan parquet default.inventory -Output [4]: [inv_item_sk#27, inv_warehouse_sk#28, inv_quantity_on_hand#29, inv_date_sk#30] +(38) Scan parquet default.inventory +Output [4]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(inv_date_sk#30), dynamicpruningexpression(true)] +PartitionFilters: [isnotnull(inv_date_sk#31), dynamicpruningexpression(true)] PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] ReadSchema: struct -(38) ColumnarToRow [codegen id : 12] -Input [4]: [inv_item_sk#27, inv_warehouse_sk#28, inv_quantity_on_hand#29, inv_date_sk#30] +(39) ColumnarToRow [codegen id : 13] +Input [4]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31] -(39) Filter [codegen id : 12] -Input [4]: [inv_item_sk#27, inv_warehouse_sk#28, inv_quantity_on_hand#29, inv_date_sk#30] -Condition : ((isnotnull(inv_quantity_on_hand#29) AND isnotnull(inv_item_sk#27)) AND isnotnull(inv_warehouse_sk#28)) +(40) Filter [codegen id : 13] +Input [4]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31] +Condition : ((isnotnull(inv_quantity_on_hand#30) AND isnotnull(inv_item_sk#28)) AND isnotnull(inv_warehouse_sk#29)) -(40) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] +(41) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#32, w_warehouse_name#33] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct -(41) ColumnarToRow [codegen id : 11] -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] +(42) ColumnarToRow [codegen id : 12] +Input [2]: [w_warehouse_sk#32, w_warehouse_name#33] -(42) Filter [codegen id : 11] -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] -Condition : isnotnull(w_warehouse_sk#31) +(43) Filter [codegen id : 12] +Input [2]: [w_warehouse_sk#32, w_warehouse_name#33] +Condition : isnotnull(w_warehouse_sk#32) -(43) BroadcastExchange -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] +(44) BroadcastExchange +Input [2]: [w_warehouse_sk#32, w_warehouse_name#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#34] -(44) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [inv_warehouse_sk#28] -Right keys [1]: [w_warehouse_sk#31] +(45) BroadcastHashJoin [codegen id : 13] +Left keys [1]: 
[inv_warehouse_sk#29] +Right keys [1]: [w_warehouse_sk#32] Join condition: None -(45) Project [codegen id : 12] -Output [4]: [inv_item_sk#27, inv_quantity_on_hand#29, inv_date_sk#30, w_warehouse_name#32] -Input [6]: [inv_item_sk#27, inv_warehouse_sk#28, inv_quantity_on_hand#29, inv_date_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +(46) Project [codegen id : 13] +Output [4]: [inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] +Input [6]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_sk#32, w_warehouse_name#33] -(46) Exchange -Input [4]: [inv_item_sk#27, inv_quantity_on_hand#29, inv_date_sk#30, w_warehouse_name#32] -Arguments: hashpartitioning(inv_item_sk#27, 5), ENSURE_REQUIREMENTS, [id=#34] +(47) Exchange +Input [4]: [inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] +Arguments: hashpartitioning(inv_item_sk#28, inv_date_sk#31, 5), ENSURE_REQUIREMENTS, [id=#35] -(47) Sort [codegen id : 13] -Input [4]: [inv_item_sk#27, inv_quantity_on_hand#29, inv_date_sk#30, w_warehouse_name#32] -Arguments: [inv_item_sk#27 ASC NULLS FIRST, inv_date_sk#30 ASC NULLS FIRST], false, 0 +(48) Sort [codegen id : 14] +Input [4]: [inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] +Arguments: [inv_item_sk#28 ASC NULLS FIRST, inv_date_sk#31 ASC NULLS FIRST], false, 0 -(48) SortMergeJoin [codegen id : 15] +(49) SortMergeJoin [codegen id : 16] Left keys [2]: [cs_item_sk#4, d_date_sk#26] -Right keys [2]: [inv_item_sk#27, inv_date_sk#30] -Join condition: (inv_quantity_on_hand#29 < cs_quantity#7) +Right keys [2]: [inv_item_sk#28, inv_date_sk#31] +Join condition: (inv_quantity_on_hand#30 < cs_quantity#7) -(49) Project [codegen id : 15] -Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#32, i_item_desc#21, d_week_seq#25] -Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26, inv_item_sk#27, inv_quantity_on_hand#29, inv_date_sk#30, w_warehouse_name#32] +(50) Project [codegen id : 16] +Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] +Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26, inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] -(50) Scan parquet default.promotion -Output [1]: [p_promo_sk#35] +(51) Scan parquet default.promotion +Output [1]: [p_promo_sk#36] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [IsNotNull(p_promo_sk)] ReadSchema: struct -(51) ColumnarToRow [codegen id : 14] -Input [1]: [p_promo_sk#35] +(52) ColumnarToRow [codegen id : 15] +Input [1]: [p_promo_sk#36] -(52) Filter [codegen id : 14] -Input [1]: [p_promo_sk#35] -Condition : isnotnull(p_promo_sk#35) +(53) Filter [codegen id : 15] +Input [1]: [p_promo_sk#36] +Condition : isnotnull(p_promo_sk#36) -(53) BroadcastExchange -Input [1]: [p_promo_sk#35] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] +(54) BroadcastExchange +Input [1]: [p_promo_sk#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] -(54) BroadcastHashJoin [codegen id : 15] +(55) BroadcastHashJoin [codegen id : 16] Left keys [1]: [cs_promo_sk#5] -Right keys [1]: [p_promo_sk#35] +Right keys [1]: [p_promo_sk#36] Join condition: None -(55) Project [codegen id : 15] 
-Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#32, i_item_desc#21, d_week_seq#25] -Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#32, i_item_desc#21, d_week_seq#25, p_promo_sk#35] +(56) Project [codegen id : 16] +Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25, p_promo_sk#36] + +(57) Exchange +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [id=#38] -(56) Sort [codegen id : 15] -Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#32, i_item_desc#21, d_week_seq#25] +(58) Sort [codegen id : 17] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] Arguments: [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST], false, 0 -(57) Scan parquet default.catalog_returns -Output [3]: [cr_item_sk#37, cr_order_number#38, cr_returned_date_sk#39] +(59) Scan parquet default.catalog_returns +Output [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] ReadSchema: struct -(58) ColumnarToRow [codegen id : 16] -Input [3]: [cr_item_sk#37, cr_order_number#38, cr_returned_date_sk#39] +(60) ColumnarToRow [codegen id : 18] +Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] -(59) Filter [codegen id : 16] -Input [3]: [cr_item_sk#37, cr_order_number#38, cr_returned_date_sk#39] -Condition : (isnotnull(cr_item_sk#37) AND isnotnull(cr_order_number#38)) +(61) Filter [codegen id : 18] +Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Condition : (isnotnull(cr_item_sk#39) AND isnotnull(cr_order_number#40)) -(60) Project [codegen id : 16] -Output [2]: [cr_item_sk#37, cr_order_number#38] -Input [3]: [cr_item_sk#37, cr_order_number#38, cr_returned_date_sk#39] +(62) Project [codegen id : 18] +Output [2]: [cr_item_sk#39, cr_order_number#40] +Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] -(61) Exchange -Input [2]: [cr_item_sk#37, cr_order_number#38] -Arguments: hashpartitioning(cr_item_sk#37, 5), ENSURE_REQUIREMENTS, [id=#40] +(63) Exchange +Input [2]: [cr_item_sk#39, cr_order_number#40] +Arguments: hashpartitioning(cr_item_sk#39, cr_order_number#40, 5), ENSURE_REQUIREMENTS, [id=#42] -(62) Sort [codegen id : 17] -Input [2]: [cr_item_sk#37, cr_order_number#38] -Arguments: [cr_item_sk#37 ASC NULLS FIRST, cr_order_number#38 ASC NULLS FIRST], false, 0 +(64) Sort [codegen id : 19] +Input [2]: [cr_item_sk#39, cr_order_number#40] +Arguments: [cr_item_sk#39 ASC NULLS FIRST, cr_order_number#40 ASC NULLS FIRST], false, 0 -(63) SortMergeJoin [codegen id : 18] +(65) SortMergeJoin [codegen id : 20] Left keys [2]: [cs_item_sk#4, cs_order_number#6] -Right keys [2]: [cr_item_sk#37, cr_order_number#38] +Right keys [2]: [cr_item_sk#39, cr_order_number#40] Join condition: None -(64) Project [codegen id : 18] -Output [3]: [w_warehouse_name#32, i_item_desc#21, d_week_seq#25] -Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#32, i_item_desc#21, d_week_seq#25, cr_item_sk#37, cr_order_number#38] +(66) Project [codegen id : 20] +Output [3]: [w_warehouse_name#33, i_item_desc#21, d_week_seq#25] +Input [7]: 
[cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25, cr_item_sk#39, cr_order_number#40] -(65) HashAggregate [codegen id : 18] -Input [3]: [w_warehouse_name#32, i_item_desc#21, d_week_seq#25] -Keys [3]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25] +(67) HashAggregate [codegen id : 20] +Input [3]: [w_warehouse_name#33, i_item_desc#21, d_week_seq#25] +Keys [3]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#41] -Results [4]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25, count#42] +Aggregate Attributes [1]: [count#43] +Results [4]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count#44] -(66) Exchange -Input [4]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25, count#42] -Arguments: hashpartitioning(i_item_desc#21, w_warehouse_name#32, d_week_seq#25, 5), ENSURE_REQUIREMENTS, [id=#43] +(68) Exchange +Input [4]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count#44] +Arguments: hashpartitioning(i_item_desc#21, w_warehouse_name#33, d_week_seq#25, 5), ENSURE_REQUIREMENTS, [id=#45] -(67) HashAggregate [codegen id : 19] -Input [4]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25, count#42] -Keys [3]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25] +(69) HashAggregate [codegen id : 21] +Input [4]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count#44] +Keys [3]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#44] -Results [6]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25, count(1)#44 AS no_promo#45, count(1)#44 AS promo#46, count(1)#44 AS total_cnt#47] +Aggregate Attributes [1]: [count(1)#46] +Results [6]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count(1)#46 AS no_promo#47, count(1)#46 AS promo#48, count(1)#46 AS total_cnt#49] -(68) TakeOrderedAndProject -Input [6]: [i_item_desc#21, w_warehouse_name#32, d_week_seq#25, no_promo#45, promo#46, total_cnt#47] -Arguments: 100, [total_cnt#47 DESC NULLS LAST, i_item_desc#21 ASC NULLS FIRST, w_warehouse_name#32 ASC NULLS FIRST, d_week_seq#25 ASC NULLS FIRST], [i_item_desc#21, w_warehouse_name#32, d_week_seq#25, no_promo#45, promo#46, total_cnt#47] +(70) TakeOrderedAndProject +Input [6]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, no_promo#47, promo#48, total_cnt#49] +Arguments: 100, [total_cnt#49 DESC NULLS LAST, i_item_desc#21 ASC NULLS FIRST, w_warehouse_name#33 ASC NULLS FIRST, d_week_seq#25 ASC NULLS FIRST], [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, no_promo#47, promo#48, total_cnt#49] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = cs_sold_date_sk#8 IN dynamicpruning#9 -BroadcastExchange (79) -+- * Project (78) - +- * BroadcastHashJoin Inner BuildLeft (77) - :- BroadcastExchange (73) - : +- * Project (72) - : +- * Filter (71) - : +- * ColumnarToRow (70) - : +- Scan parquet default.date_dim (69) - +- * Filter (76) - +- * ColumnarToRow (75) - +- Scan parquet default.date_dim (74) - - -(69) Scan parquet default.date_dim -Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#48] +BroadcastExchange (81) ++- * Project (80) + +- * BroadcastHashJoin Inner BuildLeft (79) + :- BroadcastExchange (75) + : +- * Project (74) + : +- * Filter (73) + : +- * ColumnarToRow (72) + : +- Scan parquet default.date_dim (71) + +- * Filter (78) + +- * ColumnarToRow (77) + +- Scan parquet default.date_dim (76) + + +(71) Scan parquet default.date_dim +Output [4]: [d_date_sk#23, 
d_date#24, d_week_seq#25, d_year#50] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] ReadSchema: struct -(70) ColumnarToRow [codegen id : 1] -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#48] +(72) ColumnarToRow [codegen id : 1] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] -(71) Filter [codegen id : 1] -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#48] -Condition : ((((isnotnull(d_year#48) AND (d_year#48 = 2001)) AND isnotnull(d_date_sk#23)) AND isnotnull(d_week_seq#25)) AND isnotnull(d_date#24)) +(73) Filter [codegen id : 1] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] +Condition : ((((isnotnull(d_year#50) AND (d_year#50 = 2001)) AND isnotnull(d_date_sk#23)) AND isnotnull(d_week_seq#25)) AND isnotnull(d_date#24)) -(72) Project [codegen id : 1] +(74) Project [codegen id : 1] Output [3]: [d_date_sk#23, d_date#24, d_week_seq#25] -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#48] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] -(73) BroadcastExchange +(75) BroadcastExchange Input [3]: [d_date_sk#23, d_date#24, d_week_seq#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#51] -(74) Scan parquet default.date_dim -Output [2]: [d_date_sk#26, d_week_seq#50] +(76) Scan parquet default.date_dim +Output [2]: [d_date_sk#26, d_week_seq#52] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(75) ColumnarToRow -Input [2]: [d_date_sk#26, d_week_seq#50] +(77) ColumnarToRow +Input [2]: [d_date_sk#26, d_week_seq#52] -(76) Filter -Input [2]: [d_date_sk#26, d_week_seq#50] -Condition : (isnotnull(d_week_seq#50) AND isnotnull(d_date_sk#26)) +(78) Filter +Input [2]: [d_date_sk#26, d_week_seq#52] +Condition : (isnotnull(d_week_seq#52) AND isnotnull(d_date_sk#26)) -(77) BroadcastHashJoin [codegen id : 2] +(79) BroadcastHashJoin [codegen id : 2] Left keys [1]: [d_week_seq#25] -Right keys [1]: [d_week_seq#50] +Right keys [1]: [d_week_seq#52] Join condition: None -(78) Project [codegen id : 2] +(80) Project [codegen id : 2] Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] -Input [5]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26, d_week_seq#50] +Input [5]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26, d_week_seq#52] -(79) BroadcastExchange +(81) BroadcastExchange Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#51] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt index d84393b2ff106..e838025a71db8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt @@ -1,126 +1,132 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo] - WholeStageCodegen (19) + 
WholeStageCodegen (21) HashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count] [count(1),no_promo,promo,total_cnt,count] InputAdapter Exchange [i_item_desc,w_warehouse_name,d_week_seq] #1 - WholeStageCodegen (18) + WholeStageCodegen (20) HashAggregate [i_item_desc,w_warehouse_name,d_week_seq] [count,count] Project [w_warehouse_name,i_item_desc,d_week_seq] SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] InputAdapter - WholeStageCodegen (15) + WholeStageCodegen (17) Sort [cs_item_sk,cs_order_number] - Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] - BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] - SortMergeJoin [cs_item_sk,d_date_sk,inv_item_sk,inv_date_sk,inv_quantity_on_hand,cs_quantity] - InputAdapter - WholeStageCodegen (10) - Sort [cs_item_sk,d_date_sk] - Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_week_seq,d_date_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk,d_date,d_date] - Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk,d_date,i_item_desc] - SortMergeJoin [cs_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (5) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #2 - WholeStageCodegen (4) - Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk,d_date] - BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] - BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] - BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] - Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #3 - WholeStageCodegen (2) - Project [d_date_sk,d_date,d_week_seq,d_date_sk] - BroadcastHashJoin [d_week_seq,d_week_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Project [d_date_sk,d_date,d_week_seq] - Filter [d_year,d_date_sk,d_week_seq,d_date] - ColumnarToRow + InputAdapter + Exchange [cs_item_sk,cs_order_number] #2 + WholeStageCodegen (16) + Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + SortMergeJoin [cs_item_sk,d_date_sk,inv_item_sk,inv_date_sk,inv_quantity_on_hand,cs_quantity] + InputAdapter + WholeStageCodegen (11) + Sort [cs_item_sk,d_date_sk] + InputAdapter + Exchange [cs_item_sk,d_date_sk] #3 + WholeStageCodegen (10) + Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_week_seq,d_date_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk,d_date,d_date] + Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk,d_date,i_item_desc] + SortMergeJoin [cs_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (5) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (4) + Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk,d_date] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Project 
[cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk,d_date,d_week_seq,d_date_sk] + BroadcastHashJoin [d_week_seq,d_week_seq] InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] - Filter [d_week_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [hd_demo_sk] - Filter [hd_buy_potential,hd_demo_sk] - ColumnarToRow + BroadcastExchange #6 + WholeStageCodegen (1) + Project [d_date_sk,d_date,d_week_seq] + Filter [d_year,d_date_sk,d_week_seq,d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + Filter [d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Project [cd_demo_sk] - Filter [cd_marital_status,cd_demo_sk] - ColumnarToRow + BroadcastExchange #7 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (3) - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] - InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #8 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - InputAdapter - ReusedExchange [d_date_sk,d_date,d_week_seq,d_date_sk] #3 - InputAdapter - WholeStageCodegen (13) - Sort [inv_item_sk,inv_date_sk] + BroadcastExchange #8 + WholeStageCodegen (2) + Project [cd_demo_sk] + Filter [cd_marital_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (3) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + WholeStageCodegen (7) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #10 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_desc] + InputAdapter + ReusedExchange [d_date_sk,d_date,d_week_seq,d_date_sk] #5 InputAdapter - Exchange [inv_item_sk] #9 - WholeStageCodegen (12) - Project [inv_item_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_name] - BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory 
[inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (11) - Filter [w_warehouse_sk] + WholeStageCodegen (14) + Sort [inv_item_sk,inv_date_sk] + InputAdapter + Exchange [inv_item_sk,inv_date_sk] #11 + WholeStageCodegen (13) + Project [inv_item_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk] ColumnarToRow InputAdapter - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (14) - Filter [p_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.promotion [p_promo_sk] + Scan parquet default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (12) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (15) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk] InputAdapter - WholeStageCodegen (17) + WholeStageCodegen (19) Sort [cr_item_sk,cr_order_number] InputAdapter - Exchange [cr_item_sk] #12 - WholeStageCodegen (16) + Exchange [cr_item_sk,cr_order_number] #14 + WholeStageCodegen (18) Project [cr_item_sk,cr_order_number] Filter [cr_item_sk,cr_order_number] ColumnarToRow diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 3bda5625471b3..383b84dc0d8f1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -706,14 +706,14 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { outputPlan match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, - DummySparkPlan(_, _, HashPartitioning(leftPartitioningExpressions, _), _, _), _), + ShuffleExchangeExec(HashPartitioning(leftPartitioningExpressions, _), _, _), _), SortExec(_, _, ShuffleExchangeExec(HashPartitioning(rightPartitioningExpressions, _), _, _), _), _) => assert(leftKeys === smjExec.leftKeys) assert(rightKeys === smjExec.rightKeys) - assert(leftPartitioningExpressions == Seq(exprA, exprB, exprA)) - assert(rightPartitioningExpressions == Seq(exprA, exprC, exprA)) + assert(leftKeys === leftPartitioningExpressions) + assert(rightKeys === rightPartitioningExpressions) case _ => fail(outputPlan.toString) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala index 046ff78ce9bd3..db99557466d95 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala @@ -138,6 +138,14 @@ class EnsureRequirementsSuite extends SharedSparkSession { } } + private def applyEnsureRequirementsWithSubsetKeys(plan: SparkPlan): SparkPlan = { + var res: SparkPlan = null + withSQLConf(SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION.key -> "false") { + res = EnsureRequirements.apply(plan) + } + res + } + test("Successful compatibility check with HashShuffleSpec") { val plan1 = DummySparkPlan( 
outputPartitioning = HashPartitioning(exprA :: Nil, 5)) @@ -155,10 +163,14 @@ class EnsureRequirementsSuite extends SharedSparkSession { case other => fail(other.toString) } - // should also work if both partition keys are subset of their corresponding cluster keys smjExec = SortMergeJoinExec( exprA :: exprB :: Nil, exprB :: exprC :: Nil, Inner, None, plan1, plan2) - EnsureRequirements.apply(smjExec) match { + // By default we can't eliminate shuffles if the partitions keys are subset of join keys. + assert(EnsureRequirements.apply(smjExec) + .collect { case s: ShuffleExchangeLike => s }.length == 2) + // with the config set, it should also work if both partition keys are subset of their + // corresponding cluster keys + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), _) => @@ -169,7 +181,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { smjExec = SortMergeJoinExec( exprB :: exprA :: Nil, exprC :: exprB :: Nil, Inner, None, plan1, plan2) - EnsureRequirements.apply(smjExec) match { + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), _) => @@ -186,7 +198,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { outputPartitioning = HashPartitioning(exprA :: exprC :: Nil, 5)) var smjExec = SortMergeJoinExec( exprA :: exprB :: exprB :: Nil, exprA :: exprC :: exprC :: Nil, Inner, None, plan1, plan2) - EnsureRequirements.apply(smjExec) match { + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), _) => @@ -201,7 +213,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { outputPartitioning = HashPartitioning(exprA :: exprC :: exprA :: Nil, 5)) smjExec = SortMergeJoinExec( exprA :: exprB :: exprB :: Nil, exprA :: exprC :: exprC :: Nil, Inner, None, plan1, plan2) - EnsureRequirements.apply(smjExec) match { + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), _) => @@ -216,7 +228,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { outputPartitioning = HashPartitioning(exprA :: exprC :: exprA :: Nil, 5)) smjExec = SortMergeJoinExec( exprA :: exprB :: exprB :: Nil, exprA :: exprC :: exprD :: Nil, Inner, None, plan1, plan2) - EnsureRequirements.apply(smjExec) match { + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), _) => @@ -231,7 +243,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { outputPartitioning = HashPartitioning(exprA :: exprC :: Nil, 5)) smjExec = SortMergeJoinExec( exprA :: exprB :: exprB :: Nil, exprA :: exprC :: exprC :: Nil, Inner, None, plan1, plan2) - EnsureRequirements.apply(smjExec) match { + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(leftKeys, rightKeys, 
_, _, SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), _) => @@ -249,7 +261,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { outputPartitioning = HashPartitioning(exprD :: Nil, 5)) var smjExec = SortMergeJoinExec( exprA :: exprB :: Nil, exprC :: exprD :: Nil, Inner, None, plan1, plan2) - EnsureRequirements.apply(smjExec) match { + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), SortExec(_, _, ShuffleExchangeExec(p: HashPartitioning, _, _), _), _) => @@ -266,7 +278,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { outputPartitioning = HashPartitioning(exprD :: Nil, 10)) smjExec = SortMergeJoinExec( exprA :: exprB :: Nil, exprC :: exprD :: Nil, Inner, None, plan1, plan2) - EnsureRequirements.apply(smjExec) match { + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, ShuffleExchangeExec(p: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), _) => @@ -283,7 +295,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { outputPartitioning = HashPartitioning(exprD :: Nil, 5)) smjExec = SortMergeJoinExec( exprA :: exprB :: Nil, exprC :: exprD :: Nil, Inner, None, plan1, plan2) - EnsureRequirements.apply(smjExec) match { + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), SortExec(_, _, ShuffleExchangeExec(p: HashPartitioning, _, _), _), _) => @@ -292,8 +304,6 @@ class EnsureRequirementsSuite extends SharedSparkSession { assert(p.expressions == Seq(exprC)) case other => fail(other.toString) } - - } } @@ -304,7 +314,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { outputPartitioning = HashPartitioning(exprA :: exprC :: exprB :: Nil, 5)) var smjExec = SortMergeJoinExec( exprA :: exprB :: exprB :: Nil, exprA :: exprC :: exprC :: Nil, Inner, None, plan1, plan2) - EnsureRequirements.apply(smjExec) match { + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), SortExec(_, _, ShuffleExchangeExec(p: HashPartitioning, _, _), _), _) => @@ -320,7 +330,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { outputPartitioning = HashPartitioning(exprA :: exprC :: exprB :: Nil, 5)) smjExec = SortMergeJoinExec( exprA :: exprB :: exprB :: Nil, exprA :: exprC :: exprD :: Nil, Inner, None, plan1, plan2) - EnsureRequirements.apply(smjExec) match { + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), SortExec(_, _, ShuffleExchangeExec(p: HashPartitioning, _, _), _), _) => @@ -403,13 +413,26 @@ class EnsureRequirementsSuite extends SharedSparkSession { } // HashPartitioning(1) <-> RangePartitioning(10) - // Only RHS should be shuffled and be converted to HashPartitioning(1) <-> HashPartitioning(1) + // If the conf is not set, both sides should be shuffled and be converted to + // HashPartitioning(5) <-> HashPartitioning(5) + // If the conf is set, only RHS should be shuffled and be converted to + // HashPartitioning(1) <-> HashPartitioning(1) plan1 = DummySparkPlan(outputPartitioning 
= HashPartitioning(Seq(exprA), 1)) plan2 = DummySparkPlan(outputPartitioning = RangePartitioning( Seq(SortOrder.apply(exprC, Ascending, sameOrderExpressions = Seq.empty)), 10)) smjExec = SortMergeJoinExec( exprA :: exprB :: Nil, exprC :: exprD :: Nil, Inner, None, plan1, plan2) EnsureRequirements.apply(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, ShuffleExchangeExec(left: HashPartitioning, _, _), _), + SortExec(_, _, ShuffleExchangeExec(right: HashPartitioning, _, _), _), _) => + assert(left.numPartitions == 5) + assert(left.expressions == Seq(exprA, exprB)) + assert(right.numPartitions == 5) + assert(right.expressions == Seq(exprC, exprD)) + case other => fail(other.toString) + } + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(_, _, _, _, SortExec(_, _, DummySparkPlan(_, _, left: HashPartitioning, _, _), _), SortExec(_, _, ShuffleExchangeExec(right: HashPartitioning, _, _), _), _) => @@ -446,7 +469,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { smjExec = SortMergeJoinExec( exprA :: exprB :: exprC :: exprD :: Nil, exprA :: exprB :: exprC :: exprD :: Nil, Inner, None, plan1, plan2) - EnsureRequirements.apply(smjExec) match { + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(_, _, _, _, SortExec(_, _, DummySparkPlan(_, _, left: PartitioningCollection, _, _), _), SortExec(_, _, ShuffleExchangeExec(right: HashPartitioning, _, _), _), _) => @@ -463,7 +486,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { smjExec = SortMergeJoinExec( exprA :: exprB :: exprC :: exprD :: Nil, exprA :: exprB :: exprC :: exprD :: Nil, Inner, None, plan1, plan2) - EnsureRequirements.apply(smjExec) match { + applyEnsureRequirementsWithSubsetKeys(smjExec) match { case SortMergeJoinExec(_, _, _, _, SortExec(_, _, ShuffleExchangeExec(left: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, right: PartitioningCollection, _, _), _), _) => @@ -482,17 +505,17 @@ class EnsureRequirementsSuite extends SharedSparkSession { // HashPartitioning(5) <-> HashPartitioning(5) // No shuffle should be inserted var plan1: SparkPlan = DummySparkPlan( - outputPartitioning = HashPartitioning(exprA :: Nil, 5)) + outputPartitioning = HashPartitioning(exprA :: exprB :: Nil, 5)) var plan2: SparkPlan = DummySparkPlan( - outputPartitioning = HashPartitioning(exprC :: Nil, 5)) + outputPartitioning = HashPartitioning(exprC :: exprD :: Nil, 5)) var smjExec = SortMergeJoinExec( exprA :: exprB :: Nil, exprC :: exprD :: Nil, Inner, None, plan1, plan2) EnsureRequirements.apply(smjExec) match { case SortMergeJoinExec(_, _, _, _, SortExec(_, _, DummySparkPlan(_, _, left: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, right: HashPartitioning, _, _), _), _) => - assert(left.expressions === Seq(exprA)) - assert(right.expressions === Seq(exprC)) + assert(left.expressions === Seq(exprA, exprB)) + assert(right.expressions === Seq(exprC, exprD)) case other => fail(other.toString) } @@ -521,15 +544,15 @@ class EnsureRequirementsSuite extends SharedSparkSession { outputPartitioning = RangePartitioning( Seq(SortOrder.apply(exprA, Ascending, sameOrderExpressions = Seq.empty)), 10)) plan2 = DummySparkPlan( - outputPartitioning = HashPartitioning(exprD :: Nil, 5)) + outputPartitioning = HashPartitioning(exprC :: exprD :: Nil, 5)) smjExec = SortMergeJoinExec( exprA :: exprB :: Nil, exprC :: exprD :: Nil, Inner, None, plan1, plan2) EnsureRequirements.apply(smjExec) match { case SortMergeJoinExec(_, _, _, _, SortExec(_, _, 
ShuffleExchangeExec(left: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, right: HashPartitioning, _, _), _), _) => - assert(left.expressions === Seq(exprB)) - assert(right.expressions === Seq(exprD)) + assert(left.expressions === Seq(exprA, exprB)) + assert(right.expressions === Seq(exprC, exprD)) assert(left.numPartitions == 5) assert(right.numPartitions == 5) case other => fail(other.toString) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index be9d1b0e179fe..d90c8732ea287 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -773,8 +773,7 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti // join predicates is a super set of child's partitioning columns val bucketedTableTestSpec1 = - BucketedTableTestSpec(Some(BucketSpec(8, Seq("i", "j"), Seq("i", "j"))), - numPartitions = 1, expectedShuffle = false) + BucketedTableTestSpec(Some(BucketSpec(8, Seq("i", "j"), Seq("i", "j"))), numPartitions = 1) testBucketing( bucketedTableTestSpecLeft = bucketedTableTestSpec1, bucketedTableTestSpecRight = bucketedTableTestSpec1, From 99805558fc80743747f32c7008cb7cc99c1cda01 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 13 Jan 2022 09:07:31 -0800 Subject: [PATCH 011/513] [SPARK-37864][SQL] Support vectorized read boolean values use RLE encoding with Parquet DataPage V2 ### What changes were proposed in this pull request? Parquet v2 data page write Boolean Values use RLE encoding, when read v2 boolean type values it will throw exceptions as follows now: ```java Caused by: java.lang.UnsupportedOperationException: Unsupported encoding: RLE at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.getValuesReader(VectorizedColumnReader.java:305) ~[classes/:?] at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.initDataReader(VectorizedColumnReader.java:277) ~[classes/:?] at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readPageV2(VectorizedColumnReader.java:344) ~[classes/:?] at ``` This PR extends the `readBooleans` and `skipBooleans` of `VectorizedRleValuesReader` to ensure that the above scenario can pass. ### Why are the changes needed? Support Parquet v2 data page RLE encoding for the vectorized read path ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new test case Closes #35163 from LuciferYang/SPARK-37864. 
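For anyone who wants to hit this code path outside the test harness, a minimal spark-shell sketch in the spirit of the new `ParquetEncodingSuite` case below might look like this; it assumes the test's constants resolve to the literal writer option `parquet.writer.version` with value `PARQUET_2_0` and to the SQL config `spark.sql.parquet.enableVectorizedReader`, so treat it as an illustration rather than a verified recipe.

```scala
// Hypothetical spark-shell sketch (assumes `spark` is in scope and that the option/config
// keys named above are correct). It writes boolean columns with the Parquet v2 writer so
// the data pages use RLE encoding, then reads them back through the vectorized reader.
import java.nio.file.Files

val path = Files.createTempDirectory("parquet-v2-bool").resolve("test.parquet").toString

spark.range(10000)
  .selectExpr("true AS a", "false AS b", "id % 2 = 1 AS c")
  .write
  .option("parquet.writer.version", "PARQUET_2_0")  // assumed literal for WriterVersion.PARQUET_2_0
  .mode("overwrite")
  .parquet(path)

spark.conf.set("spark.sql.parquet.enableVectorizedReader", "true")
// Before this patch the vectorized read path threw
// "UnsupportedOperationException: Unsupported encoding: RLE"; with it, the rows come back.
println(spark.read.parquet(path).collect().length)
```

The new test case below does essentially the same through `ParquetOutputFormat.WRITER_VERSION`, and additionally inspects the column chunk metadata to confirm that RLE was actually chosen for all three boolean columns.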
Authored-by: yangjie01 Signed-off-by: Chao Sun --- .../parquet/VectorizedColumnReader.java | 11 ++++ .../parquet/VectorizedRleValuesReader.java | 58 ++++++++++++++----- .../parquet/ParquetEncodingSuite.scala | 36 ++++++++++++ 3 files changed, 89 insertions(+), 16 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java index 0a7b929dafea3..57a307b1b7b6b 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java @@ -39,6 +39,7 @@ import org.apache.spark.sql.types.Decimal; import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; /** @@ -292,6 +293,16 @@ private ValuesReader getValuesReader(Encoding encoding) { return new VectorizedDeltaByteArrayReader(); case DELTA_BINARY_PACKED: return new VectorizedDeltaBinaryPackedReader(); + case RLE: + PrimitiveType.PrimitiveTypeName typeName = + this.descriptor.getPrimitiveType().getPrimitiveTypeName(); + // RLE encoding only supports boolean type `Values`, and `bitwidth` is always 1. + if (typeName == BOOLEAN) { + return new VectorizedRleValuesReader(1); + } else { + throw new UnsupportedOperationException( + "RLE encoding is not supported for values of type: " + typeName); + } default: throw new UnsupportedOperationException("Unsupported encoding: " + encoding); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java index cd97fb6c3cd55..bd7cbc7e17188 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java @@ -40,6 +40,7 @@ * This encoding is used in multiple places: * - Definition/Repetition levels * - Dictionary ids. 
+ * - Boolean type values of Parquet DataPageV2 */ public final class VectorizedRleValuesReader extends ValuesReader implements VectorizedValuesReader { @@ -369,7 +370,25 @@ public void readBinary(int total, WritableColumnVector c, int rowId) { @Override public void readBooleans(int total, WritableColumnVector c, int rowId) { - throw new UnsupportedOperationException("only readInts is valid."); + int left = total; + while (left > 0) { + if (this.currentCount == 0) this.readNextGroup(); + int n = Math.min(left, this.currentCount); + switch (mode) { + case RLE: + c.putBooleans(rowId, n, currentValue != 0); + break; + case PACKED: + for (int i = 0; i < n; ++i) { + // For Boolean types, `currentBuffer[currentBufferIdx++]` can only be 0 or 1 + c.putByte(rowId + i, (byte) currentBuffer[currentBufferIdx++]); + } + break; + } + rowId += n; + left -= n; + currentCount -= n; + } } @Override @@ -389,25 +408,12 @@ public Binary readBinary(int len) { @Override public void skipIntegers(int total) { - int left = total; - while (left > 0) { - if (this.currentCount == 0) this.readNextGroup(); - int n = Math.min(left, this.currentCount); - switch (mode) { - case RLE: - break; - case PACKED: - currentBufferIdx += n; - break; - } - currentCount -= n; - left -= n; - } + skipValues(total); } @Override public void skipBooleans(int total) { - throw new UnsupportedOperationException("only skipIntegers is valid"); + skipValues(total); } @Override @@ -533,4 +539,24 @@ private void readNextGroup() { throw new ParquetDecodingException("Failed to read from input stream", e); } } + + /** + * Skip `n` values from the current reader. + */ + private void skipValues(int n) { + int left = n; + while (left > 0) { + if (this.currentCount == 0) this.readNextGroup(); + int num = Math.min(left, this.currentCount); + switch (mode) { + case RLE: + break; + case PACKED: + currentBufferIdx += num; + break; + } + currentCount -= num; + left -= num; + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala index f545e88517700..746d9c6358083 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala @@ -182,4 +182,40 @@ class ParquetEncodingSuite extends ParquetCompatibilityTest with SharedSparkSess } } } + + test("parquet v2 pages - rle encoding for boolean value columns") { + val extraOptions = Map[String, String]( + ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString + ) + + val hadoopConf = spark.sessionState.newHadoopConfWithOptions(extraOptions) + withSQLConf( + SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", + ParquetOutputFormat.JOB_SUMMARY_LEVEL -> "ALL") { + withTempPath { dir => + val path = s"${dir.getCanonicalPath}/test.parquet" + val size = 10000 + val data = (1 to size).map { i => (true, false, i % 2 == 1) } + + spark.createDataFrame(data) + .write.options(extraOptions).mode("overwrite").parquet(path) + + val blockMetadata = readFooter(new Path(path), hadoopConf).getBlocks.asScala.head + val columnChunkMetadataList = blockMetadata.getColumns.asScala + + // Verify that indeed rle encoding is used for each column + assert(columnChunkMetadataList.length === 3) + assert(columnChunkMetadataList.head.getEncodings.contains(Encoding.RLE)) + 
assert(columnChunkMetadataList(1).getEncodings.contains(Encoding.RLE)) + assert(columnChunkMetadataList(2).getEncodings.contains(Encoding.RLE)) + + val actual = spark.read.parquet(path).collect() + assert(actual.length == size) + assert(actual.map(_.getBoolean(0)).forall(_ == true)) + assert(actual.map(_.getBoolean(1)).forall(_ == false)) + val excepted = (1 to size).map { i => i % 2 == 1 } + assert(actual.map(_.getBoolean(2)).sameElements(excepted)) + } + } + } } From f7dd37c40fa1ef5ed813b044080c07a19589e3d1 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 13 Jan 2022 14:06:43 -0800 Subject: [PATCH 012/513] [SPARK-37900][CORE] Use `SparkMasterRegex.KUBERNETES_REGEX` in `SecurityManager` ### What changes were proposed in this pull request? This PR removes `SecurityManager.k8sRegex` and use `SparkMasterRegex.KUBERNETES_REGEX` in `SecurityManager`. ### Why are the changes needed? `SparkMasterRegex.KUBERNETES_REGEX` is more accurate and official than the existing `val k8sRegex = "k8s.*".r` pattern. https://github.com/apache/spark/blob/99805558fc80743747f32c7008cb7cc99c1cda01/core/src/main/scala/org/apache/spark/SparkContext.scala#L3063 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs with the existing test coverage. Closes #35195 from dongjoon-hyun/SPARK-37900. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/SecurityManager.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SecurityManager.scala b/core/src/main/scala/org/apache/spark/SecurityManager.scala index d061627bea69c..f11176cc23310 100644 --- a/core/src/main/scala/org/apache/spark/SecurityManager.scala +++ b/core/src/main/scala/org/apache/spark/SecurityManager.scala @@ -324,7 +324,7 @@ private[spark] class SecurityManager( case "yarn" | "local" | LOCAL_N_REGEX(_) | LOCAL_N_FAILURES_REGEX(_, _) => true - case k8sRegex() => + case KUBERNETES_REGEX(_) => // Don't propagate the secret through the user's credentials in kubernetes. That conflicts // with the way k8s handles propagation of delegation tokens. false @@ -354,7 +354,7 @@ private[spark] class SecurityManager( private def secretKeyFromFile(): Option[String] = { sparkConf.get(authSecretFileConf).flatMap { secretFilePath => sparkConf.getOption(SparkLauncher.SPARK_MASTER).map { - case k8sRegex() => + case SparkMasterRegex.KUBERNETES_REGEX(_) => val secretFile = new File(secretFilePath) require(secretFile.isFile, s"No file found containing the secret key at $secretFilePath.") val base64Key = Base64.getEncoder.encodeToString(Files.readAllBytes(secretFile.toPath)) @@ -391,7 +391,6 @@ private[spark] class SecurityManager( private[spark] object SecurityManager { - val k8sRegex = "k8s.*".r val SPARK_AUTH_CONF = NETWORK_AUTH_ENABLED.key val SPARK_AUTH_SECRET_CONF = AUTH_SECRET.key // This is used to set auth secret to an executor's env variable. It should have the same From 1431a4aa74cf69b3ee607e313dece6fed8390de6 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 13 Jan 2022 15:28:43 -0800 Subject: [PATCH 013/513] [SPARK-37887][CORE] Fix the check of repl log level ### What changes were proposed in this pull request? This patch fixes the check of repl's log level. So we can correctly know if the repl class is set with log level or not. ### Why are the changes needed? Same as the check in `SparkShellLoggingFilter`, `getLevel` cannot be used anymore to check if the log level is set or not for a logger in log4j2. 
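As an illustration of why the old probe no longer works: in log4j2 a logger's `getLevel()` is resolved from the nearest configured ancestor (ultimately the root config), so it does not return `null` the way an unconfigured log4j 1.x logger did. The check therefore has to compare the logger's effective `LoggerConfig` with the root configuration, which is what the `loggerWithCustomConfig` helper in the diff below does. A rough sketch of that idea, assuming the log4j2 `core` classes are on the classpath:

```scala
import org.apache.logging.log4j.LogManager
import org.apache.logging.log4j.core.{Logger => Log4jLogger}

// Sketch only: mirrors the loggerWithCustomConfig check below. A logger counts as
// "explicitly configured" if it is not attached to the root LoggerConfig, or if its
// resolved level differs from the root level (e.g. changed programmatically).
def hasCustomConfig(name: String): Boolean = {
  val logger = LogManager.getLogger(name).asInstanceOf[Log4jLogger]
  val rootConfig = LogManager.getRootLogger.asInstanceOf[Log4jLogger].get()
  (logger.get() ne rootConfig) || logger.getLevel != rootConfig.getLevel()
}
```

The same caveat as in the real helper applies: a logger whose level was changed programmatically to exactly the root level cannot be distinguished from an unconfigured one.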
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual verified locally. Closes #35198 from viirya/SPARK-37887. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/internal/Logging.scala | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/Logging.scala b/core/src/main/scala/org/apache/spark/internal/Logging.scala index bdc5139fd918e..d483a93464c06 100644 --- a/core/src/main/scala/org/apache/spark/internal/Logging.scala +++ b/core/src/main/scala/org/apache/spark/internal/Logging.scala @@ -154,7 +154,11 @@ trait Logging { // Use the repl's main class to define the default log level when running the shell, // overriding the root logger's config if they're different. val replLogger = LogManager.getLogger(logName).asInstanceOf[Log4jLogger] - val replLevel = Option(replLogger.getLevel()).getOrElse(Level.WARN) + val replLevel = if (Logging.loggerWithCustomConfig(replLogger)) { + replLogger.getLevel() + } else { + Level.WARN + } // Update the consoleAppender threshold to replLevel if (replLevel != rootLogger.getLevel()) { if (!silent) { @@ -229,6 +233,17 @@ private[spark] object Logging { "org.apache.logging.slf4j.Log4jLoggerFactory".equals(binderClass) } + // Return true if the logger has custom configuration. It depends on: + // 1. If the logger isn't attached with root logger config (i.e., with custom configuration), or + // 2. the logger level is different to root config level (i.e., it is changed programmatically). + // + // Note that if a logger is programmatically changed log level but set to same level + // as root config level, we cannot tell if it is with custom configuration. + private def loggerWithCustomConfig(logger: Log4jLogger): Boolean = { + val rootConfig = LogManager.getRootLogger.asInstanceOf[Log4jLogger].get() + (logger.get() ne rootConfig) || (logger.getLevel != rootConfig.getLevel()) + } + /** * Return true if log4j2 is initialized by default configuration which has one * appender with error level. See `org.apache.logging.log4j.core.config.DefaultConfiguration`. @@ -267,17 +282,6 @@ private[spark] object Logging { } } - // Return true if the logger has custom configuration. It depends on: - // 1. If the logger isn't attached with root logger config (i.e., with custom configuration), or - // 2. the logger level is different to root config level (i.e., it is changed programmatically). - // - // Note that if a logger is programmatically changed log level but set to same level - // as root config level, we cannot tell if it is with custom configuration. - private def loggerWithCustomConfig(logger: Log4jLogger): Boolean = { - val rootConfig = LogManager.getRootLogger.asInstanceOf[Log4jLogger].get() - (logger.get() ne rootConfig) || (logger.getLevel != rootConfig.getLevel()) - } - override def getState: LifeCycle.State = status override def initialize(): Unit = { From b092321077400e4344f8ea11592600f5e759041b Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 14 Jan 2022 08:42:49 +0900 Subject: [PATCH 014/513] [SPARK-37879][INFRA] Show test report in GitHub Actions builds from PRs ### What changes were proposed in this pull request? This PR is a retry of https://github.com/apache/spark/pull/35179. This PR does the same thing - replacing Actions view to Check run view for `See test results` link. 
The main difference with the PR https://github.com/apache/spark/pull/35179 is that we now keep the Actions run id as is as metadata so this Actions run id can be used to update the status of tests in PRs at Apache Spark: https://github.com/apache/spark/blob/85efc85f9aa93b3fac9e591c96efa38d4414adf8/.github/workflows/update_build_status.yml#L63-L74 Now this PR shouldn't affect [update_build_status.yml](https://github.com/apache/spark/blob/master/.github/workflows/update_build_status.yml) which was the main reason of a followup and revert. ### Why are the changes needed? For developers to see the test report, and they can easily detect which test is failed. ### Does this PR introduce _any_ user-facing change? No, dev-only ### How was this patch tested? Tested in https://github.com/HyukjinKwon/spark/pull/51. Closes #35193 from HyukjinKwon/SPARK-37879-retry. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .github/workflows/notify_test_workflow.yml | 31 +++++++++++++++++----- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/.github/workflows/notify_test_workflow.yml b/.github/workflows/notify_test_workflow.yml index 17d75938a802c..04e7ab8309025 100644 --- a/.github/workflows/notify_test_workflow.yml +++ b/.github/workflows/notify_test_workflow.yml @@ -39,6 +39,7 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} script: | const endpoint = 'GET /repos/:owner/:repo/actions/workflows/:id/runs?&branch=:branch' + const check_run_endpoint = 'GET /repos/:owner/:repo/commits/:ref/check-runs' // TODO: Should use pull_request.user and pull_request.user.repos_url? // If a different person creates a commit to another forked repo, @@ -49,6 +50,11 @@ jobs: id: 'build_and_test.yml', branch: context.payload.pull_request.head.ref, } + const check_run_params = { + owner: context.payload.pull_request.head.repo.owner.login, + repo: context.payload.pull_request.head.repo.name, + ref: context.payload.pull_request.head.ref, + } console.log('Ref: ' + context.payload.pull_request.head.ref) console.log('SHA: ' + context.payload.pull_request.head.sha) @@ -100,16 +106,29 @@ jobs: } }) } else { - const runID = runs.data.workflow_runs[0].id + const run_id = runs.data.workflow_runs[0].id if (runs.data.workflow_runs[0].head_sha != context.payload.pull_request.head.sha) { throw new Error('There was a new unsynced commit pushed. Please retrigger the workflow.'); } - const runUrl = 'https://github.com/' + // Here we get check run ID to provide Check run view instead of Actions view, see also SPARK-37879. + const check_runs = await github.request(check_run_endpoint, check_run_params) + const check_run_head = check_runs.data.check_runs.filter(r => r.name === "Configure jobs")[0] + + if (check_run_head.head_sha != context.payload.pull_request.head.sha) { + throw new Error('There was a new unsynced commit pushed. 
Please retrigger the workflow.'); + } + + const check_run_url = 'https://github.com/' + + context.payload.pull_request.head.repo.full_name + + '/runs/' + + check_run_head.id + + const actions_url = 'https://github.com/' + context.payload.pull_request.head.repo.full_name + '/actions/runs/' - + runID + + run_id github.checks.create({ owner: context.repo.owner, @@ -119,13 +138,13 @@ jobs: status: status, output: { title: 'Test results', - summary: '[See test results](' + runUrl + ')', + summary: '[See test results](' + check_run_url + ')', text: JSON.stringify({ owner: context.payload.pull_request.head.repo.owner.login, repo: context.payload.pull_request.head.repo.name, - run_id: runID + run_id: run_id }) }, - details_url: runUrl, + details_url: actions_url, }) } From db9807443dabaee8237a4748c99572c010ddb0c9 Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 14 Jan 2022 08:52:18 +0900 Subject: [PATCH 015/513] [MINOR][PYTHON] Replace Iterable import from collections with collections.abc ### What changes were proposed in this pull request? Replace ```python from collections import Iterable ``` with ```python from collections.abc import Iterable ``` ### Why are the changes needed? > Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3, and in 3.10 it will stop working. In other places, like `pyspark.pandas.indexing` https://github.com/apache/spark/blob/99805558fc80743747f32c7008cb7cc99c1cda01/python/pyspark/pandas/indexing.py#L22 we already import from `collections.abc`, but this one somehow passed under the radar. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #35197 from zero323/TYPING-ABC-ITERABLE. Authored-by: zero323 Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/typedef/typehints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/pandas/typedef/typehints.py b/python/pyspark/pandas/typedef/typehints.py index eddd06d061def..6b3083a22f853 100644 --- a/python/pyspark/pandas/typedef/typehints.py +++ b/python/pyspark/pandas/typedef/typehints.py @@ -22,7 +22,7 @@ import decimal import sys import typing -from collections import Iterable +from collections.abc import Iterable from distutils.version import LooseVersion from inspect import getfullargspec, isclass from typing import ( From 31d8489f3993f608ed2c8d39727b345ac71170b8 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 14 Jan 2022 10:14:58 +0900 Subject: [PATCH 016/513] [SPARK-37903][PYTHON] Replace string_typehints with get_type_hints ### What changes were proposed in this pull request? Replaces `string_typehints` with `get_type_hints`. ### Why are the changes needed? Currently we have a hacky way to resolve type hints written as strings, but we can use `get_type_hints` instead. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35200 from ueshin/issues/SPARK-37903/string_typehints. 
Authored-by: Takuya UESHIN Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/datetimes.py | 64 ++++------ python/pyspark/pandas/frame.py | 13 +- python/pyspark/pandas/indexes/datetimes.py | 6 +- python/pyspark/pandas/indexes/timedelta.py | 3 +- python/pyspark/pandas/namespace.py | 5 +- python/pyspark/pandas/strings.py | 116 ++++++------------ .../pyspark/pandas/tests/test_categorical.py | 4 +- python/pyspark/pandas/tests/test_typedef.py | 12 -- .../pandas/typedef/string_typehints.py | 40 ------ python/pyspark/pandas/typedef/typehints.py | 19 +-- 10 files changed, 77 insertions(+), 205 deletions(-) delete mode 100644 python/pyspark/pandas/typedef/string_typehints.py diff --git a/python/pyspark/pandas/datetimes.py b/python/pyspark/pandas/datetimes.py index f52809d5abef7..d0b3f2ff3d749 100644 --- a/python/pyspark/pandas/datetimes.py +++ b/python/pyspark/pandas/datetimes.py @@ -18,17 +18,16 @@ """ Date/Time related functions on pandas-on-Spark Series """ -from typing import Any, Optional, Union, TYPE_CHECKING, no_type_check +from typing import Any, Optional, Union, no_type_check import numpy as np import pandas as pd # noqa: F401 from pandas.tseries.offsets import DateOffset + +import pyspark.pandas as ps import pyspark.sql.functions as F from pyspark.sql.types import DateType, TimestampType, TimestampNTZType, LongType -if TYPE_CHECKING: - import pyspark.pandas as ps - class DatetimeMethods: """Date/Time methods for pandas-on-Spark Series""" @@ -107,8 +106,7 @@ def microsecond(self) -> "ps.Series": The microseconds of the datetime. """ - @no_type_check - def pandas_microsecond(s) -> "ps.Series[np.int64]": + def pandas_microsecond(s) -> ps.Series[np.int64]: # type: ignore[no-untyped-def] return s.dt.microsecond return self._data.pandas_on_spark.transform_batch(pandas_microsecond) @@ -167,8 +165,7 @@ def dayofweek(self) -> "ps.Series": dtype: int64 """ - @no_type_check - def pandas_dayofweek(s) -> "ps.Series[np.int64]": + def pandas_dayofweek(s) -> ps.Series[np.int64]: # type: ignore[no-untyped-def] return s.dt.dayofweek return self._data.pandas_on_spark.transform_batch(pandas_dayofweek) @@ -185,8 +182,7 @@ def dayofyear(self) -> "ps.Series": The ordinal day of the year. """ - @no_type_check - def pandas_dayofyear(s) -> "ps.Series[np.int64]": + def pandas_dayofyear(s) -> ps.Series[np.int64]: # type: ignore[no-untyped-def] return s.dt.dayofyear return self._data.pandas_on_spark.transform_batch(pandas_dayofyear) @@ -197,8 +193,7 @@ def quarter(self) -> "ps.Series": The quarter of the date. 
""" - @no_type_check - def pandas_quarter(s) -> "ps.Series[np.int64]": + def pandas_quarter(s) -> ps.Series[np.int64]: # type: ignore[no-untyped-def] return s.dt.quarter return self._data.pandas_on_spark.transform_batch(pandas_quarter) @@ -237,8 +232,7 @@ def is_month_start(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_is_month_start(s) -> "ps.Series[bool]": + def pandas_is_month_start(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.dt.is_month_start return self._data.pandas_on_spark.transform_batch(pandas_is_month_start) @@ -277,8 +271,7 @@ def is_month_end(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_is_month_end(s) -> "ps.Series[bool]": + def pandas_is_month_end(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.dt.is_month_end return self._data.pandas_on_spark.transform_batch(pandas_is_month_end) @@ -328,8 +321,7 @@ def is_quarter_start(self) -> "ps.Series": Name: dates, dtype: bool """ - @no_type_check - def pandas_is_quarter_start(s) -> "ps.Series[bool]": + def pandas_is_quarter_start(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.dt.is_quarter_start return self._data.pandas_on_spark.transform_batch(pandas_is_quarter_start) @@ -379,8 +371,7 @@ def is_quarter_end(self) -> "ps.Series": Name: dates, dtype: bool """ - @no_type_check - def pandas_is_quarter_end(s) -> "ps.Series[bool]": + def pandas_is_quarter_end(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.dt.is_quarter_end return self._data.pandas_on_spark.transform_batch(pandas_is_quarter_end) @@ -419,8 +410,7 @@ def is_year_start(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_is_year_start(s) -> "ps.Series[bool]": + def pandas_is_year_start(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.dt.is_year_start return self._data.pandas_on_spark.transform_batch(pandas_is_year_start) @@ -459,8 +449,7 @@ def is_year_end(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_is_year_end(s) -> "ps.Series[bool]": + def pandas_is_year_end(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.dt.is_year_end return self._data.pandas_on_spark.transform_batch(pandas_is_year_end) @@ -499,8 +488,7 @@ def is_leap_year(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_is_leap_year(s) -> "ps.Series[bool]": + def pandas_is_leap_year(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.dt.is_leap_year return self._data.pandas_on_spark.transform_batch(pandas_is_leap_year) @@ -511,8 +499,7 @@ def daysinmonth(self) -> "ps.Series": The number of days in the month. 
""" - @no_type_check - def pandas_daysinmonth(s) -> "ps.Series[np.int64]": + def pandas_daysinmonth(s) -> ps.Series[np.int64]: # type: ignore[no-untyped-def] return s.dt.daysinmonth return self._data.pandas_on_spark.transform_batch(pandas_daysinmonth) @@ -574,8 +561,7 @@ def normalize(self) -> "ps.Series": dtype: datetime64[ns] """ - @no_type_check - def pandas_normalize(s) -> "ps.Series[np.datetime64]": + def pandas_normalize(s) -> ps.Series[np.datetime64]: # type: ignore[no-untyped-def] return s.dt.normalize() return self._data.pandas_on_spark.transform_batch(pandas_normalize) @@ -623,8 +609,7 @@ def strftime(self, date_format: str) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_strftime(s) -> "ps.Series[str]": + def pandas_strftime(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.dt.strftime(date_format) return self._data.pandas_on_spark.transform_batch(pandas_strftime) @@ -679,8 +664,7 @@ def round(self, freq: Union[str, DateOffset], *args: Any, **kwargs: Any) -> "ps. dtype: datetime64[ns] """ - @no_type_check - def pandas_round(s) -> "ps.Series[np.datetime64]": + def pandas_round(s) -> ps.Series[np.datetime64]: # type: ignore[no-untyped-def] return s.dt.round(freq, *args, **kwargs) return self._data.pandas_on_spark.transform_batch(pandas_round) @@ -735,8 +719,7 @@ def floor(self, freq: Union[str, DateOffset], *args: Any, **kwargs: Any) -> "ps. dtype: datetime64[ns] """ - @no_type_check - def pandas_floor(s) -> "ps.Series[np.datetime64]": + def pandas_floor(s) -> ps.Series[np.datetime64]: # type: ignore[no-untyped-def] return s.dt.floor(freq, *args, **kwargs) return self._data.pandas_on_spark.transform_batch(pandas_floor) @@ -791,8 +774,7 @@ def ceil(self, freq: Union[str, DateOffset], *args: Any, **kwargs: Any) -> "ps.S dtype: datetime64[ns] """ - @no_type_check - def pandas_ceil(s) -> "ps.Series[np.datetime64]": + def pandas_ceil(s) -> ps.Series[np.datetime64]: # type: ignore[no-untyped-def] return s.dt.ceil(freq, *args, **kwargs) return self._data.pandas_on_spark.transform_batch(pandas_ceil) @@ -828,8 +810,7 @@ def month_name(self, locale: Optional[str] = None) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_month_name(s) -> "ps.Series[str]": + def pandas_month_name(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.dt.month_name(locale=locale) return self._data.pandas_on_spark.transform_batch(pandas_month_name) @@ -865,8 +846,7 @@ def day_name(self, locale: Optional[str] = None) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_day_name(s) -> "ps.Series[str]": + def pandas_day_name(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.dt.day_name(locale=locale) return self._data.pandas_on_spark.transform_batch(pandas_day_name) diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index 0a11a0f15f80f..d400a3701b2f2 100644 --- a/python/pyspark/pandas/frame.py +++ b/python/pyspark/pandas/frame.py @@ -3028,8 +3028,9 @@ def between_time( psdf.index.name = verify_temp_column_name(psdf, "__index_name__") return_types = [psdf.index.dtype] + list(psdf.dtypes) - @no_type_check - def pandas_between_time(pdf) -> ps.DataFrame[return_types]: + def pandas_between_time( # type: ignore[no-untyped-def] + pdf, + ) -> ps.DataFrame[return_types]: # type: ignore[valid-type] return pdf.between_time(start_time, end_time, include_start, include_end).reset_index() # apply_batch will remove the index of the pandas-on-Spark DataFrame and attach a @@ -3106,8 +3107,9 @@ def at_time( psdf.index.name = 
verify_temp_column_name(psdf, "__index_name__") return_types = [psdf.index.dtype] + list(psdf.dtypes) - @no_type_check - def pandas_at_time(pdf) -> ps.DataFrame[return_types]: + def pandas_at_time( # type: ignore[no-untyped-def] + pdf, + ) -> ps.DataFrame[return_types]: # type: ignore[valid-type] return pdf.at_time(time, asof, axis).reset_index() # apply_batch will remove the index of the pandas-on-Spark DataFrame and attach @@ -11645,8 +11647,7 @@ def eval(self, expr: str, inplace: bool = False) -> Optional[DataFrameOrSeries]: # Since `eval_func` doesn't have a type hint, inferring the schema is always preformed # in the `apply_batch`. Hence, the variables `should_return_series`, `series_name`, # and `should_return_scalar` can be updated. - @no_type_check - def eval_func(pdf): + def eval_func(pdf): # type: ignore[no-untyped-def] nonlocal should_return_series nonlocal series_name nonlocal should_return_scalar diff --git a/python/pyspark/pandas/indexes/datetimes.py b/python/pyspark/pandas/indexes/datetimes.py index abc1d8c35f5a4..e673af77a2219 100644 --- a/python/pyspark/pandas/indexes/datetimes.py +++ b/python/pyspark/pandas/indexes/datetimes.py @@ -682,8 +682,7 @@ def indexer_between_time( Int64Index([2], dtype='int64') """ - @no_type_check - def pandas_between_time(pdf) -> ps.DataFrame[int]: + def pandas_between_time(pdf) -> ps.DataFrame[int]: # type: ignore[no-untyped-def] return pdf.between_time(start_time, end_time, include_start, include_end) psdf = self.to_frame()[[]] @@ -728,8 +727,7 @@ def indexer_at_time(self, time: Union[datetime.time, str], asof: bool = False) - if asof: raise NotImplementedError("'asof' argument is not supported") - @no_type_check - def pandas_at_time(pdf) -> ps.DataFrame[int]: + def pandas_at_time(pdf) -> ps.DataFrame[int]: # type: ignore[no-untyped-def] return pdf.at_time(time, asof) psdf = self.to_frame()[[]] diff --git a/python/pyspark/pandas/indexes/timedelta.py b/python/pyspark/pandas/indexes/timedelta.py index 2888642375655..c45f36e277882 100644 --- a/python/pyspark/pandas/indexes/timedelta.py +++ b/python/pyspark/pandas/indexes/timedelta.py @@ -137,8 +137,7 @@ def days(self) -> Index: Number of days for each element. 
""" - @no_type_check - def pandas_days(x) -> int: + def pandas_days(x) -> int: # type: ignore[no-untyped-def] return x.days return Index(self.to_series().transform(pandas_days)) diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py index ae0018ca3d385..5cf639a947de7 100644 --- a/python/pyspark/pandas/namespace.py +++ b/python/pyspark/pandas/namespace.py @@ -816,8 +816,9 @@ def read_parquet( if index_col is None and pandas_metadata: # Try to read pandas metadata - @no_type_check - @pandas_udf("index_col array, index_names array") + @pandas_udf( # type: ignore[call-overload] + "index_col array, index_names array" + ) def read_index_metadata(pser: pd.Series) -> pd.DataFrame: binary = pser.iloc[0] metadata = pq.ParquetFile(pa.BufferReader(binary)).metadata.metadata diff --git a/python/pyspark/pandas/strings.py b/python/pyspark/pandas/strings.py index 986e3d1a0ace5..774fd6c7ca0bf 100644 --- a/python/pyspark/pandas/strings.py +++ b/python/pyspark/pandas/strings.py @@ -25,7 +25,6 @@ List, Optional, Union, - TYPE_CHECKING, cast, no_type_check, ) @@ -37,11 +36,9 @@ from pyspark.sql import functions as F from pyspark.sql.functions import pandas_udf +import pyspark.pandas as ps from pyspark.pandas.spark import functions as SF -if TYPE_CHECKING: - import pyspark.pandas as ps - class StringMethods: """String methods for pandas-on-Spark Series""" @@ -74,8 +71,7 @@ def capitalize(self) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_capitalize(s) -> "ps.Series[str]": + def pandas_capitalize(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.capitalize() return self._data.pandas_on_spark.transform_batch(pandas_capitalize) @@ -102,8 +98,7 @@ def title(self) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_title(s) -> "ps.Series[str]": + def pandas_title(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.title() return self._data.pandas_on_spark.transform_batch(pandas_title) @@ -176,8 +171,7 @@ def swapcase(self) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_swapcase(s) -> "ps.Series[str]": + def pandas_swapcase(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.swapcase() return self._data.pandas_on_spark.transform_batch(pandas_swapcase) @@ -228,8 +222,7 @@ def startswith(self, pattern: str, na: Optional[Any] = None) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_startswith(s) -> "ps.Series[bool]": + def pandas_startswith(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.str.startswith(pattern, na) return self._data.pandas_on_spark.transform_batch(pandas_startswith) @@ -280,8 +273,7 @@ def endswith(self, pattern: str, na: Optional[Any] = None) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_endswith(s) -> "ps.Series[bool]": + def pandas_endswith(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.str.endswith(pattern, na) return self._data.pandas_on_spark.transform_batch(pandas_endswith) @@ -333,8 +325,7 @@ def strip(self, to_strip: Optional[str] = None) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_strip(s) -> "ps.Series[str]": + def pandas_strip(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.strip(to_strip) return self._data.pandas_on_spark.transform_batch(pandas_strip) @@ -374,8 +365,7 @@ def lstrip(self, to_strip: Optional[str] = None) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_lstrip(s) -> "ps.Series[str]": + def pandas_lstrip(s) -> ps.Series[str]: # type: 
ignore[no-untyped-def] return s.str.lstrip(to_strip) return self._data.pandas_on_spark.transform_batch(pandas_lstrip) @@ -415,8 +405,7 @@ def rstrip(self, to_strip: Optional[str] = None) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_rstrip(s) -> "ps.Series[str]": + def pandas_rstrip(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.rstrip(to_strip) return self._data.pandas_on_spark.transform_batch(pandas_rstrip) @@ -470,8 +459,7 @@ def get(self, i: int) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_get(s) -> "ps.Series[str]": + def pandas_get(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.get(i) return self._data.pandas_on_spark.transform_batch(pandas_get) @@ -507,8 +495,7 @@ def isalnum(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_isalnum(s) -> "ps.Series[bool]": + def pandas_isalnum(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.str.isalnum() return self._data.pandas_on_spark.transform_batch(pandas_isalnum) @@ -533,8 +520,7 @@ def isalpha(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_isalpha(s) -> "ps.Series[bool]": + def pandas_isalpha(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.str.isalpha() return self._data.pandas_on_spark.transform_batch(pandas_isalpha) @@ -584,8 +570,7 @@ def isdigit(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_isdigit(s) -> "ps.Series[bool]": + def pandas_isdigit(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.str.isdigit() return self._data.pandas_on_spark.transform_batch(pandas_isdigit) @@ -608,8 +593,7 @@ def isspace(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_isspace(s) -> "ps.Series[bool]": + def pandas_isspace(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.str.isspace() return self._data.pandas_on_spark.transform_batch(pandas_isspace) @@ -633,8 +617,7 @@ def islower(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_isspace(s) -> "ps.Series[bool]": + def pandas_isspace(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.str.islower() return self._data.pandas_on_spark.transform_batch(pandas_isspace) @@ -658,8 +641,7 @@ def isupper(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_isspace(s) -> "ps.Series[bool]": + def pandas_isspace(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.str.isupper() return self._data.pandas_on_spark.transform_batch(pandas_isspace) @@ -689,8 +671,7 @@ def istitle(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_istitle(s) -> "ps.Series[bool]": + def pandas_istitle(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.str.istitle() return self._data.pandas_on_spark.transform_batch(pandas_istitle) @@ -748,8 +729,7 @@ def isnumeric(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_isnumeric(s) -> "ps.Series[bool]": + def pandas_isnumeric(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.str.isnumeric() return self._data.pandas_on_spark.transform_batch(pandas_isnumeric) @@ -799,8 +779,7 @@ def isdecimal(self) -> "ps.Series": dtype: bool """ - @no_type_check - def pandas_isdecimal(s) -> "ps.Series[bool]": + def pandas_isdecimal(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.str.isdecimal() return self._data.pandas_on_spark.transform_batch(pandas_isdecimal) @@ -843,8 +822,7 @@ def center(self, width: int, fillchar: str = " ") -> "ps.Series": dtype: object """ - @no_type_check - 
def pandas_center(s) -> "ps.Series[str]": + def pandas_center(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.center(width, fillchar) return self._data.pandas_on_spark.transform_batch(pandas_center) @@ -963,8 +941,7 @@ def contains( dtype: bool """ - @no_type_check - def pandas_contains(s) -> "ps.Series[bool]": + def pandas_contains(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.str.contains(pat, case, flags, na, regex) return self._data.pandas_on_spark.transform_batch(pandas_contains) @@ -1014,8 +991,7 @@ def count(self, pat: str, flags: int = 0) -> "ps.Series": dtype: int64 """ - @no_type_check - def pandas_count(s) -> "ps.Series[int]": + def pandas_count(s) -> ps.Series[int]: # type: ignore[no-untyped-def] return s.str.count(pat, flags) return self._data.pandas_on_spark.transform_batch(pandas_count) @@ -1098,8 +1074,7 @@ def find(self, sub: str, start: int = 0, end: Optional[int] = None) -> "ps.Serie dtype: int64 """ - @no_type_check - def pandas_find(s) -> "ps.Series[int]": + def pandas_find(s) -> ps.Series[int]: # type: ignore[no-untyped-def] return s.str.find(sub, start, end) return self._data.pandas_on_spark.transform_batch(pandas_find) @@ -1229,8 +1204,7 @@ def index(self, sub: str, start: int = 0, end: Optional[int] = None) -> "ps.Seri >>> s.str.index('a', start=2) # doctest: +SKIP """ - @no_type_check - def pandas_index(s) -> "ps.Series[np.int64]": + def pandas_index(s) -> ps.Series[np.int64]: # type: ignore[no-untyped-def] return s.str.index(sub, start, end) return self._data.pandas_on_spark.transform_batch(pandas_index) @@ -1279,8 +1253,7 @@ def join(self, sep: str) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_join(s) -> "ps.Series[str]": + def pandas_join(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.join(sep) return self._data.pandas_on_spark.transform_batch(pandas_join) @@ -1350,8 +1323,7 @@ def ljust(self, width: int, fillchar: str = " ") -> "ps.Series": dtype: object """ - @no_type_check - def pandas_ljust(s) -> "ps.Series[str]": + def pandas_ljust(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.ljust(width, fillchar) return self._data.pandas_on_spark.transform_batch(pandas_ljust) @@ -1417,8 +1389,7 @@ def match(self, pat: str, case: bool = True, flags: int = 0, na: Any = np.NaN) - dtype: object """ - @no_type_check - def pandas_match(s) -> "ps.Series[bool]": + def pandas_match(s) -> ps.Series[bool]: # type: ignore[no-untyped-def] return s.str.match(pat, case, flags, na) return self._data.pandas_on_spark.transform_batch(pandas_match) @@ -1441,8 +1412,7 @@ def normalize(self, form: str) -> "ps.Series": A Series of normalized strings. 
""" - @no_type_check - def pandas_normalize(s) -> "ps.Series[str]": + def pandas_normalize(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.normalize(form) return self._data.pandas_on_spark.transform_batch(pandas_normalize) @@ -1490,8 +1460,7 @@ def pad(self, width: int, side: str = "left", fillchar: str = " ") -> "ps.Series dtype: object """ - @no_type_check - def pandas_pad(s) -> "ps.Series[str]": + def pandas_pad(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.pad(width, side, fillchar) return self._data.pandas_on_spark.transform_batch(pandas_pad) @@ -1636,8 +1605,7 @@ def replace( dtype: object """ - @no_type_check - def pandas_replace(s) -> "ps.Series[str]": + def pandas_replace(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.replace(pat, repl, n=n, case=case, flags=flags, regex=regex) return self._data.pandas_on_spark.transform_batch(pandas_replace) @@ -1692,8 +1660,7 @@ def rfind(self, sub: str, start: int = 0, end: Optional[int] = None) -> "ps.Seri dtype: int64 """ - @no_type_check - def pandas_rfind(s) -> "ps.Series[int]": + def pandas_rfind(s) -> ps.Series[int]: # type: ignore[no-untyped-def] return s.str.rfind(sub, start, end) return self._data.pandas_on_spark.transform_batch(pandas_rfind) @@ -1736,8 +1703,7 @@ def rindex(self, sub: str, start: int = 0, end: Optional[int] = None) -> "ps.Ser >>> s.str.rindex('a', start=2) # doctest: +SKIP """ - @no_type_check - def pandas_rindex(s) -> "ps.Series[np.int64]": + def pandas_rindex(s) -> ps.Series[np.int64]: # type: ignore[no-untyped-def] return s.str.rindex(sub, start, end) return self._data.pandas_on_spark.transform_batch(pandas_rindex) @@ -1778,8 +1744,7 @@ def rjust(self, width: int, fillchar: str = " ") -> "ps.Series": dtype: object """ - @no_type_check - def pandas_rjust(s) -> "ps.Series[str]": + def pandas_rjust(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.rjust(width, fillchar) return self._data.pandas_on_spark.transform_batch(pandas_rjust) @@ -1844,8 +1809,7 @@ def slice( dtype: object """ - @no_type_check - def pandas_slice(s) -> "ps.Series[str]": + def pandas_slice(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.slice(start, stop, step) return self._data.pandas_on_spark.transform_batch(pandas_slice) @@ -1921,8 +1885,7 @@ def slice_replace( dtype: object """ - @no_type_check - def pandas_slice_replace(s) -> "ps.Series[str]": + def pandas_slice_replace(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.slice_replace(start, stop, repl) return self._data.pandas_on_spark.transform_batch(pandas_slice_replace) @@ -2259,8 +2222,7 @@ def translate(self, table: Dict) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_translate(s) -> "ps.Series[str]": + def pandas_translate(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.translate(table) return self._data.pandas_on_spark.transform_batch(pandas_translate) @@ -2311,8 +2273,7 @@ def wrap(self, width: int, **kwargs: bool) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_wrap(s) -> "ps.Series[str]": + def pandas_wrap(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.wrap(width, **kwargs) return self._data.pandas_on_spark.transform_batch(pandas_wrap) @@ -2362,8 +2323,7 @@ def zfill(self, width: int) -> "ps.Series": dtype: object """ - @no_type_check - def pandas_zfill(s) -> "ps.Series[str]": + def pandas_zfill(s) -> ps.Series[str]: # type: ignore[no-untyped-def] return s.str.zfill(width) return 
self._data.pandas_on_spark.transform_batch(pandas_zfill) diff --git a/python/pyspark/pandas/tests/test_categorical.py b/python/pyspark/pandas/tests/test_categorical.py index 2430935ecbe57..de412bb7ff48f 100644 --- a/python/pyspark/pandas/tests/test_categorical.py +++ b/python/pyspark/pandas/tests/test_categorical.py @@ -16,7 +16,6 @@ # from distutils.version import LooseVersion -from typing import no_type_check import numpy as np import pandas as pd @@ -438,8 +437,7 @@ def test_groupby_transform_without_shortcut(self): pdf, psdf = self.df_pair - @no_type_check - def identity(x) -> ps.Series[psdf.b.dtype]: + def identity(x) -> ps.Series[psdf.b.dtype]: # type: ignore[name-defined, no-untyped-def] return x self.assert_eq( diff --git a/python/pyspark/pandas/tests/test_typedef.py b/python/pyspark/pandas/tests/test_typedef.py index ef331da8bec1e..f292f0f320325 100644 --- a/python/pyspark/pandas/tests/test_typedef.py +++ b/python/pyspark/pandas/tests/test_typedef.py @@ -56,10 +56,6 @@ class TypeHintTests(unittest.TestCase): - @unittest.skipIf( - sys.version_info < (3, 7), - "Type inference from pandas instances is supported with Python 3.7+", - ) def test_infer_schema_from_pandas_instances(self): def func() -> pd.Series[int]: pass @@ -148,10 +144,6 @@ def test_if_pandas_implements_class_getitem(self): assert not ps._frame_has_class_getitem assert not ps._series_has_class_getitem - @unittest.skipIf( - sys.version_info < (3, 7), - "Type inference from pandas instances is supported with Python 3.7+", - ) def test_infer_schema_with_names_pandas_instances(self): def func() -> 'pd.DataFrame["a" : np.float_, "b":str]': # noqa: F405 pass @@ -201,10 +193,6 @@ def func() -> pd.DataFrame[zip(pdf.columns, pdf.dtypes)]: self.assertEqual(inferred.dtypes, [np.int64, CategoricalDtype(categories=["a", "b", "c"])]) self.assertEqual(inferred.spark_type, expected) - @unittest.skipIf( - sys.version_info < (3, 7), - "Type inference from pandas instances is supported with Python 3.7+", - ) def test_infer_schema_with_names_pandas_instances_negative(self): def try_infer_return_type(): def f() -> 'pd.DataFrame["a" : np.float_ : 1, "b":str:2]': # noqa: F405 diff --git a/python/pyspark/pandas/typedef/string_typehints.py b/python/pyspark/pandas/typedef/string_typehints.py deleted file mode 100644 index c7a72351ad934..0000000000000 --- a/python/pyspark/pandas/typedef/string_typehints.py +++ /dev/null @@ -1,40 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -from inspect import FullArgSpec -from typing import List, Optional, Type, cast as _cast # noqa: F401 - -import numpy as np # noqa: F401 -import pandas # noqa: F401 -import pandas as pd # noqa: F401 -from numpy import * # noqa: F401 -from pandas import * # type: ignore[no-redef] # noqa: F401 -from inspect import getfullargspec # noqa: F401 - - -def resolve_string_type_hint(tpe: str) -> Optional[Type]: - import pyspark.pandas as ps - from pyspark.pandas import DataFrame, Series # type: ignore[misc] - - locs = { - "ps": ps, - "pyspark.pandas": ps, - "DataFrame": DataFrame, - "Series": Series, - } - # This is a hack to resolve the forward reference string. - exec("def func() -> %s: pass\narg_spec = getfullargspec(func)" % tpe, globals(), locs) - return _cast(FullArgSpec, locs["arg_spec"]).annotations.get("return", None) diff --git a/python/pyspark/pandas/typedef/typehints.py b/python/pyspark/pandas/typedef/typehints.py index 6b3083a22f853..9b42daffcd351 100644 --- a/python/pyspark/pandas/typedef/typehints.py +++ b/python/pyspark/pandas/typedef/typehints.py @@ -24,16 +24,8 @@ import typing from collections.abc import Iterable from distutils.version import LooseVersion -from inspect import getfullargspec, isclass -from typing import ( - Any, - Callable, - Generic, - List, - Tuple, - Union, - Type, -) +from inspect import isclass +from typing import Any, Callable, Generic, List, Tuple, Union, Type, get_type_hints import numpy as np import pandas as pd @@ -76,7 +68,6 @@ # For running doctests and reference resolution in PyCharm. from pyspark import pandas as ps # noqa: F401 from pyspark.pandas._typing import Dtype, T -from pyspark.pandas.typedef.string_typehints import resolve_string_type_hint if typing.TYPE_CHECKING: from pyspark.pandas.internal import InternalField @@ -566,11 +557,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp from pyspark.pandas.typedef import SeriesType, NameTypeHolder, IndexNameTypeHolder from pyspark.pandas.utils import name_like_string - spec = getfullargspec(f) - tpe = spec.annotations.get("return", None) - if isinstance(tpe, str): - # This type hint can happen when given hints are string to avoid forward reference. - tpe = resolve_string_type_hint(tpe) + tpe = get_type_hints(f).get("return", None) if hasattr(tpe, "__origin__") and issubclass(tpe.__origin__, SeriesType): tpe = tpe.__args__[0] From 2ed827a8169a0ad098c5ee4f6101bd003f8c6de2 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Fri, 14 Jan 2022 09:44:29 +0800 Subject: [PATCH 017/513] [SPARK-37627][SQL][FOLLOWUP] Separate SortedBucketTransform from BucketTransform ### What changes were proposed in this pull request? 1. Currently only a single bucket column is supported in `BucketTransform`, fix the code to make multiple bucket columns work. 2. Separate `SortedBucketTransform` from `BucketTransform`, and make the `arguments` in `SortedBucketTransform` in the format of `columns numBuckets sortedColumns` so we have a way to find out the `columns` and `sortedColumns`. 3. add more test coverage. ### Why are the changes needed? Fix bugs in `BucketTransform` and `SortedBucketTransform`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New tests Closes #34914 from huaxingao/sorted_followup. 
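To make the new `sorted_bucket` argument layout easier to follow, here is a minimal, self-contained Scala sketch of the splitting idea. The case classes and names below are illustrative stand-ins, not the real `Transform`/`NamedReference`/`LiteralValue` API: the arguments are laid out as `columns numBuckets sortedColumns`, so locating the single integer literal tells us where the bucket columns end and the sort columns begin.

```scala
object SortedBucketArgsSketch {
  // Hypothetical stand-ins for the connector expression types.
  sealed trait Arg
  final case class Col(name: String) extends Arg
  final case class IntLit(value: Int) extends Arg

  // Split `columns ++ Seq(numBuckets) ++ sortedColumns` back into its parts by
  // finding the position of the integer literal, mirroring the new extractor.
  def splitSortedBucketArgs(args: Seq[Arg]): (Int, Seq[String], Seq[String]) = {
    val pos = args.indexWhere(_.isInstanceOf[IntLit])
    require(pos >= 0, "sorted_bucket arguments must contain the numBuckets literal")
    val numBuckets = args(pos).asInstanceOf[IntLit].value
    val bucketCols = args.take(pos).collect { case Col(n) => n }
    val sortCols = args.drop(pos + 1).collect { case Col(n) => n }
    (numBuckets, bucketCols, sortCols)
  }

  // sorted_bucket(c, d, 4, e, f) splits into (4, Seq("c", "d"), Seq("e", "f")),
  // matching the sorted_bucket(c, d, 4, e, f) layout asserted in the new tests.
  val example: (Int, Seq[String], Seq[String]) =
    splitSortedBucketArgs(Seq(Col("c"), Col("d"), IntLit(4), Col("e"), Col("f")))
}
```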
Lead-authored-by: Huaxin Gao Co-authored-by: huaxingao Signed-off-by: Wenchen Fan --- .../catalog/CatalogV2Implicits.scala | 4 +- .../connector/expressions/expressions.scala | 76 +++++++++++-------- .../sql/connector/catalog/InMemoryTable.scala | 14 +++- .../expressions/TransformExtractorSuite.scala | 62 ++++++++++++++- .../datasources/v2/V2SessionCatalog.scala | 9 ++- .../sql/connector/DataSourceV2SQLSuite.scala | 28 ++++--- 6 files changed, 144 insertions(+), 49 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index dbc4bd373751f..407f25ba20e5d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.quoteIfNeeded -import org.apache.spark.sql.connector.expressions.{BucketTransform, IdentityTransform, LogicalExpressions, Transform} +import org.apache.spark.sql.connector.expressions.{IdentityTransform, LogicalExpressions, Transform} import org.apache.spark.sql.errors.QueryCompilationErrors /** @@ -37,7 +37,7 @@ private[sql] object CatalogV2Implicits { } implicit class BucketSpecHelper(spec: BucketSpec) { - def asTransform: BucketTransform = { + def asTransform: Transform = { val references = spec.bucketColumnNames.map(col => reference(Seq(col))) if (spec.sortColumnNames.nonEmpty) { val sortedCol = spec.sortColumnNames.map(col => reference(Seq(col))) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala index 996b2566eeb7b..fbd2520e2a774 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.connector.expressions +import org.apache.spark.SparkException import org.apache.spark.sql.catalyst import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.types.{DataType, IntegerType, StringType} @@ -48,8 +49,8 @@ private[sql] object LogicalExpressions { def bucket( numBuckets: Int, references: Array[NamedReference], - sortedCols: Array[NamedReference]): BucketTransform = - BucketTransform(literal(numBuckets, IntegerType), references, sortedCols) + sortedCols: Array[NamedReference]): SortedBucketTransform = + SortedBucketTransform(literal(numBuckets, IntegerType), references, sortedCols) def identity(reference: NamedReference): IdentityTransform = IdentityTransform(reference) @@ -101,8 +102,7 @@ private[sql] abstract class SingleColumnTransform(ref: NamedReference) extends R private[sql] final case class BucketTransform( numBuckets: Literal[Int], - columns: Seq[NamedReference], - sortedColumns: Seq[NamedReference] = Seq.empty[NamedReference]) extends RewritableTransform { + columns: Seq[NamedReference]) extends RewritableTransform { override val name: String = "bucket" @@ -112,13 +112,9 @@ private[sql] final case class BucketTransform( override def arguments: Array[Expression] = numBuckets +: 
columns.toArray - override def toString: String = - if (sortedColumns.nonEmpty) { - s"bucket(${arguments.map(_.describe).mkString(", ")}," + - s" ${sortedColumns.map(_.describe).mkString(", ")})" - } else { - s"bucket(${arguments.map(_.describe).mkString(", ")})" - } + override def describe: String = s"bucket(${arguments.map(_.describe).mkString(", ")})" + + override def toString: String = describe override def withReferences(newReferences: Seq[NamedReference]): Transform = { this.copy(columns = newReferences) @@ -126,32 +122,52 @@ private[sql] final case class BucketTransform( } private[sql] object BucketTransform { - def unapply(expr: Expression): Option[(Int, FieldReference, FieldReference)] = - expr match { - case transform: Transform => + def unapply(transform: Transform): Option[(Int, Seq[NamedReference], Seq[NamedReference])] = transform match { - case BucketTransform(n, FieldReference(parts), FieldReference(sortCols)) => - Some((n, FieldReference(parts), FieldReference(sortCols))) + case NamedTransform("sorted_bucket", arguments) => + var posOfLit: Int = -1 + var numOfBucket: Int = -1 + arguments.zipWithIndex.foreach { + case (Lit(value: Int, IntegerType), i) => + numOfBucket = value + posOfLit = i case _ => - None } + Some(numOfBucket, arguments.take(posOfLit).map(_.asInstanceOf[NamedReference]), + arguments.drop(posOfLit + 1).map(_.asInstanceOf[NamedReference])) + case NamedTransform("bucket", arguments) => + var numOfBucket: Int = -1 + arguments(0) match { + case Lit(value: Int, IntegerType) => + numOfBucket = value + case _ => throw new SparkException("The first element in BucketTransform arguments " + + "should be an Integer Literal.") + } + Some(numOfBucket, arguments.drop(1).map(_.asInstanceOf[NamedReference]), + Seq.empty[FieldReference]) case _ => None } +} - def unapply(transform: Transform): Option[(Int, NamedReference, NamedReference)] = - transform match { - case NamedTransform("bucket", Seq( - Lit(value: Int, IntegerType), - Ref(partCols: Seq[String]), - Ref(sortCols: Seq[String]))) => - Some((value, FieldReference(partCols), FieldReference(sortCols))) - case NamedTransform("bucket", Seq( - Lit(value: Int, IntegerType), - Ref(partCols: Seq[String]))) => - Some((value, FieldReference(partCols), FieldReference(Seq.empty[String]))) - case _ => - None +private[sql] final case class SortedBucketTransform( + numBuckets: Literal[Int], + columns: Seq[NamedReference], + sortedColumns: Seq[NamedReference] = Seq.empty[NamedReference]) extends RewritableTransform { + + override val name: String = "sorted_bucket" + + override def references: Array[NamedReference] = { + arguments.collect { case named: NamedReference => named } + } + + override def arguments: Array[Expression] = (columns.toArray :+ numBuckets) ++ sortedColumns + + override def toString: String = s"$name(${arguments.map(_.describe).mkString(", ")})" + + override def withReferences(newReferences: Seq[NamedReference]): Transform = { + this.copy(columns = newReferences.take(columns.length), + sortedColumns = newReferences.drop(columns.length)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala index fa8be1b8fa3c0..8e5e920d89abe 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala @@ -80,6 +80,7 @@ class InMemoryTable( case _: DaysTransform => 
case _: HoursTransform => case _: BucketTransform => + case _: SortedBucketTransform => case t if !allowUnsupportedTransforms => throw new IllegalArgumentException(s"Transform $t is not a supported transform") } @@ -161,10 +162,15 @@ class InMemoryTable( case (v, t) => throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } - case BucketTransform(numBuckets, ref, _) => - val (value, dataType) = extractor(ref.fieldNames, cleanedSchema, row) - val valueHashCode = if (value == null) 0 else value.hashCode - ((valueHashCode + 31 * dataType.hashCode()) & Integer.MAX_VALUE) % numBuckets + case BucketTransform(numBuckets, cols, _) => + val valueTypePairs = cols.map(col => extractor(col.fieldNames, cleanedSchema, row)) + var valueHashCode = 0 + valueTypePairs.foreach( pair => + if ( pair._1 != null) valueHashCode += pair._1.hashCode() + ) + var dataTypeHashCode = 0 + valueTypePairs.foreach(dataTypeHashCode += _._2.hashCode()) + ((valueHashCode + 31 * dataTypeHashCode) & Integer.MAX_VALUE) % numBuckets } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/expressions/TransformExtractorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/expressions/TransformExtractorSuite.scala index b2371ce667ffc..54ab1df3fa8f8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/expressions/TransformExtractorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/expressions/TransformExtractorSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.connector.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst +import org.apache.spark.sql.connector.expressions.LogicalExpressions.bucket import org.apache.spark.sql.types.DataType class TransformExtractorSuite extends SparkFunSuite { @@ -139,9 +140,9 @@ class TransformExtractorSuite extends SparkFunSuite { } bucketTransform match { - case BucketTransform(numBuckets, FieldReference(seq), _) => + case BucketTransform(numBuckets, cols, _) => assert(numBuckets === 16) - assert(seq === Seq("a", "b")) + assert(cols(0).fieldNames === Seq("a", "b")) case _ => fail("Did not match BucketTransform extractor") } @@ -153,4 +154,61 @@ class TransformExtractorSuite extends SparkFunSuite { // expected } } + + test("Sorted Bucket extractor") { + val col = Array(ref("a"), ref("b")) + val sortedCol = Array(ref("c"), ref("d")) + + val sortedBucketTransform = new Transform { + override def name: String = "sorted_bucket" + override def references: Array[NamedReference] = col ++ sortedCol + override def arguments: Array[Expression] = (col :+ lit(16)) ++ sortedCol + override def describe: String = s"bucket(16, ${col(0).describe}, ${col(1).describe} " + + s"${sortedCol(0).describe} ${sortedCol(1).describe})" + } + + sortedBucketTransform match { + case BucketTransform(numBuckets, cols, sortCols) => + assert(numBuckets === 16) + assert(cols.flatMap(c => c.fieldNames()) === Seq("a", "b")) + assert(sortCols.flatMap(c => c.fieldNames()) === Seq("c", "d")) + case _ => + fail("Did not match BucketTransform extractor") + } + } + + test("test bucket") { + val col = Array(ref("a"), ref("b")) + val sortedCol = Array(ref("c"), ref("d")) + + val bucketTransform = bucket(16, col) + val reference1 = bucketTransform.references + assert(reference1.length == 2) + assert(reference1(0).fieldNames() === Seq("a")) + assert(reference1(1).fieldNames() === Seq("b")) + val arguments1 = bucketTransform.arguments + assert(arguments1.length == 3) + 
assert(arguments1(0).asInstanceOf[LiteralValue[Integer]].value === 16) + assert(arguments1(1).asInstanceOf[NamedReference].fieldNames() === Seq("a")) + assert(arguments1(2).asInstanceOf[NamedReference].fieldNames() === Seq("b")) + val copied1 = bucketTransform.withReferences(reference1) + assert(copied1.equals(bucketTransform)) + + val sortedBucketTransform = bucket(16, col, sortedCol) + val reference2 = sortedBucketTransform.references + assert(reference2.length == 4) + assert(reference2(0).fieldNames() === Seq("a")) + assert(reference2(1).fieldNames() === Seq("b")) + assert(reference2(2).fieldNames() === Seq("c")) + assert(reference2(3).fieldNames() === Seq("d")) + val arguments2 = sortedBucketTransform.arguments + assert(arguments2.length == 5) + assert(arguments2(0).asInstanceOf[NamedReference].fieldNames() === Seq("a")) + assert(arguments2(1).asInstanceOf[NamedReference].fieldNames() === Seq("b")) + assert(arguments2(2).asInstanceOf[LiteralValue[Integer]].value === 16) + assert(arguments2(3).asInstanceOf[NamedReference].fieldNames() === Seq("c")) + assert(arguments2(4).asInstanceOf[NamedReference].fieldNames() === Seq("d")) + val copied2 = sortedBucketTransform.withReferences(reference2) + assert(copied2.equals(sortedBucketTransform)) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala index 906107a1227f8..3ea7d0f578b3f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala @@ -342,8 +342,13 @@ private[sql] object V2SessionCatalog { case IdentityTransform(FieldReference(Seq(col))) => identityCols += col - case BucketTransform(numBuckets, FieldReference(Seq(col)), FieldReference(Seq(sortCol))) => - bucketSpec = Some(BucketSpec(numBuckets, col :: Nil, sortCol :: Nil)) + case BucketTransform(numBuckets, col, sortCol) => + if (sortCol.isEmpty) { + bucketSpec = Some(BucketSpec(numBuckets, col.map(_.fieldNames.mkString(".")), Nil)) + } else { + bucketSpec = Some(BucketSpec(numBuckets, col.map(_.fieldNames.mkString(".")), + sortCol.map(_.fieldNames.mkString(".")))) + } case transform => throw QueryExecutionErrors.unsupportedPartitionTransformError(transform) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 2ed7f6163be79..3e0627c505341 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -410,9 +410,12 @@ class DataSourceV2SQLSuite test("SPARK-36850: CreateTableAsSelect partitions can be specified using " + "PARTITIONED BY and/or CLUSTERED BY") { val identifier = "testcat.table_name" + val df = spark.createDataFrame(Seq((1L, "a", "a1", "a2", "a3"), (2L, "b", "b1", "b2", "b3"), + (3L, "c", "c1", "c2", "c3"))).toDF("id", "data1", "data2", "data3", "data4") + df.createOrReplaceTempView("source_table") withTable(identifier) { spark.sql(s"CREATE TABLE $identifier USING foo PARTITIONED BY (id) " + - s"CLUSTERED BY (data) INTO 4 BUCKETS AS SELECT * FROM source") + s"CLUSTERED BY (data1, data2, data3, data4) INTO 4 BUCKETS AS SELECT * FROM source_table") val describe = spark.sql(s"DESCRIBE $identifier") val part1 = describe 
.filter("col_name = 'Part 0'") @@ -421,18 +424,22 @@ class DataSourceV2SQLSuite val part2 = describe .filter("col_name = 'Part 1'") .select("data_type").head.getString(0) - assert(part2 === "bucket(4, data)") + assert(part2 === "bucket(4, data1, data2, data3, data4)") } } test("SPARK-36850: ReplaceTableAsSelect partitions can be specified using " + "PARTITIONED BY and/or CLUSTERED BY") { val identifier = "testcat.table_name" + val df = spark.createDataFrame(Seq((1L, "a", "a1", "a2", "a3"), (2L, "b", "b1", "b2", "b3"), + (3L, "c", "c1", "c2", "c3"))).toDF("id", "data1", "data2", "data3", "data4") + df.createOrReplaceTempView("source_table") withTable(identifier) { spark.sql(s"CREATE TABLE $identifier USING foo " + "AS SELECT id FROM source") spark.sql(s"REPLACE TABLE $identifier USING foo PARTITIONED BY (id) " + - s"CLUSTERED BY (data) INTO 4 BUCKETS AS SELECT * FROM source") + s"CLUSTERED BY (data1, data2) SORTED by (data3, data4) INTO 4 BUCKETS " + + s"AS SELECT * FROM source_table") val describe = spark.sql(s"DESCRIBE $identifier") val part1 = describe .filter("col_name = 'Part 0'") @@ -441,7 +448,7 @@ class DataSourceV2SQLSuite val part2 = describe .filter("col_name = 'Part 1'") .select("data_type").head.getString(0) - assert(part2 === "bucket(4, data)") + assert(part2 === "sorted_bucket(data1, data2, 4, data3, data4)") } } @@ -1479,18 +1486,21 @@ class DataSourceV2SQLSuite test("create table using - with sorted bucket") { val identifier = "testcat.table_name" withTable(identifier) { - sql(s"CREATE TABLE $identifier (a int, b string, c int) USING $v2Source PARTITIONED BY (c)" + - s" CLUSTERED BY (b) SORTED by (a) INTO 4 BUCKETS") - val table = getTableMetadata(identifier) + sql(s"CREATE TABLE $identifier (a int, b string, c int, d int, e int, f int) USING" + + s" $v2Source PARTITIONED BY (a, b) CLUSTERED BY (c, d) SORTED by (e, f) INTO 4 BUCKETS") val describe = spark.sql(s"DESCRIBE $identifier") val part1 = describe .filter("col_name = 'Part 0'") .select("data_type").head.getString(0) - assert(part1 === "c") + assert(part1 === "a") val part2 = describe .filter("col_name = 'Part 1'") .select("data_type").head.getString(0) - assert(part2 === "bucket(4, b, a)") + assert(part2 === "b") + val part3 = describe + .filter("col_name = 'Part 2'") + .select("data_type").head.getString(0) + assert(part3 === "sorted_bucket(c, d, 4, e, f)") } } From a1e86373253d77329b2b252c653a69ae8ac0bd6c Mon Sep 17 00:00:00 2001 From: Karen Feng Date: Fri, 14 Jan 2022 10:54:53 +0800 Subject: [PATCH 018/513] [SPARK-37859][SQL] Do not check for metadata during schema comparison ### What changes were proposed in this pull request? Ignores the metadata when comparing the user-provided schema and the actual schema during BaseRelation resolution. ### Why are the changes needed? Makes it possible to read tables with Spark 3.2 that were written with Spark 3.1, as https://github.com/apache/spark/blob/bd24b4884b804fc85a083f82b864823851d5980c/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala#L312 added a new metadata field that broke compatibility. ### Does this PR introduce _any_ user-facing change? Yes. Previously, an error was thrown when a SQL table written with JDBC in Spark 3.1 was read in Spark 3.2. Now, no error is thrown. ### How was this patch tested? Unit test and manual test with a SQL table written with Spark 3.1. 
Query: ``` select * from jdbc_table ``` Before: ``` org.apache.spark.sql.AnalysisException: The user-specified schema doesn't match the actual schema: ``` After: no error Closes #35158 from karenfeng/SPARK-37859. Authored-by: Karen Feng Signed-off-by: Wenchen Fan --- .../execution/datasources/DataSource.scala | 2 +- .../spark/sql/sources/TableScanSuite.scala | 2 +- .../sql/test/DataFrameReaderWriterSuite.scala | 25 ++++++++++++++++--- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index a7e505ebd93da..2bb3d48c1458c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -353,7 +353,7 @@ case class DataSource( case (dataSource: RelationProvider, Some(schema)) => val baseRelation = dataSource.createRelation(sparkSession.sqlContext, caseInsensitiveOptions) - if (baseRelation.schema != schema) { + if (!DataType.equalsIgnoreCompatibleNullability(baseRelation.schema, schema)) { throw QueryCompilationErrors.userSpecifiedSchemaMismatchActualSchemaError( schema, baseRelation.schema) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala index de54b38627443..47bacde5fea29 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala @@ -359,7 +359,7 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { val schemaNotMatch = intercept[Exception] { sql( s""" - |CREATE $tableType relationProviderWithSchema (i int) + |CREATE $tableType relationProviderWithSchema (i string) |USING org.apache.spark.sql.sources.SimpleScanSource |OPTIONS ( | From '1', diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index 41d11568750cc..ea007c149dd8e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -536,12 +536,31 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with .option("TO", "10") .format("org.apache.spark.sql.sources.SimpleScanSource") + val answerDf = spark.range(1, 11).toDF() + // when users do not specify the schema - checkAnswer(dfReader.load(), spark.range(1, 11).toDF()) + checkAnswer(dfReader.load(), answerDf) + + // same base schema, differing metadata and nullability + val fooBarMetadata = new MetadataBuilder().putString("foo", "bar").build() + val nullableAndMetadataCases = Seq( + (false, fooBarMetadata), + (false, Metadata.empty), + (true, fooBarMetadata), + (true, Metadata.empty)) + nullableAndMetadataCases.foreach { case (nullable, metadata) => + val inputSchema = new StructType() + .add("i", IntegerType, nullable = nullable, metadata = metadata) + checkAnswer(dfReader.schema(inputSchema).load(), answerDf) + } // when users specify a wrong schema - val inputSchema = new StructType().add("s", IntegerType, nullable = false) - val e = intercept[AnalysisException] { dfReader.schema(inputSchema).load() } + var inputSchema = new StructType().add("s", IntegerType, nullable = 
false) + var e = intercept[AnalysisException] { dfReader.schema(inputSchema).load() } + assert(e.getMessage.contains("The user-specified schema doesn't match the actual schema")) + + inputSchema = new StructType().add("i", StringType, nullable = true) + e = intercept[AnalysisException] { dfReader.schema(inputSchema).load() } assert(e.getMessage.contains("The user-specified schema doesn't match the actual schema")) } From 1ef56e969f42726c630572f4e781cbeb3b57b888 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 13 Jan 2022 18:55:47 -0800 Subject: [PATCH 019/513] [SPARK-37893][CORE][TESTS] Avoid ConcurrentModificationException related to SparkFunSuite.LogAppender#_threshold" ### What changes were proposed in this pull request? When `mvn clean install -pl sql/core -am -Pscala-2.13` is executed, `AdaptiveQueryExecSuite` failed with a very small probability, the error stack as follows: ``` - Logging plan changes for AQE *** FAILED *** java.util.ConcurrentModificationException: mutation occurred during iteration at scala.collection.mutable.MutationTracker$.checkMutations(MutationTracker.scala:43) at scala.collection.mutable.CheckedIndexedSeqView$CheckedIterator.hasNext(CheckedIndexedSeqView.scala:47) at scala.collection.StrictOptimizedIterableOps.filterImpl(StrictOptimizedIterableOps.scala:225) at scala.collection.StrictOptimizedIterableOps.filterImpl$(StrictOptimizedIterableOps.scala:222) at scala.collection.mutable.ArrayBuffer.filterImpl(ArrayBuffer.scala:43) at scala.collection.StrictOptimizedIterableOps.filterNot(StrictOptimizedIterableOps.scala:220) at scala.collection.StrictOptimizedIterableOps.filterNot$(StrictOptimizedIterableOps.scala:220) at scala.collection.mutable.ArrayBuffer.filterNot(ArrayBuffer.scala:43) at org.apache.spark.SparkFunSuite$LogAppender.loggingEvents(SparkFunSuite.scala:288) at org.apache.spark.sql.execution.adaptive.AdaptiveQueryExecSuite.$anonfun$new$152(AdaptiveQueryExecSuite.scala:1487) ``` So this pr adds synchronous access control for `SparkFunSuite$LogAppender.loggingEvents` to avoid the above exceptions ### Why are the changes needed? Bug fix ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35190 from LuciferYang/SPARK-37893. Lead-authored-by: Dongjoon Hyun Co-authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/SparkFunSuite.scala | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala index 81b40a324d0de..02e67c0af1258 100644 --- a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala @@ -272,12 +272,14 @@ abstract class SparkFunSuite override def append(loggingEvent: LogEvent): Unit = loggingEvent.synchronized { val copyEvent = loggingEvent.toImmutable if (copyEvent.getLevel.isMoreSpecificThan(_threshold)) { - if (_loggingEvents.size >= maxEvents) { - val loggingInfo = if (msg == "") "." else s" while logging $msg." - throw new IllegalStateException( - s"Number of events reached the limit of $maxEvents$loggingInfo") + _loggingEvents.synchronized { + if (_loggingEvents.size >= maxEvents) { + val loggingInfo = if (msg == "") "." else s" while logging $msg." 
+ throw new IllegalStateException( + s"Number of events reached the limit of $maxEvents$loggingInfo") + } + _loggingEvents.append(copyEvent) } - _loggingEvents.append(copyEvent) } } @@ -285,6 +287,8 @@ abstract class SparkFunSuite _threshold = threshold } - def loggingEvents: ArrayBuffer[LogEvent] = _loggingEvents.filterNot(_ == null) + def loggingEvents: ArrayBuffer[LogEvent] = _loggingEvents.synchronized { + _loggingEvents.filterNot(_ == null) + } } } From 489391a56dc16459dde27bf29168381ac90e5a34 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 13 Jan 2022 19:15:20 -0800 Subject: [PATCH 020/513] [SPARK-37880][BUILD] Upgrade Scala to 2.13.8 ### What changes were proposed in this pull request? This PR aims to update Scala from 2.13.7 to 2.13.8 for Apache Spark 3.3. ### Why are the changes needed? Scala 2.13.8 is a maintenance release for the 2.13 line; the release notes are as follows: - https://github.com/scala/scala/releases/tag/v2.13.8 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass the GitHub Action Scala 2.13 job - Manual test (will add) Closes #35181 from LuciferYang/SPARK-37880. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index fc15a86edfa79..61d576c390b8f 100644 --- a/pom.xml +++ b/pom.xml @@ -3581,7 +3581,7 @@ <id>scala-2.13</id> - <scala.version>2.13.7</scala.version> + <scala.version>2.13.8</scala.version> <scala.binary.version>2.13</scala.binary.version> From ef837ca71020950b841f9891c70dc4b2-9d968bf1 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 13 Jan 2022 20:32:46 -0800 Subject: [PATCH 021/513] [SPARK-37905][INFRA] Make `merge_spark_pr.py` set primary author from the first commit in case of ties ### What changes were proposed in this pull request? This PR aims to make `merge_spark_pr.py` set the primary author from the first commit in case of ties. ### Why are the changes needed? Currently, `merge_spark_pr.py` chooses the primary author randomly when there are two commits from two authors. https://github.com/apache/spark/pull/35190 Ideally we would choose the primary author based on the number of changed lines, but that seems too hard. So, this PR aims to do better than before. ### Does this PR introduce _any_ user-facing change? No. This is dev-only. ### How was this patch tested? Manually. Closes #35205 from dongjoon-hyun/SPARK-37905.
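As a rough illustration of the tie-breaking idea, here is a Scala sketch with made-up author names (the actual change is in the Python script, using `dict.fromkeys` plus a stable `sorted`): de-duplicate while keeping first-commit order, then sort by commit count with a stable sort, so authors with the same number of commits stay in the order they first committed.

```scala
def primaryAuthor(commitAuthors: Seq[String]): String = {
  // `distinct` keeps the first occurrence of each author in commit order,
  // and Scala's `sortBy` is a stable sort, so equal counts preserve that order.
  commitAuthors.distinct
    .sortBy(author => -commitAuthors.count(_ == author))
    .head  // assumes at least one commit
}

primaryAuthor(Seq("alice", "bob"))         // "alice": on a tie, the first commit's author wins
primaryAuthor(Seq("alice", "bob", "bob"))  // "bob": the author with more commits still wins
```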
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/merge_spark_pr.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index 8d09c530dfb7f..e21a39a688170 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -135,11 +135,12 @@ def merge_pr(pr_num, target_ref, title, body, pr_repo_desc): continue_maybe(msg) had_conflicts = True + # First commit author should be considered as the primary author when the rank is the same commit_authors = run_cmd( - ["git", "log", "HEAD..%s" % pr_branch_name, "--pretty=format:%an <%ae>"] + ["git", "log", "HEAD..%s" % pr_branch_name, "--pretty=format:%an <%ae>", "--reverse"] ).split("\n") distinct_authors = sorted( - set(commit_authors), key=lambda x: commit_authors.count(x), reverse=True + list(dict.fromkeys(commit_authors)), key=lambda x: commit_authors.count(x), reverse=True ) primary_author = input( 'Enter primary author in the format of "name <email>" [%s]: ' % distinct_authors[0] From fcc51767aa4a594868eff33a7ddd8c5242006cd8 Mon Sep 17 00:00:00 2001 From: Kun Wan Date: Fri, 14 Jan 2022 08:25:26 +0100 Subject: [PATCH 022/513] [SPARK-36967][CORE] Report accurate shuffle block size if it is skewed ### What changes were proposed in this pull request? A shuffle block is considered skewed, and will be accurately recorded in HighlyCompressedMapStatus, if its size is larger than `spark.shuffle.accurateBlockSkewedFactor` multiplying the median shuffle block size. Before this change: ![map_status_before](https://user-images.githubusercontent.com/3626747/137251903-08a3544c-dc77-4b78-8ae5-93b42a54bd03.png) After this change: ![map_status_after](https://user-images.githubusercontent.com/3626747/137251871-355db24d-d66b-4702-8766-216db30a39e0.jpg) ### Why are the changes needed? Currently, a map task reports accurate shuffle block sizes only for blocks greater than "spark.shuffle.accurateBlockThreshold" (100M by default). But if there are a large number of map tasks and the shuffle block sizes of these tasks are smaller than "spark.shuffle.accurateBlockThreshold", there may be unrecognized data skew. For example, with 10000 map tasks and 10000 reduce tasks, if each map task creates a 50M shuffle block for reduce 0 and 10K shuffle blocks for the remaining reduce tasks, reduce 0 is skewed, but the map status statistics do not capture that information. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Updated existing UTs Closes #34234 from wankunde/map_status.
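Conceptually, the per-map-task threshold above which a block size is recorded exactly is computed as in the following simplified, self-contained Scala sketch (no `SparkEnv`/conf plumbing, simplified median; the function and parameter names are illustrative only):

```scala
// Sketch only: blocks at or above the returned threshold get their exact (compressed)
// size recorded in HighlyCompressedMapStatus; smaller blocks are summarized by an average.
def accurateBlockThresholdFor(
    sizes: Array[Long],                 // uncompressed shuffle block sizes of one map task
    accurateBlockThreshold: Long,       // spark.shuffle.accurateBlockThreshold (100M default)
    accurateBlockSkewedFactor: Double,  // spark.shuffle.accurateBlockSkewedFactor (-1.0 disables)
    maxAccurateSkewedBlockNumber: Int): Long = {
  if (accurateBlockSkewedFactor <= 0 || sizes.isEmpty) {
    accurateBlockThreshold                       // skew detection disabled: previous behavior
  } else {
    val sorted = sizes.sorted
    val median = sorted(sorted.length / 2)       // simplified median
    val maxAccurate = math.min(maxAccurateSkewedBlockNumber, sorted.length)
    // A block counts as skewed when it exceeds factor * median, but at most
    // maxAccurate of the largest blocks are ever tracked accurately.
    val skewSizeThreshold = math.max(
      (median * accurateBlockSkewedFactor).toLong,
      sorted(sorted.length - maxAccurate))
    math.min(accurateBlockThreshold, skewSizeThreshold)
  }
}
```

With the defaults added in this patch (factor -1.0), the computed threshold stays at `spark.shuffle.accurateBlockThreshold`, so behavior only changes when the factor is explicitly set to a positive value.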
Authored-by: Kun Wan Signed-off-by: attilapiros --- .../spark/internal/config/package.scala | 24 +++++ .../apache/spark/scheduler/MapStatus.scala | 32 +++++- .../scala/org/apache/spark/util/Utils.scala | 16 +++ .../spark/scheduler/MapStatusSuite.scala | 97 +++++++++++++++++++ .../adaptive/OptimizeSkewedJoin.scala | 15 +-- 5 files changed, 169 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index a942ba5401ab2..9e6cf341c197b 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -1178,6 +1178,30 @@ package object config { .bytesConf(ByteUnit.BYTE) .createWithDefault(100 * 1024 * 1024) + private[spark] val SHUFFLE_ACCURATE_BLOCK_SKEWED_FACTOR = + ConfigBuilder("spark.shuffle.accurateBlockSkewedFactor") + .internal() + .doc("A shuffle block is considered as skewed and will be accurately recorded in " + + "HighlyCompressedMapStatus if its size is larger than this factor multiplying " + + "the median shuffle block size or SHUFFLE_ACCURATE_BLOCK_THRESHOLD. It is " + + "recommended to set this parameter to be the same as SKEW_JOIN_SKEWED_PARTITION_FACTOR." + + "Set to -1.0 to disable this feature by default.") + .version("3.3.0") + .doubleConf + .createWithDefault(-1.0) + + private[spark] val SHUFFLE_MAX_ACCURATE_SKEWED_BLOCK_NUMBER = + ConfigBuilder("spark.shuffle.maxAccurateSkewedBlockNumber") + .internal() + .doc("Max skewed shuffle blocks allowed to be accurately recorded in " + + "HighlyCompressedMapStatus if its size is larger than " + + "SHUFFLE_ACCURATE_BLOCK_SKEWED_FACTOR multiplying the median shuffle block size or " + + "SHUFFLE_ACCURATE_BLOCK_THRESHOLD.") + .version("3.3.0") + .intConf + .checkValue(_ > 0, "Allowed max accurate skewed block number must be positive.") + .createWithDefault(100) + private[spark] val SHUFFLE_REGISTRATION_TIMEOUT = ConfigBuilder("spark.shuffle.registration.timeout") .doc("Timeout in milliseconds for registration to the external shuffle service.") diff --git a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala index 07eed76805dd2..1a7a1675fe05f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala @@ -255,9 +255,35 @@ private[spark] object HighlyCompressedMapStatus { // we expect that there will be far fewer of them, so we will perform fewer bitmap insertions. 
val emptyBlocks = new RoaringBitmap() val totalNumBlocks = uncompressedSizes.length - val threshold = Option(SparkEnv.get) - .map(_.conf.get(config.SHUFFLE_ACCURATE_BLOCK_THRESHOLD)) - .getOrElse(config.SHUFFLE_ACCURATE_BLOCK_THRESHOLD.defaultValue.get) + val accurateBlockSkewedFactor = Option(SparkEnv.get) + .map(_.conf.get(config.SHUFFLE_ACCURATE_BLOCK_SKEWED_FACTOR)) + .getOrElse(config.SHUFFLE_ACCURATE_BLOCK_SKEWED_FACTOR.defaultValue.get) + val shuffleAccurateBlockThreshold = + Option(SparkEnv.get) + .map(_.conf.get(config.SHUFFLE_ACCURATE_BLOCK_THRESHOLD)) + .getOrElse(config.SHUFFLE_ACCURATE_BLOCK_THRESHOLD.defaultValue.get) + val threshold = + if (accurateBlockSkewedFactor > 0) { + val sortedSizes = uncompressedSizes.sorted + val medianSize: Long = Utils.median(sortedSizes) + val maxAccurateSkewedBlockNumber = + Math.min( + Option(SparkEnv.get) + .map(_.conf.get(config.SHUFFLE_MAX_ACCURATE_SKEWED_BLOCK_NUMBER)) + .getOrElse(config.SHUFFLE_MAX_ACCURATE_SKEWED_BLOCK_NUMBER.defaultValue.get), + totalNumBlocks + ) + val skewSizeThreshold = + Math.max( + medianSize * accurateBlockSkewedFactor, + sortedSizes(totalNumBlocks - maxAccurateSkewedBlockNumber) + ) + Math.min(shuffleAccurateBlockThreshold, skewSizeThreshold) + } else { + // Disable skew detection if accurateBlockSkewedFactor <= 0 + shuffleAccurateBlockThreshold + } + val hugeBlockSizes = mutable.Map.empty[Int, Byte] while (i < totalNumBlocks) { val size = uncompressedSizes(i) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 4410fe7fa8657..8f3d1de33cef3 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -3216,6 +3216,22 @@ private[spark] object Utils extends Logging { } files.toSeq } + + /** + * Return the median number of a long array + * + * @param sizes + * @return + */ + def median(sizes: Array[Long]): Long = { + val len = sizes.length + val sortedSize = sizes.sorted + len match { + case _ if (len % 2 == 0) => + math.max((sortedSize(len / 2) + sortedSize(len / 2 - 1)) / 2, 1) + case _ => math.max(sortedSize(len / 2), 1) + } + } } private[util] object CallerContext extends Logging { diff --git a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala index 23cc416f8572f..47723c5d8c689 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.LocalSparkContext._ import org.apache.spark.internal.config import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.storage.BlockManagerId +import org.apache.spark.util.Utils class MapStatusSuite extends SparkFunSuite { private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) @@ -191,4 +192,100 @@ class MapStatusSuite extends SparkFunSuite { assert(count === 3000) } } + + def compressAndDecompressSize(size: Long): Long = { + MapStatus.decompressSize(MapStatus.compressSize(size)) + } + + test("SPARK-36967: HighlyCompressedMapStatus should record accurately the size " + + "of skewed shuffle blocks") { + val emptyBlocksLength = 3 + val smallAndUntrackedBlocksLength = 2889 + val trackedSkewedBlocksLength = 20 + + val conf = new SparkConf().set(config.SHUFFLE_ACCURATE_BLOCK_SKEWED_FACTOR.key, "5") + val env = mock(classOf[SparkEnv]) + 
doReturn(conf).when(env).conf + SparkEnv.set(env) + + val emptyBlocks = Array.fill[Long](emptyBlocksLength)(0L) + val smallAndUntrackedBlocks = Array.tabulate[Long](smallAndUntrackedBlocksLength)(i => i) + val trackedSkewedBlocks = + Array.tabulate[Long](trackedSkewedBlocksLength)(i => i + 350 * 1024) + val allBlocks = emptyBlocks ++: smallAndUntrackedBlocks ++: trackedSkewedBlocks + val avg = smallAndUntrackedBlocks.sum / smallAndUntrackedBlocks.length + val loc = BlockManagerId("a", "b", 10) + val mapTaskAttemptId = 5 + val status = MapStatus(loc, allBlocks, mapTaskAttemptId) + val status1 = compressAndDecompressMapStatus(status) + assert(status1.isInstanceOf[HighlyCompressedMapStatus]) + assert(status1.location == loc) + assert(status1.mapId == mapTaskAttemptId) + assert(status1.getSizeForBlock(0) == 0) + for (i <- 1 until emptyBlocksLength) { + assert(status1.getSizeForBlock(i) === 0L) + } + for (i <- 1 until smallAndUntrackedBlocksLength) { + assert(status1.getSizeForBlock(emptyBlocksLength + i) === avg) + } + for (i <- 0 until trackedSkewedBlocksLength) { + assert(status1.getSizeForBlock(emptyBlocksLength + smallAndUntrackedBlocksLength + i) === + compressAndDecompressSize(trackedSkewedBlocks(i)), + "Only tracked skewed block size is accurate") + } + } + + test("SPARK-36967: Limit accurate skewed block number if too many blocks are skewed") { + val accurateBlockSkewedFactor = 5 + val emptyBlocksLength = 3 + val smallBlocksLength = 2500 + val untrackedSkewedBlocksLength = 500 + val trackedSkewedBlocksLength = 20 + + val conf = + new SparkConf() + .set(config.SHUFFLE_ACCURATE_BLOCK_SKEWED_FACTOR.key, accurateBlockSkewedFactor.toString) + .set( + config.SHUFFLE_MAX_ACCURATE_SKEWED_BLOCK_NUMBER.key, + trackedSkewedBlocksLength.toString) + val env = mock(classOf[SparkEnv]) + doReturn(conf).when(env).conf + SparkEnv.set(env) + + val emptyBlocks = Array.fill[Long](emptyBlocksLength)(0L) + val smallBlockSizes = Array.tabulate[Long](smallBlocksLength)(i => i + 1) + val untrackedSkewedBlocksSizes = + Array.tabulate[Long](untrackedSkewedBlocksLength)(i => i + 3500 * 1024) + val trackedSkewedBlocksSizes = + Array.tabulate[Long](trackedSkewedBlocksLength)(i => i + 4500 * 1024) + val nonEmptyBlocks = + smallBlockSizes ++: untrackedSkewedBlocksSizes ++: trackedSkewedBlocksSizes + val allBlocks = emptyBlocks ++: nonEmptyBlocks + + val skewThreshold = Utils.median(allBlocks.sorted) * accurateBlockSkewedFactor + assert(nonEmptyBlocks.filter(_ > skewThreshold).size == + untrackedSkewedBlocksLength + trackedSkewedBlocksLength, + "number of skewed block sizes") + + val smallAndUntrackedBlocks = + nonEmptyBlocks.slice(0, nonEmptyBlocks.size - trackedSkewedBlocksLength) + val avg = smallAndUntrackedBlocks.sum / smallAndUntrackedBlocks.length + + val loc = BlockManagerId("a", "b", 10) + val mapTaskAttemptId = 5 + val status = MapStatus(loc, allBlocks, mapTaskAttemptId) + val status1 = compressAndDecompressMapStatus(status) + assert(status1.isInstanceOf[HighlyCompressedMapStatus]) + assert(status1.location == loc) + assert(status1.mapId == mapTaskAttemptId) + assert(status1.getSizeForBlock(0) == 0) + for (i <- emptyBlocksLength until allBlocks.length - trackedSkewedBlocksLength) { + assert(status1.getSizeForBlock(i) === avg) + } + for (i <- 0 until trackedSkewedBlocksLength) { + assert(status1.getSizeForBlock(allBlocks.length - trackedSkewedBlocksLength + i) === + compressAndDecompressSize(trackedSkewedBlocksSizes(i)), + "Only tracked skewed block size is accurate") + } + } } diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala index 1c1ee7d03a4df..c3e3eb7b2b21e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, EnsureRequirements, ValidateRequirements} import org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoinExec} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.util.Utils /** * A rule to optimize skewed joins to avoid straggler tasks whose share of data are significantly @@ -66,16 +67,6 @@ case class OptimizeSkewedJoin(ensureRequirements: EnsureRequirements) medianSize * conf.getConf(SQLConf.SKEW_JOIN_SKEWED_PARTITION_FACTOR)) } - private def medianSize(sizes: Array[Long]): Long = { - val numPartitions = sizes.length - val bytes = sizes.sorted - numPartitions match { - case _ if (numPartitions % 2 == 0) => - math.max((bytes(numPartitions / 2) + bytes(numPartitions / 2 - 1)) / 2, 1) - case _ => math.max(bytes(numPartitions / 2), 1) - } - } - /** * The goal of skew join optimization is to make the data distribution more even. The target size * to split skewed partitions is the average size of non-skewed partition, or the @@ -130,8 +121,8 @@ case class OptimizeSkewedJoin(ensureRequirements: EnsureRequirements) assert(leftSizes.length == rightSizes.length) val numPartitions = leftSizes.length // We use the median size of the original shuffle partitions to detect skewed partitions. - val leftMedSize = medianSize(leftSizes) - val rightMedSize = medianSize(rightSizes) + val leftMedSize = Utils.median(leftSizes) + val rightMedSize = Utils.median(rightSizes) logDebug( s""" |Optimizing skewed join. From 6f908de99b8f486996d77f0dfab6ba0577ea93ba Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Fri, 14 Jan 2022 09:21:24 -0800 Subject: [PATCH 023/513] [SPARK-37372][K8S] Removing redundant label addition ### What changes were proposed in this pull request? Remove redundant Pod label additions in the driver and executor. ### Why are the changes needed? These labels are already included in conf.labels as preset labels, so we don't need to add them again. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT passed: Especially: https://github.com/apache/spark/blob/a3886ba976469bef0dfafc3da8686a53c5a59d95/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala#L157-L164 Closes #34646 from Yikun/SPARK-labels-improve.
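The reasoning is that the conf-level label map is already the union of Spark's preset labels and any user-supplied ones, so attaching individual preset labels again is a no-op. A tiny, purely illustrative Python sketch of that argument (the label keys and values here are invented, not the actual Spark constants or the KubernetesConf API):

```
# Illustrative only -- made-up keys/values, not Spark's actual label constants.
preset_labels = {"spark-app-name": "my-app", "spark-role": "executor"}
user_labels = {"team": "data-platform"}

# Roughly what conf.labels already hands to the feature steps.
conf_labels = {**preset_labels, **user_labels}

# Re-adding a preset entry afterwards (what the deleted .addToLabels calls did)
# leaves the pod's label map unchanged, so those calls can be dropped.
assert {**conf_labels, "spark-app-name": "my-app"} == conf_labels
print(conf_labels)
```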
Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- .../spark/deploy/k8s/features/BasicDriverFeatureStep.scala | 1 - .../spark/deploy/k8s/features/BasicExecutorFeatureStep.scala | 5 ----- 2 files changed, 6 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala index 49681dc8191c2..925f9dc93a26d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -142,7 +142,6 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) .editOrNewMetadata() .withName(driverPodName) .addToLabels(conf.labels.asJava) - .addToLabels(SPARK_APP_NAME_LABEL, KubernetesConf.getAppNameLabel(conf.appName)) .addToAnnotations(conf.annotations.asJava) .endMetadata() .editOrNewSpec() diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index 3f0a21e72ffbf..6a339efcf3f85 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -276,11 +276,6 @@ private[spark] class BasicExecutorFeatureStep( .editOrNewMetadata() .withName(name) .addToLabels(kubernetesConf.labels.asJava) - .addToLabels(SPARK_RESOURCE_PROFILE_ID_LABEL, resourceProfile.id.toString) - .addToLabels( - SPARK_APP_NAME_LABEL, - KubernetesConf.getAppNameLabel(kubernetesConf.appName) - ) .addToAnnotations(kubernetesConf.annotations.asJava) .addToOwnerReferences(ownerReference.toSeq: _*) .endMetadata() From b12fc70ef9e9b661e1fc029ba5e66635afe0dd99 Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 14 Jan 2022 18:46:40 +0100 Subject: [PATCH 024/513] [SPARK-37429][PYTHON][MLLIB] Inline annotations for pyspark.mllib.linalg.__init__.py ### What changes were proposed in this pull request? Inline annotations for `pyspark.mllib.linalg.__init__.py` ### Why are the changes needed? Currently, there is type hint stub files `pyspark.mllib.linalg.__init__.pyi` to show the expected types for functions, but we can also take advantage of static type checking within the functions by inlining the type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35020 from zero323/SPARK-37429. 
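For context on the two annotation styles, here is a small, hypothetical example (not taken from `pyspark.mllib`) of what moving from a `.pyi` stub to inline hints means for type checking:

```
# Stub-file style: the types live in a separate mod.pyi and the implementation
# stays untyped, so mypy checks callers but not the function body itself.
#
#   mod.pyi:  def scale(v: List[float], k: float) -> List[float]: ...
#   mod.py:   def scale(v, k): return [k * x for x in v]

# Inline style (what this patch switches to): the same information sits on the
# definition, so mypy can verify the body as well, and the stub can be deleted.
from typing import List

def scale(v: List[float], k: float) -> List[float]:
    return [k * x for x in v]

print(scale([1.0, 2.0], 3.0))  # [3.0, 6.0]
```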
Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/mllib/linalg/__init__.py | 443 +++++++++++++++-------- python/pyspark/mllib/linalg/__init__.pyi | 278 -------------- 2 files changed, 301 insertions(+), 420 deletions(-) delete mode 100644 python/pyspark/mllib/linalg/__init__.pyi diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index 7d1818d3f2a36..bbe87280b74e3 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -42,6 +42,32 @@ BooleanType, ) +from typing import ( + Any, + Callable, + cast, + Dict, + Generic, + Iterable, + List, + Optional, + overload, + Sequence, + Tuple, + Type, + TypeVar, + TYPE_CHECKING, + Union, +) + +if TYPE_CHECKING: + from pyspark.mllib._typing import VectorLike + from scipy.sparse import spmatrix + + +QT = TypeVar("QT") +RT = TypeVar("RT") + __all__ = [ "Vector", @@ -68,23 +94,23 @@ _have_scipy = False -def _convert_to_vector(d): +def _convert_to_vector(d: Union["VectorLike", "spmatrix", range]) -> "Vector": if isinstance(d, Vector): return d elif type(d) in (array.array, np.array, np.ndarray, list, tuple, range): return DenseVector(d) elif _have_scipy and scipy.sparse.issparse(d): - assert d.shape[1] == 1, "Expected column vector" + assert cast("spmatrix", d).shape[1] == 1, "Expected column vector" # Make sure the converted csc_matrix has sorted indices. - csc = d.tocsc() + csc = cast("spmatrix", d).tocsc() if not csc.has_sorted_indices: csc.sort_indices() - return SparseVector(d.shape[0], csc.indices, csc.data) + return SparseVector(cast("spmatrix", d).shape[0], csc.indices, csc.data) else: raise TypeError("Cannot convert type %s into Vector" % type(d)) -def _vector_size(v): +def _vector_size(v: Union["VectorLike", "spmatrix", range]) -> int: """ Returns the size of the vector. @@ -115,26 +141,26 @@ def _vector_size(v): else: raise ValueError("Cannot treat an ndarray of shape %s as a vector" % str(v.shape)) elif _have_scipy and scipy.sparse.issparse(v): - assert v.shape[1] == 1, "Expected column vector" - return v.shape[0] + assert cast("spmatrix", v).shape[1] == 1, "Expected column vector" + return cast("spmatrix", v).shape[0] else: raise TypeError("Cannot treat type %s as a vector" % type(v)) -def _format_float(f, digits=4): +def _format_float(f: float, digits: int = 4) -> str: s = str(round(f, digits)) if "." 
in s: s = s[: s.index(".") + 1 + digits] return s -def _format_float_list(xs): +def _format_float_list(xs: Iterable[float]) -> List[str]: return [_format_float(x) for x in xs] -def _double_to_long_bits(value): +def _double_to_long_bits(value: float) -> int: if np.isnan(value): - value = float("nan") + value = float("nan") # type: ignore[assignment] # pack double into 64 bits, then unpack as long int return struct.unpack("Q", struct.pack("d", value))[0] @@ -145,7 +171,7 @@ class VectorUDT(UserDefinedType): """ @classmethod - def sqlType(cls): + def sqlType(cls) -> StructType: return StructType( [ StructField("type", ByteType(), False), @@ -156,37 +182,41 @@ def sqlType(cls): ) @classmethod - def module(cls): + def module(cls) -> str: return "pyspark.mllib.linalg" @classmethod - def scalaUDT(cls): + def scalaUDT(cls) -> str: return "org.apache.spark.mllib.linalg.VectorUDT" - def serialize(self, obj): + def serialize( + self, obj: "Vector" + ) -> Tuple[int, Optional[int], Optional[List[int]], List[float]]: if isinstance(obj, SparseVector): indices = [int(i) for i in obj.indices] values = [float(v) for v in obj.values] return (0, obj.size, indices, values) elif isinstance(obj, DenseVector): - values = [float(v) for v in obj] + values = [float(v) for v in obj] # type: ignore[attr-defined] return (1, None, None, values) else: raise TypeError("cannot serialize %r of type %r" % (obj, type(obj))) - def deserialize(self, datum): + def deserialize( + self, datum: Tuple[int, Optional[int], Optional[List[int]], List[float]] + ) -> "Vector": assert ( len(datum) == 4 ), "VectorUDT.deserialize given row with length %d but requires 4" % len(datum) tpe = datum[0] if tpe == 0: - return SparseVector(datum[1], datum[2], datum[3]) + return SparseVector(cast(int, datum[1]), cast(List[int], datum[2]), datum[3]) elif tpe == 1: return DenseVector(datum[3]) else: raise ValueError("do not recognize type %r" % tpe) - def simpleString(self): + def simpleString(self) -> str: return "vector" @@ -196,7 +226,7 @@ class MatrixUDT(UserDefinedType): """ @classmethod - def sqlType(cls): + def sqlType(cls) -> StructType: return StructType( [ StructField("type", ByteType(), False), @@ -210,14 +240,16 @@ def sqlType(cls): ) @classmethod - def module(cls): + def module(cls) -> str: return "pyspark.mllib.linalg" @classmethod - def scalaUDT(cls): + def scalaUDT(cls) -> str: return "org.apache.spark.mllib.linalg.MatrixUDT" - def serialize(self, obj): + def serialize( + self, obj: "Matrix" + ) -> Tuple[int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool]: if isinstance(obj, SparseMatrix): colPtrs = [int(i) for i in obj.colPtrs] rowIndices = [int(i) for i in obj.rowIndices] @@ -237,19 +269,29 @@ def serialize(self, obj): else: raise TypeError("cannot serialize type %r" % (type(obj))) - def deserialize(self, datum): + def deserialize( + self, + datum: Tuple[int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool], + ) -> "Matrix": assert ( len(datum) == 7 ), "MatrixUDT.deserialize given row with length %d but requires 7" % len(datum) tpe = datum[0] if tpe == 0: - return SparseMatrix(*datum[1:]) + return SparseMatrix( + datum[1], + datum[2], + cast(List[int], datum[3]), + cast(List[int], datum[4]), + datum[5], + datum[6], + ) elif tpe == 1: return DenseMatrix(datum[1], datum[2], datum[5], datum[6]) else: raise ValueError("do not recognize type %r" % tpe) - def simpleString(self): + def simpleString(self) -> str: return "matrix" @@ -261,7 +303,7 @@ class Vector: Abstract class for DenseVector and 
SparseVector """ - def toArray(self): + def toArray(self) -> np.ndarray: """ Convert the vector into an numpy.ndarray @@ -271,7 +313,7 @@ def toArray(self): """ raise NotImplementedError - def asML(self): + def asML(self) -> newlinalg.Vector: """ Convert this vector to the new mllib-local representation. This does NOT copy the data; it copies references. @@ -282,6 +324,9 @@ def asML(self): """ raise NotImplementedError + def __len__(self) -> int: + raise NotImplementedError + class DenseVector(Vector): """ @@ -309,17 +354,18 @@ class DenseVector(Vector): DenseVector([-1.0, -2.0]) """ - def __init__(self, ar): + def __init__(self, ar: Union[bytes, np.ndarray, Iterable[float]]): + ar_: np.ndarray if isinstance(ar, bytes): - ar = np.frombuffer(ar, dtype=np.float64) + ar_ = np.frombuffer(ar, dtype=np.float64) elif not isinstance(ar, np.ndarray): - ar = np.array(ar, dtype=np.float64) - if ar.dtype != np.float64: - ar = ar.astype(np.float64) - self.array = ar + ar_ = np.array(ar, dtype=np.float64) + else: + ar_ = ar.astype(np.float64) if ar.dtype != np.float64 else ar + self.array = ar_ @staticmethod - def parse(s): + def parse(s: str) -> "DenseVector": """ Parse string representation back into the DenseVector. @@ -342,16 +388,16 @@ def parse(s): raise ValueError("Unable to parse values from %s" % s) return DenseVector(values) - def __reduce__(self): - return DenseVector, (self.array.tostring(),) + def __reduce__(self) -> Tuple[Type["DenseVector"], Tuple[bytes]]: + return DenseVector, (self.array.tostring(),) # type: ignore[attr-defined] - def numNonzeros(self): + def numNonzeros(self) -> int: """ Number of nonzero elements. This scans all active values and count non zeros """ return np.count_nonzero(self.array) - def norm(self, p): + def norm(self, p: Union[float, str]) -> np.float64: """ Calculates the norm of a DenseVector. @@ -365,7 +411,7 @@ def norm(self, p): """ return np.linalg.norm(self.array, p) - def dot(self, other): + def dot(self, other: Iterable[float]) -> np.float64: """ Compute the dot product of two Vectors. We support (Numpy array, list, SparseVector, or SciPy sparse) @@ -399,8 +445,8 @@ def dot(self, other): assert len(self) == other.shape[0], "dimension mismatch" return np.dot(self.array, other) elif _have_scipy and scipy.sparse.issparse(other): - assert len(self) == other.shape[0], "dimension mismatch" - return other.transpose().dot(self.toArray()) + assert len(self) == cast("spmatrix", other).shape[0], "dimension mismatch" + return cast("spmatrix", other).transpose().dot(self.toArray()) else: assert len(self) == _vector_size(other), "dimension mismatch" if isinstance(other, SparseVector): @@ -410,7 +456,7 @@ def dot(self, other): else: return np.dot(self.toArray(), other) - def squared_distance(self, other): + def squared_distance(self, other: Iterable[float]) -> np.float64: """ Squared distance of two Vectors. 
@@ -441,22 +487,22 @@ def squared_distance(self, other): if isinstance(other, SparseVector): return other.squared_distance(self) elif _have_scipy and scipy.sparse.issparse(other): - return _convert_to_vector(other).squared_distance(self) + return _convert_to_vector(other).squared_distance(self) # type: ignore[attr-defined] if isinstance(other, Vector): other = other.toArray() elif not isinstance(other, np.ndarray): other = np.array(other) - diff = self.toArray() - other + diff: np.ndarray = self.toArray() - other return np.dot(diff, diff) - def toArray(self): + def toArray(self) -> np.ndarray: """ Returns an numpy.ndarray """ return self.array - def asML(self): + def asML(self) -> newlinalg.DenseVector: """ Convert this vector to the new mllib-local representation. This does NOT copy the data; it copies references. @@ -470,25 +516,33 @@ def asML(self): return newlinalg.DenseVector(self.array) @property - def values(self): + def values(self) -> np.ndarray: """ Returns a list of values """ return self.array - def __getitem__(self, item): + @overload + def __getitem__(self, item: int) -> np.float64: + ... + + @overload + def __getitem__(self, item: slice) -> np.ndarray: + ... + + def __getitem__(self, item: Union[int, slice]) -> Union[np.float64, np.ndarray]: return self.array[item] - def __len__(self): + def __len__(self) -> int: return len(self.array) - def __str__(self): + def __str__(self) -> str: return "[" + ",".join([str(v) for v in self.array]) + "]" - def __repr__(self): + def __repr__(self) -> str: return "DenseVector([%s])" % (", ".join(_format_float(i) for i in self.array)) - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if isinstance(other, DenseVector): return np.array_equal(self.array, other.array) elif isinstance(other, SparseVector): @@ -497,10 +551,10 @@ def __eq__(self, other): return Vectors._equals(list(range(len(self))), self.array, other.indices, other.values) return False - def __ne__(self, other): + def __ne__(self, other: Any) -> bool: return not self == other - def __hash__(self): + def __hash__(self) -> int: size = len(self) result = 31 + size nnz = 0 @@ -514,14 +568,14 @@ def __hash__(self): i += 1 return result - def __getattr__(self, item): + def __getattr__(self, item: str) -> Any: return getattr(self.array, item) - def __neg__(self): + def __neg__(self) -> "DenseVector": return DenseVector(-self.array) - def _delegate(op): - def func(self, other): + def _delegate(op: str) -> Callable[["DenseVector", Any], Any]: # type: ignore[misc] + def func(self: "DenseVector", other: Any) -> Any: if isinstance(other, DenseVector): other = other.array return DenseVector(getattr(self.array, op)(other)) @@ -548,7 +602,33 @@ class SparseVector(Vector): alternatively pass SciPy's {scipy.sparse} data types. """ - def __init__(self, size, *args): + @overload + def __init__(self, size: int, __indices: bytes, __values: bytes): + ... + + @overload + def __init__(self, size: int, *args: Tuple[int, float]): + ... + + @overload + def __init__(self, size: int, __indices: Iterable[int], __values: Iterable[float]): + ... + + @overload + def __init__(self, size: int, __pairs: Iterable[Tuple[int, float]]): + ... + + @overload + def __init__(self, size: int, __map: Dict[int, float]): + ... 
+ + def __init__( + self, + size: int, + *args: Union[ + bytes, Tuple[int, float], Iterable[float], Iterable[Tuple[int, float]], Dict[int, float] + ], + ): """ Create a sparse vector, using either a dictionary, a list of (index, value) pairs, or two separate arrays of indices and @@ -580,7 +660,7 @@ def __init__(self, size, *args): pairs = args[0] if type(pairs) == dict: pairs = pairs.items() - pairs = sorted(pairs) + pairs = cast(Iterable[Tuple[int, float]], sorted(pairs)) self.indices = np.array([p[0] for p in pairs], dtype=np.int32) """ A list of indices corresponding to active entries. """ self.values = np.array([p[1] for p in pairs], dtype=np.float64) @@ -606,13 +686,13 @@ def __init__(self, size, *args): % (self.indices[i], self.indices[i + 1]) ) - def numNonzeros(self): + def numNonzeros(self) -> int: """ Number of nonzero elements. This scans all active values and count non zeros. """ return np.count_nonzero(self.values) - def norm(self, p): + def norm(self, p: Union[float, str]) -> np.float64: """ Calculates the norm of a SparseVector. @@ -626,11 +706,18 @@ def norm(self, p): """ return np.linalg.norm(self.values, p) - def __reduce__(self): - return (SparseVector, (self.size, self.indices.tostring(), self.values.tostring())) + def __reduce__(self) -> Tuple[Type["SparseVector"], Tuple[int, bytes, bytes]]: + return ( + SparseVector, + ( + self.size, + self.indices.tostring(), # type: ignore[attr-defined] + self.values.tostring(), # type: ignore[attr-defined] + ), + ) @staticmethod - def parse(s): + def parse(s: str) -> "SparseVector": """ Parse string representation back into the SparseVector. @@ -649,7 +736,7 @@ def parse(s): size = s[: s.find(",")] try: - size = int(size) + size = int(size) # type: ignore[assignment] except ValueError: raise ValueError("Cannot parse size %s." % size) @@ -678,9 +765,9 @@ def parse(s): values = [float(val) for val in val_list if val] except ValueError: raise ValueError("Unable to parse values from %s." % s) - return SparseVector(size, indices, values) + return SparseVector(cast(int, size), indices, values) - def dot(self, other): + def dot(self, other: Any) -> np.float64: """ Dot product with a SparseVector or 1- or 2-dimensional Numpy array. @@ -730,7 +817,7 @@ def dot(self, other): self_cmind = np.in1d(self.indices, other.indices, assume_unique=True) self_values = self.values[self_cmind] if self_values.size == 0: - return 0.0 + return np.float64(0.0) else: other_cmind = np.in1d(other.indices, self.indices, assume_unique=True) return np.dot(self_values, other.values[other_cmind]) @@ -738,7 +825,7 @@ def dot(self, other): else: return self.dot(_convert_to_vector(other)) - def squared_distance(self, other): + def squared_distance(self, other: Any) -> np.float64: """ Squared distance from a SparseVector or 1-dimensional NumPy array. @@ -808,7 +895,7 @@ def squared_distance(self, other): else: return self.squared_distance(_convert_to_vector(other)) - def toArray(self): + def toArray(self) -> np.ndarray: """ Returns a copy of this SparseVector as a 1-dimensional NumPy array. """ @@ -816,7 +903,7 @@ def toArray(self): arr[self.indices] = self.values return arr - def asML(self): + def asML(self) -> newlinalg.SparseVector: """ Convert this vector to the new mllib-local representation. This does NOT copy the data; it copies references. 
@@ -829,15 +916,15 @@ def asML(self): """ return newlinalg.SparseVector(self.size, self.indices, self.values) - def __len__(self): + def __len__(self) -> int: return self.size - def __str__(self): + def __str__(self) -> str: inds = "[" + ",".join([str(i) for i in self.indices]) + "]" vals = "[" + ",".join([str(v) for v in self.values]) + "]" return "(" + ",".join((str(self.size), inds, vals)) + ")" - def __repr__(self): + def __repr__(self) -> str: inds = self.indices vals = self.values entries = ", ".join( @@ -845,7 +932,7 @@ def __repr__(self): ) return "SparseVector({0}, {{{1}}})".format(self.size, entries) - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if isinstance(other, SparseVector): return ( other.size == self.size @@ -858,7 +945,7 @@ def __eq__(self, other): return Vectors._equals(self.indices, self.values, list(range(len(other))), other.array) return False - def __getitem__(self, index): + def __getitem__(self, index: int) -> np.float64: inds = self.indices vals = self.values if not isinstance(index, int): @@ -870,18 +957,18 @@ def __getitem__(self, index): index += self.size if (inds.size == 0) or (index > inds.item(-1)): - return 0.0 + return np.float64(0.0) insert_index = np.searchsorted(inds, index) row_ind = inds[insert_index] if row_ind == index: return vals[insert_index] - return 0.0 + return np.float64(0.0) - def __ne__(self, other): + def __ne__(self, other: Any) -> bool: return not self.__eq__(other) - def __hash__(self): + def __hash__(self) -> int: result = 31 + self.size nnz = 0 i = 0 @@ -909,7 +996,37 @@ class Vectors: """ @staticmethod - def sparse(size, *args): + @overload + def sparse(size: int, __indices: bytes, __values: bytes) -> SparseVector: + ... + + @staticmethod + @overload + def sparse(size: int, *args: Tuple[int, float]) -> SparseVector: + ... + + @staticmethod + @overload + def sparse(size: int, __indices: Iterable[int], __values: Iterable[float]) -> SparseVector: + ... + + @staticmethod + @overload + def sparse(size: int, __pairs: Iterable[Tuple[int, float]]) -> SparseVector: + ... + + @staticmethod + @overload + def sparse(size: int, __map: Dict[int, float]) -> SparseVector: + ... + + @staticmethod + def sparse( + size: int, + *args: Union[ + bytes, Tuple[int, float], Iterable[float], Iterable[Tuple[int, float]], Dict[int, float] + ], + ) -> SparseVector: """ Create a sparse vector, using either a dictionary, a list of (index, value) pairs, or two separate arrays of indices and @@ -932,10 +1049,25 @@ def sparse(size, *args): >>> Vectors.sparse(4, [1, 3], [1.0, 5.5]) SparseVector(4, {1: 1.0, 3: 5.5}) """ - return SparseVector(size, *args) + return SparseVector(size, *args) # type: ignore[arg-type] + @overload @staticmethod - def dense(*elements): + def dense(*elements: float) -> DenseVector: + ... + + @overload + @staticmethod + def dense(__arr: bytes) -> DenseVector: + ... + + @overload + @staticmethod + def dense(__arr: Iterable[float]) -> DenseVector: + ... + + @staticmethod + def dense(*elements: Union[float, bytes, np.ndarray, Iterable[float]]) -> DenseVector: """ Create a dense vector of 64-bit floats from a Python list or numbers. @@ -948,11 +1080,11 @@ def dense(*elements): """ if len(elements) == 1 and not isinstance(elements[0], (float, int)): # it's list, numpy.array or other iterable object. 
- elements = elements[0] - return DenseVector(elements) + elements = elements[0] # type: ignore[assignment] + return DenseVector(cast(Iterable[float], elements)) @staticmethod - def fromML(vec): + def fromML(vec: newlinalg.DenseVector) -> DenseVector: """ Convert a vector from the new mllib-local representation. This does NOT copy the data; it copies references. @@ -975,7 +1107,7 @@ def fromML(vec): raise TypeError("Unsupported vector type %s" % type(vec)) @staticmethod - def stringify(vector): + def stringify(vector: Vector) -> str: """ Converts a vector into a string, which can be recognized by Vectors.parse(). @@ -990,7 +1122,7 @@ def stringify(vector): return str(vector) @staticmethod - def squared_distance(v1, v2): + def squared_distance(v1: Vector, v2: Vector) -> np.float64: """ Squared distance between two vectors. a and b can be of type SparseVector, DenseVector, np.ndarray @@ -1004,17 +1136,17 @@ def squared_distance(v1, v2): 51.0 """ v1, v2 = _convert_to_vector(v1), _convert_to_vector(v2) - return v1.squared_distance(v2) + return v1.squared_distance(v2) # type: ignore[attr-defined] @staticmethod - def norm(vector, p): + def norm(vector: Vector, p: Union[float, str]) -> np.float64: """ Find norm of the given vector. """ - return _convert_to_vector(vector).norm(p) + return _convert_to_vector(vector).norm(p) # type: ignore[attr-defined] @staticmethod - def parse(s): + def parse(s: str) -> Vector: """Parse a string representation back into the Vector. Examples @@ -1032,11 +1164,16 @@ def parse(s): raise ValueError("Cannot find tokens '[' or '(' from the input string.") @staticmethod - def zeros(size): + def zeros(size: int) -> DenseVector: return DenseVector(np.zeros(size)) @staticmethod - def _equals(v1_indices, v1_values, v2_indices, v2_values): + def _equals( + v1_indices: Union[Sequence[int], np.ndarray], + v1_values: Union[Sequence[float], np.ndarray], + v2_indices: Union[Sequence[int], np.ndarray], + v2_values: Union[Sequence[float], np.ndarray], + ) -> bool: """ Check equality between sparse/dense vectors, v1_indices and v2_indices assume to be strictly increasing. @@ -1069,18 +1206,18 @@ class Matrix: Represents a local matrix. """ - def __init__(self, numRows, numCols, isTransposed=False): + def __init__(self, numRows: int, numCols: int, isTransposed: bool = False) -> None: self.numRows = numRows self.numCols = numCols self.isTransposed = isTransposed - def toArray(self): + def toArray(self) -> np.ndarray: """ Returns its elements in a NumPy ndarray. """ raise NotImplementedError - def asML(self): + def asML(self) -> newlinalg.Matrix: """ Convert this matrix to the new mllib-local representation. This does NOT copy the data; it copies references. @@ -1088,7 +1225,7 @@ def asML(self): raise NotImplementedError @staticmethod - def _convert_to_array(array_like, dtype): + def _convert_to_array(array_like: Union[bytes, Iterable[float]], dtype: Any) -> np.ndarray: """ Convert Matrix attributes which are array-like or buffer to array. """ @@ -1102,21 +1239,27 @@ class DenseMatrix(Matrix): Column-major dense matrix. 
""" - def __init__(self, numRows, numCols, values, isTransposed=False): + def __init__( + self, + numRows: int, + numCols: int, + values: Union[bytes, Iterable[float]], + isTransposed: bool = False, + ): Matrix.__init__(self, numRows, numCols, isTransposed) values = self._convert_to_array(values, np.float64) assert len(values) == numRows * numCols self.values = values - def __reduce__(self): + def __reduce__(self) -> Tuple[Type["DenseMatrix"], Tuple[int, int, bytes, int]]: return DenseMatrix, ( self.numRows, self.numCols, - self.values.tostring(), + self.values.tostring(), # type: ignore[attr-defined] int(self.isTransposed), ) - def __str__(self): + def __str__(self) -> str: """ Pretty printing of a DenseMatrix @@ -1139,7 +1282,7 @@ def __str__(self): x = "\n".join([(" " * 6 + line) for line in array_lines[1:]]) return array_lines[0].replace("array", "DenseMatrix") + "\n" + x - def __repr__(self): + def __repr__(self) -> str: """ Representation of a DenseMatrix @@ -1158,12 +1301,11 @@ def __repr__(self): _format_float_list(self.values[:8]) + ["..."] + _format_float_list(self.values[-8:]) ) - entries = ", ".join(entries) return "DenseMatrix({0}, {1}, [{2}], {3})".format( - self.numRows, self.numCols, entries, self.isTransposed + self.numRows, self.numCols, ", ".join(entries), self.isTransposed ) - def toArray(self): + def toArray(self) -> np.ndarray: """ Return an numpy.ndarray @@ -1179,7 +1321,7 @@ def toArray(self): else: return self.values.reshape((self.numRows, self.numCols), order="F") - def toSparse(self): + def toSparse(self) -> "SparseMatrix": """Convert to SparseMatrix""" if self.isTransposed: values = np.ravel(self.toArray(), order="F") @@ -1193,7 +1335,7 @@ def toSparse(self): return SparseMatrix(self.numRows, self.numCols, colPtrs, rowIndices, values) - def asML(self): + def asML(self) -> newlinalg.DenseMatrix: """ Convert this matrix to the new mllib-local representation. This does NOT copy the data; it copies references. 
@@ -1206,7 +1348,7 @@ def asML(self): """ return newlinalg.DenseMatrix(self.numRows, self.numCols, self.values, self.isTransposed) - def __getitem__(self, indices): + def __getitem__(self, indices: Tuple[int, int]) -> np.float64: i, j = indices if i < 0 or i >= self.numRows: raise IndexError("Row index %d is out of range [0, %d)" % (i, self.numRows)) @@ -1218,21 +1360,29 @@ def __getitem__(self, indices): else: return self.values[i + j * self.numRows] - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if self.numRows != other.numRows or self.numCols != other.numCols: return False if isinstance(other, SparseMatrix): - return np.all(self.toArray() == other.toArray()) + return np.all(self.toArray() == other.toArray()).tolist() self_values = np.ravel(self.toArray(), order="F") other_values = np.ravel(other.toArray(), order="F") - return np.all(self_values == other_values) + return np.all(self_values == other_values).tolist() class SparseMatrix(Matrix): """Sparse Matrix stored in CSC format.""" - def __init__(self, numRows, numCols, colPtrs, rowIndices, values, isTransposed=False): + def __init__( + self, + numRows: int, + numCols: int, + colPtrs: Union[bytes, Iterable[int]], + rowIndices: Union[bytes, Iterable[int]], + values: Union[bytes, Iterable[float]], + isTransposed: bool = False, + ) -> None: Matrix.__init__(self, numRows, numCols, isTransposed) self.colPtrs = self._convert_to_array(colPtrs, np.int32) self.rowIndices = self._convert_to_array(rowIndices, np.int32) @@ -1254,7 +1404,7 @@ def __init__(self, numRows, numCols, colPtrs, rowIndices, values, isTransposed=F % (self.rowIndices.size, self.values.size) ) - def __str__(self): + def __str__(self) -> str: """ Pretty printing of a SparseMatrix @@ -1300,7 +1450,7 @@ def __str__(self): spstr += "\n.." 
* 2 return spstr - def __repr__(self): + def __repr__(self) -> str: """ Representation of a SparseMatrix @@ -1325,24 +1475,26 @@ def __repr__(self): if len(self.colPtrs) > 16: colPtrs = colPtrs[:8] + ["..."] + colPtrs[-8:] - values = ", ".join(values) - rowIndices = ", ".join([str(ind) for ind in rowIndices]) - colPtrs = ", ".join([str(ptr) for ptr in colPtrs]) return "SparseMatrix({0}, {1}, [{2}], [{3}], [{4}], {5})".format( - self.numRows, self.numCols, colPtrs, rowIndices, values, self.isTransposed + self.numRows, + self.numCols, + ", ".join([str(ptr) for ptr in colPtrs]), + ", ".join([str(ind) for ind in rowIndices]), + ", ".join(values), + self.isTransposed, ) - def __reduce__(self): + def __reduce__(self) -> Tuple[Type["SparseMatrix"], Tuple[int, int, bytes, bytes, bytes, int]]: return SparseMatrix, ( self.numRows, self.numCols, - self.colPtrs.tostring(), - self.rowIndices.tostring(), - self.values.tostring(), + self.colPtrs.tostring(), # type: ignore[attr-defined] + self.rowIndices.tostring(), # type: ignore[attr-defined] + self.values.tostring(), # type: ignore[attr-defined] int(self.isTransposed), ) - def __getitem__(self, indices): + def __getitem__(self, indices: Tuple[int, int]) -> np.float64: i, j = indices if i < 0 or i >= self.numRows: raise IndexError("Row index %d is out of range [0, %d)" % (i, self.numRows)) @@ -1362,9 +1514,9 @@ def __getitem__(self, indices): if ind < colEnd and self.rowIndices[ind] == i: return self.values[ind] else: - return 0.0 + return np.float64(0.0) - def toArray(self): + def toArray(self) -> np.ndarray: """ Return an numpy.ndarray """ @@ -1378,11 +1530,11 @@ def toArray(self): A[self.rowIndices[startptr:endptr], k] = self.values[startptr:endptr] return A - def toDense(self): + def toDense(self) -> "DenseMatrix": densevals = np.ravel(self.toArray(), order="F") return DenseMatrix(self.numRows, self.numCols, densevals) - def asML(self): + def asML(self) -> newlinalg.SparseMatrix: """ Convert this matrix to the new mllib-local representation. This does NOT copy the data; it copies references. @@ -1403,27 +1555,34 @@ def asML(self): ) # TODO: More efficient implementation: - def __eq__(self, other): - return np.all(self.toArray() == other.toArray()) + def __eq__(self, other: Any) -> bool: + assert isinstance(other, Matrix) + return np.all(self.toArray() == other.toArray()).tolist() class Matrices: @staticmethod - def dense(numRows, numCols, values): + def dense(numRows: int, numCols: int, values: Union[bytes, Iterable[float]]) -> DenseMatrix: """ Create a DenseMatrix """ return DenseMatrix(numRows, numCols, values) @staticmethod - def sparse(numRows, numCols, colPtrs, rowIndices, values): + def sparse( + numRows: int, + numCols: int, + colPtrs: Union[bytes, Iterable[int]], + rowIndices: Union[bytes, Iterable[int]], + values: Union[bytes, Iterable[float]], + ) -> SparseMatrix: """ Create a SparseMatrix """ return SparseMatrix(numRows, numCols, colPtrs, rowIndices, values) @staticmethod - def fromML(mat): + def fromML(mat: newlinalg.Matrix) -> Matrix: """ Convert a matrix from the new mllib-local representation. This does NOT copy the data; it copies references. @@ -1448,34 +1607,34 @@ def fromML(mat): raise TypeError("Unsupported matrix type %s" % type(mat)) -class QRDecomposition: +class QRDecomposition(Generic[QT, RT]): """ Represents QR factors. 
""" - def __init__(self, Q, R): + def __init__(self, Q: QT, R: RT) -> None: self._Q = Q self._R = R - @property + @property # type: ignore[misc] @since("2.0.0") - def Q(self): + def Q(self) -> QT: """ An orthogonal matrix Q in a QR decomposition. May be null if not computed. """ return self._Q - @property + @property # type: ignore[misc] @since("2.0.0") - def R(self): + def R(self) -> RT: """ An upper triangular matrix R in a QR decomposition. """ return self._R -def _test(): +def _test() -> None: import doctest import numpy diff --git a/python/pyspark/mllib/linalg/__init__.pyi b/python/pyspark/mllib/linalg/__init__.pyi deleted file mode 100644 index 8988e92f5c29a..0000000000000 --- a/python/pyspark/mllib/linalg/__init__.pyi +++ /dev/null @@ -1,278 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import overload -from typing import ( - Any, - Dict, - Generic, - Iterable, - List, - Optional, - Tuple, - Type, - TypeVar, - Union, -) -from pyspark.ml import linalg as newlinalg -from pyspark.sql.types import StructType, UserDefinedType -from numpy import float64, ndarray - -QT = TypeVar("QT") -RT = TypeVar("RT") - -class VectorUDT(UserDefinedType): - @classmethod - def sqlType(cls) -> StructType: ... - @classmethod - def module(cls) -> str: ... - @classmethod - def scalaUDT(cls) -> str: ... - def serialize( - self, obj: Vector - ) -> Tuple[int, Optional[int], Optional[List[int]], List[float]]: ... - def deserialize(self, datum: Any) -> Vector: ... - def simpleString(self) -> str: ... - -class MatrixUDT(UserDefinedType): - @classmethod - def sqlType(cls) -> StructType: ... - @classmethod - def module(cls) -> str: ... - @classmethod - def scalaUDT(cls) -> str: ... - def serialize( - self, obj: Matrix - ) -> Tuple[int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool]: ... - def deserialize(self, datum: Any) -> Matrix: ... - def simpleString(self) -> str: ... - -class Vector: - __UDT__: VectorUDT - def toArray(self) -> ndarray: ... - def asML(self) -> newlinalg.Vector: ... - -class DenseVector(Vector): - array: ndarray - @overload - def __init__(self, *elements: float) -> None: ... - @overload - def __init__(self, __arr: bytes) -> None: ... - @overload - def __init__(self, __arr: Iterable[float]) -> None: ... - @staticmethod - def parse(s: str) -> DenseVector: ... - def __reduce__(self) -> Tuple[Type[DenseVector], bytes]: ... - def numNonzeros(self) -> int: ... - def norm(self, p: Union[float, str]) -> float64: ... - def dot(self, other: Iterable[float]) -> float64: ... - def squared_distance(self, other: Iterable[float]) -> float64: ... - def toArray(self) -> ndarray: ... - def asML(self) -> newlinalg.DenseVector: ... - @property - def values(self) -> ndarray: ... - def __getitem__(self, item: int) -> float64: ... 
- def __len__(self) -> int: ... - def __eq__(self, other: Any) -> bool: ... - def __ne__(self, other: Any) -> bool: ... - def __hash__(self) -> int: ... - def __getattr__(self, item: str) -> Any: ... - def __neg__(self) -> DenseVector: ... - def __add__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __sub__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __mul__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __div__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __truediv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __mod__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __radd__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __rsub__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __rmul__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __rdiv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __rtruediv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __rmod__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - -class SparseVector(Vector): - size: int - indices: ndarray - values: ndarray - @overload - def __init__(self, size: int, *args: Tuple[int, float]) -> None: ... - @overload - def __init__(self, size: int, __indices: bytes, __values: bytes) -> None: ... - @overload - def __init__(self, size: int, __indices: Iterable[int], __values: Iterable[float]) -> None: ... - @overload - def __init__(self, size: int, __pairs: Iterable[Tuple[int, float]]) -> None: ... - @overload - def __init__(self, size: int, __map: Dict[int, float]) -> None: ... - def numNonzeros(self) -> int: ... - def norm(self, p: Union[float, str]) -> float64: ... - def __reduce__(self) -> Tuple[Type[SparseVector], Tuple[int, bytes, bytes]]: ... - @staticmethod - def parse(s: str) -> SparseVector: ... - def dot(self, other: Iterable[float]) -> float64: ... - def squared_distance(self, other: Iterable[float]) -> float64: ... - def toArray(self) -> ndarray: ... - def asML(self) -> newlinalg.SparseVector: ... - def __len__(self) -> int: ... - def __eq__(self, other: Any) -> bool: ... - def __getitem__(self, index: int) -> float64: ... - def __ne__(self, other: Any) -> bool: ... - def __hash__(self) -> int: ... - -class Vectors: - @overload - @staticmethod - def sparse(size: int, *args: Tuple[int, float]) -> SparseVector: ... - @overload - @staticmethod - def sparse(size: int, __indices: bytes, __values: bytes) -> SparseVector: ... - @overload - @staticmethod - def sparse(size: int, __indices: Iterable[int], __values: Iterable[float]) -> SparseVector: ... - @overload - @staticmethod - def sparse(size: int, __pairs: Iterable[Tuple[int, float]]) -> SparseVector: ... - @overload - @staticmethod - def sparse(size: int, __map: Dict[int, float]) -> SparseVector: ... - @overload - @staticmethod - def dense(*elements: float) -> DenseVector: ... - @overload - @staticmethod - def dense(__arr: bytes) -> DenseVector: ... - @overload - @staticmethod - def dense(__arr: Iterable[float]) -> DenseVector: ... - @staticmethod - def fromML(vec: newlinalg.DenseVector) -> DenseVector: ... - @staticmethod - def stringify(vector: Vector) -> str: ... - @staticmethod - def squared_distance(v1: Vector, v2: Vector) -> float64: ... - @staticmethod - def norm(vector: Vector, p: Union[float, str]) -> float64: ... - @staticmethod - def parse(s: str) -> Vector: ... 
- @staticmethod - def zeros(size: int) -> DenseVector: ... - -class Matrix: - __UDT__: MatrixUDT - numRows: int - numCols: int - isTransposed: bool - def __init__(self, numRows: int, numCols: int, isTransposed: bool = ...) -> None: ... - def toArray(self) -> ndarray: ... - def asML(self) -> newlinalg.Matrix: ... - -class DenseMatrix(Matrix): - values: Any - @overload - def __init__( - self, numRows: int, numCols: int, values: bytes, isTransposed: bool = ... - ) -> None: ... - @overload - def __init__( - self, - numRows: int, - numCols: int, - values: Iterable[float], - isTransposed: bool = ..., - ) -> None: ... - def __reduce__(self) -> Tuple[Type[DenseMatrix], Tuple[int, int, bytes, int]]: ... - def toArray(self) -> ndarray: ... - def toSparse(self) -> SparseMatrix: ... - def asML(self) -> newlinalg.DenseMatrix: ... - def __getitem__(self, indices: Tuple[int, int]) -> float64: ... - def __eq__(self, other: Any) -> bool: ... - -class SparseMatrix(Matrix): - colPtrs: ndarray - rowIndices: ndarray - values: ndarray - @overload - def __init__( - self, - numRows: int, - numCols: int, - colPtrs: bytes, - rowIndices: bytes, - values: bytes, - isTransposed: bool = ..., - ) -> None: ... - @overload - def __init__( - self, - numRows: int, - numCols: int, - colPtrs: Iterable[int], - rowIndices: Iterable[int], - values: Iterable[float], - isTransposed: bool = ..., - ) -> None: ... - def __reduce__( - self, - ) -> Tuple[Type[SparseMatrix], Tuple[int, int, bytes, bytes, bytes, int]]: ... - def __getitem__(self, indices: Tuple[int, int]) -> float64: ... - def toArray(self) -> ndarray: ... - def toDense(self) -> DenseMatrix: ... - def asML(self) -> newlinalg.SparseMatrix: ... - def __eq__(self, other: Any) -> bool: ... - -class Matrices: - @overload - @staticmethod - def dense( - numRows: int, numCols: int, values: bytes, isTransposed: bool = ... - ) -> DenseMatrix: ... - @overload - @staticmethod - def dense( - numRows: int, numCols: int, values: Iterable[float], isTransposed: bool = ... - ) -> DenseMatrix: ... - @overload - @staticmethod - def sparse( - numRows: int, - numCols: int, - colPtrs: bytes, - rowIndices: bytes, - values: bytes, - isTransposed: bool = ..., - ) -> SparseMatrix: ... - @overload - @staticmethod - def sparse( - numRows: int, - numCols: int, - colPtrs: Iterable[int], - rowIndices: Iterable[int], - values: Iterable[float], - isTransposed: bool = ..., - ) -> SparseMatrix: ... - @staticmethod - def fromML(mat: newlinalg.Matrix) -> Matrix: ... - -class QRDecomposition(Generic[QT, RT]): - def __init__(self, Q: QT, R: RT) -> None: ... - @property - def Q(self) -> QT: ... - @property - def R(self) -> RT: ... From 034850866c957604f908b5a9952b2a1f18a03a96 Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 14 Jan 2022 20:50:33 +0100 Subject: [PATCH 025/513] [SPARK-37902][PYTHON] Resolve typing issues detected by mypy==0.931 ### What changes were proposed in this pull request? This PR resolves the following typing issue detected by `mypy==0.931`: ``` python/pyspark/pandas/base.py:879: error: "Sequence[Any]" has no attribute "tolist" [attr-defined] python/pyspark/sql/tests/test_pandas_udf_typehints_with_future_annotations.py:268: error: Incompatible return value type (got "floating[Any]", expected "float") [return-value] python/pyspark/sql/tests/test_pandas_udf_typehints.py:265: error: Incompatible return value type (got "floating[Any]", expected "float") [return-value] ``` ### Why are the changes needed? To enable smooth migration to newer mypy versions. 
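The two error shapes above can be reproduced with a small, self-contained example (the function names here are hypothetical; only the error patterns and the style of fix mirror the diff below):

```
from typing import Any, Sequence, cast
import numpy as np

def to_list(values: Sequence[Any]) -> list:
    # Calling .tolist() on something declared as Sequence[Any] is what triggers
    # '"Sequence[Any]" has no attribute "tolist"'; casting to np.ndarray where
    # we know it is one (as pandas/base.py now does) satisfies mypy.
    return cast(np.ndarray, values).tolist() if isinstance(values, np.ndarray) else list(values)

def weighted_mean(v: Sequence[float], w: Sequence[float]) -> np.float64:
    # np.average is typed as returning np.floating[Any]; declaring the return
    # type as plain `float` is what mypy 0.931 rejects, hence np.float64 here.
    return np.average(v, weights=w)

print(to_list(np.array([1, 2])), weighted_mean([1.0, 3.0], [1.0, 1.0]))
```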
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests and `dev/lint-python`. Closes #35199 from zero323/SPARK-37902. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/pandas/base.py | 4 +++- python/pyspark/sql/tests/test_pandas_udf_typehints.py | 2 +- .../test_pandas_udf_typehints_with_future_annotations.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py index 5cb60ca9ffac3..d7de9b1774a7c 100644 --- a/python/pyspark/pandas/base.py +++ b/python/pyspark/pandas/base.py @@ -876,7 +876,9 @@ def isin(self: IndexOpsLike, values: Sequence[Any]) -> IndexOpsLike: " to isin(), you passed a [{values_type}]".format(values_type=type(values).__name__) ) - values = values.tolist() if isinstance(values, np.ndarray) else list(values) + values = ( + cast(np.ndarray, values).tolist() if isinstance(values, np.ndarray) else list(values) + ) other = [SF.lit(v) for v in values] scol = self.spark.column.isin(other) diff --git a/python/pyspark/sql/tests/test_pandas_udf_typehints.py b/python/pyspark/sql/tests/test_pandas_udf_typehints.py index 661dd7b38479f..44315c95614b8 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_typehints.py +++ b/python/pyspark/sql/tests/test_pandas_udf_typehints.py @@ -261,7 +261,7 @@ def plus_one(itr: Iterator[pd.Series]) -> Iterator[pd.Series]: def test_group_agg_udf_type_hint(self): df = self.spark.range(10).selectExpr("id", "id as v") - def weighted_mean(v: pd.Series, w: pd.Series) -> float: + def weighted_mean(v: pd.Series, w: pd.Series) -> np.float64: return np.average(v, weights=w) weighted_mean = pandas_udf("double")(weighted_mean) diff --git a/python/pyspark/sql/tests/test_pandas_udf_typehints_with_future_annotations.py b/python/pyspark/sql/tests/test_pandas_udf_typehints_with_future_annotations.py index 1dfec04495893..832086cb9ec8f 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_typehints_with_future_annotations.py +++ b/python/pyspark/sql/tests/test_pandas_udf_typehints_with_future_annotations.py @@ -264,7 +264,7 @@ def plus_one(itr: Iterator[pd.Series]) -> Iterator[pd.Series]: def test_group_agg_udf_type_hint(self): df = self.spark.range(10).selectExpr("id", "id as v") - def weighted_mean(v: pd.Series, w: pd.Series) -> float: + def weighted_mean(v: pd.Series, w: pd.Series) -> np.float64: return np.average(v, weights=w) weighted_mean = pandas_udf("double")(weighted_mean) From 747dcd6fa2dd0dc0d4eef18aad4871bbbf54a50a Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 15 Jan 2022 09:29:49 +0900 Subject: [PATCH 026/513] [SPARK-37909] Replace global F403 exclude with file-specific rules ### What changes were proposed in this pull request? This PR rolls back global exclude on F403 introduced in SPARK-37909. Instead, we can use file-specific on major offenders (test files). ### Why are the changes needed? Based on [CI behavior](https://github.com/apache/spark/pull/35199#discussion_r784526473) and local test runs, it seems like it takes precedence over F401 and silences unused import errors. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35210 from zero323/SPARK-37909. 
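As background on the two pyflakes codes involved: F403 flags the use of `from module import *` itself, while F401 flags imports that are never used (including an unused wildcard). A small illustrative file and the scoped configuration style this patch adopts:

```
# example_module.py -- illustrative only, not part of the patch.
from os.path import *   # F403: 'from os.path import *' used; and because none
                        # of the imported names are referenced below, pyflakes
                        # also reports F401: "'os.path.*' imported but unused".

# With the previous global "ignore = F403", flake8 appeared to swallow the F401
# report for such lines as well (the behaviour described above), so the
# exclusion is now limited to the test files that rely on star imports:
#
#   [flake8]
#   per-file-ignores =
#       python/pyspark/sql/tests/*.py: F403
```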
Authored-by: zero323 Signed-off-by: Hyukjin Kwon --- dev/tox.ini | 13 ++++++++++--- python/pyspark/ml/param/_shared_params_code_gen.py | 2 +- python/pyspark/ml/param/shared.py | 2 +- python/pyspark/pandas/__init__.py | 2 +- python/pyspark/pandas/plot/__init__.py | 2 +- python/pyspark/pandas/typedef/__init__.py | 2 +- 6 files changed, 15 insertions(+), 8 deletions(-) diff --git a/dev/tox.ini b/dev/tox.ini index df4dfce5dcaa0..fbe44573a96b1 100644 --- a/dev/tox.ini +++ b/dev/tox.ini @@ -18,7 +18,6 @@ ignore = E203, # Skip as black formatter adds a whitespace around ':'. E402, # Module top level import is disabled for optional import check, etc. - F403, # Using wildcard discouraged but F401 can detect. Disabled to reduce the usage of noqa. # 1. Type hints with def are treated as redefinition (e.g., functions.log). # 2. Some are used for testing. F811, @@ -29,10 +28,18 @@ ignore = # Below rules should be enabled in the future. E731, per-file-ignores = - # F405 and E501 are ignored as shared.py is auto-generated. - python/pyspark/ml/param/shared.py: F405 E501, + # E501 is ignored as shared.py is auto-generated. + python/pyspark/ml/param/shared.py: E501, # Examples contain some unused variables. examples/src/main/python/sql/datasource.py: F841, + # Exclude * imports in test files + python/pyspark/ml/tests/*.py: F403, + python/pyspark/mllib/tests/*.py: F403, + python/pyspark/pandas/tests/*.py: F401 F403, + python/pyspark/resource/tests/*.py: F403, + python/pyspark/sql/tests/*.py: F403, + python/pyspark/streaming/tests/*.py: F403, + python/pyspark/tests/*.py: F403 exclude = */target/*, docs/.local_ruby_bundle/, diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index 82c752187f9ca..5df1782084a81 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -102,7 +102,7 @@ def get{name[0].upper()}{name[1:]}(self) -> {paramType}: print(header) print("\n# DO NOT MODIFY THIS FILE! It was generated by _shared_params_code_gen.py.\n") print("from typing import List\n") - print("from pyspark.ml.param import *\n\n") + print("from pyspark.ml.param import Param, Params, TypeConverters\n\n") shared = [ ( "maxIter", diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index f24d1796a72db..fcfced2e566df 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -19,7 +19,7 @@ from typing import List -from pyspark.ml.param import * +from pyspark.ml.param import Param, Params, TypeConverters class HasMaxIter(Params): diff --git a/python/pyspark/pandas/__init__.py b/python/pyspark/pandas/__init__.py index cb503a23d3444..df84c118db5a6 100644 --- a/python/pyspark/pandas/__init__.py +++ b/python/pyspark/pandas/__init__.py @@ -149,5 +149,5 @@ def _auto_patch_pandas() -> None: # Import after the usage logger is attached. from pyspark.pandas.config import get_option, options, option_context, reset_option, set_option -from pyspark.pandas.namespace import * # F405 +from pyspark.pandas.namespace import * # noqa: F403 from pyspark.pandas.sql_formatter import sql diff --git a/python/pyspark/pandas/plot/__init__.py b/python/pyspark/pandas/plot/__init__.py index 8b3376e7b214f..d00e002266ebd 100644 --- a/python/pyspark/pandas/plot/__init__.py +++ b/python/pyspark/pandas/plot/__init__.py @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -from pyspark.pandas.plot.core import * # noqa: F401 +from pyspark.pandas.plot.core import * # noqa: F401,F403 diff --git a/python/pyspark/pandas/typedef/__init__.py b/python/pyspark/pandas/typedef/__init__.py index 5f7ea2834a52a..49490674d7291 100644 --- a/python/pyspark/pandas/typedef/__init__.py +++ b/python/pyspark/pandas/typedef/__init__.py @@ -15,4 +15,4 @@ # limitations under the License. # -from pyspark.pandas.typedef.typehints import * # noqa: F401,F405 +from pyspark.pandas.typedef.typehints import * # noqa: F401,F403,F405 From 29498656656779c5689e73dee11e73ac7190b205 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Sat, 15 Jan 2022 09:43:48 +0900 Subject: [PATCH 027/513] [SPARK-37886][PYTHON][TESTS] Use ComparisonTestBase in pandas test ### What changes were proposed in this pull request? Use `ComparisonTestBase` as base class instead of `PandasOnSparkTestCase` with self.psdf in pandas test https://github.com/apache/spark/blob/a70006d9a7b578721d152d0f89d1a894de38c25d/python/pyspark/testing/pandasutils.py#L265-L272 ### Why are the changes needed? We have many testcase are using same logic to covert `pdf` to `psdf`, we can use ComparisonTestBase as base class to reduce redundant. ### Does this PR introduce _any_ user-facing change? NO, test only ### How was this patch tested? UT passed. Closes #35183 from Yikun/SPARK-37886. Authored-by: Yikun Jiang Signed-off-by: Hyukjin Kwon --- .../pandas/tests/data_type_ops/test_categorical_ops.py | 4 ---- python/pyspark/pandas/tests/indexes/test_base.py | 8 ++------ python/pyspark/pandas/tests/test_categorical.py | 8 ++------ python/pyspark/pandas/tests/test_dataframe.py | 8 ++------ python/pyspark/pandas/tests/test_dataframe_conversion.py | 8 ++------ python/pyspark/pandas/tests/test_extension.py | 8 ++------ python/pyspark/pandas/tests/test_indexing.py | 8 ++------ python/pyspark/pandas/tests/test_numpy_compat.py | 8 ++------ 8 files changed, 14 insertions(+), 46 deletions(-) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py index 0aa2e108d799a..e07af724f6905 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py @@ -54,10 +54,6 @@ def pdf(self): } ) - @property - def psdf(self): - return ps.from_pandas(self.pdf) - @property def pser(self): return pd.Series([1, 2, 3], dtype="category") diff --git a/python/pyspark/pandas/tests/indexes/test_base.py b/python/pyspark/pandas/tests/indexes/test_base.py index 88c826eea786b..dc1f26dfc4588 100644 --- a/python/pyspark/pandas/tests/indexes/test_base.py +++ b/python/pyspark/pandas/tests/indexes/test_base.py @@ -31,10 +31,10 @@ MissingPandasLikeMultiIndex, MissingPandasLikeTimedeltaIndex, ) -from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils, SPARK_CONF_ARROW_ENABLED +from pyspark.testing.pandasutils import ComparisonTestBase, TestUtils, SPARK_CONF_ARROW_ENABLED -class IndexesTest(PandasOnSparkTestCase, TestUtils): +class IndexesTest(ComparisonTestBase, TestUtils): @property def pdf(self): return pd.DataFrame( @@ -42,10 +42,6 @@ def pdf(self): index=[0, 1, 3, 5, 6, 8, 9, 9, 9], ) - @property - def psdf(self): - return ps.from_pandas(self.pdf) - def test_index_basic(self): for pdf in [ pd.DataFrame(np.random.randn(10, 5), index=np.random.randint(100, size=10)), diff --git a/python/pyspark/pandas/tests/test_categorical.py b/python/pyspark/pandas/tests/test_categorical.py index 
de412bb7ff48f..a4746cdda148e 100644 --- a/python/pyspark/pandas/tests/test_categorical.py +++ b/python/pyspark/pandas/tests/test_categorical.py @@ -22,10 +22,10 @@ from pandas.api.types import CategoricalDtype import pyspark.pandas as ps -from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils +from pyspark.testing.pandasutils import ComparisonTestBase, TestUtils -class CategoricalTest(PandasOnSparkTestCase, TestUtils): +class CategoricalTest(ComparisonTestBase, TestUtils): @property def pdf(self): return pd.DataFrame( @@ -37,10 +37,6 @@ def pdf(self): }, ) - @property - def psdf(self): - return ps.from_pandas(self.pdf) - @property def df_pair(self): return self.pdf, self.psdf diff --git a/python/pyspark/pandas/tests/test_dataframe.py b/python/pyspark/pandas/tests/test_dataframe.py index 0bf9291f1d2ed..ab1edbe4b2c25 100644 --- a/python/pyspark/pandas/tests/test_dataframe.py +++ b/python/pyspark/pandas/tests/test_dataframe.py @@ -43,7 +43,7 @@ ) from pyspark.testing.pandasutils import ( have_tabulate, - PandasOnSparkTestCase, + ComparisonTestBase, SPARK_CONF_ARROW_ENABLED, tabulate_requirement_message, ) @@ -51,7 +51,7 @@ from pyspark.pandas.utils import name_like_string -class DataFrameTest(PandasOnSparkTestCase, SQLTestUtils): +class DataFrameTest(ComparisonTestBase, SQLTestUtils): @property def pdf(self): return pd.DataFrame( @@ -59,10 +59,6 @@ def pdf(self): index=np.random.rand(9), ) - @property - def psdf(self): - return ps.from_pandas(self.pdf) - @property def df_pair(self): pdf = self.pdf diff --git a/python/pyspark/pandas/tests/test_dataframe_conversion.py b/python/pyspark/pandas/tests/test_dataframe_conversion.py index 2cc2f15e1ae08..123dd14324c13 100644 --- a/python/pyspark/pandas/tests/test_dataframe_conversion.py +++ b/python/pyspark/pandas/tests/test_dataframe_conversion.py @@ -25,11 +25,11 @@ import pandas as pd from pyspark import pandas as ps -from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils +from pyspark.testing.pandasutils import ComparisonTestBase, TestUtils from pyspark.testing.sqlutils import SQLTestUtils -class DataFrameConversionTest(PandasOnSparkTestCase, SQLTestUtils, TestUtils): +class DataFrameConversionTest(ComparisonTestBase, SQLTestUtils, TestUtils): """Test cases for "small data" conversion and I/O.""" def setUp(self): @@ -42,10 +42,6 @@ def tearDown(self): def pdf(self): return pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=[0, 1, 3]) - @property - def psdf(self): - return ps.from_pandas(self.pdf) - @staticmethod def strip_all_whitespace(str): """A helper function to remove all whitespace from a string.""" diff --git a/python/pyspark/pandas/tests/test_extension.py b/python/pyspark/pandas/tests/test_extension.py index fb5f9bbc8eed3..dd2d08dded058 100644 --- a/python/pyspark/pandas/tests/test_extension.py +++ b/python/pyspark/pandas/tests/test_extension.py @@ -21,7 +21,7 @@ import pandas as pd from pyspark import pandas as ps -from pyspark.testing.pandasutils import assert_produces_warning, PandasOnSparkTestCase +from pyspark.testing.pandasutils import assert_produces_warning, ComparisonTestBase from pyspark.pandas.extensions import ( register_dataframe_accessor, register_series_accessor, @@ -66,7 +66,7 @@ def check_length(self, col=None): raise ValueError(str(e)) -class ExtensionTest(PandasOnSparkTestCase): +class ExtensionTest(ComparisonTestBase): @property def pdf(self): return pd.DataFrame( @@ -74,10 +74,6 @@ def pdf(self): index=np.random.rand(9), ) - @property - def psdf(self): - return 
ps.from_pandas(self.pdf) - @property def accessor(self): return CustomAccessor(self.psdf) diff --git a/python/pyspark/pandas/tests/test_indexing.py b/python/pyspark/pandas/tests/test_indexing.py index 0b76e9ea12912..fcce93aaafba3 100644 --- a/python/pyspark/pandas/tests/test_indexing.py +++ b/python/pyspark/pandas/tests/test_indexing.py @@ -24,7 +24,7 @@ from pyspark import pandas as ps from pyspark.pandas.exceptions import SparkPandasIndexingError -from pyspark.testing.pandasutils import ComparisonTestBase, PandasOnSparkTestCase, compare_both +from pyspark.testing.pandasutils import ComparisonTestBase, compare_both class BasicIndexingTest(ComparisonTestBase): @@ -153,7 +153,7 @@ def test_limitations(self): ) -class IndexingTest(PandasOnSparkTestCase): +class IndexingTest(ComparisonTestBase): @property def pdf(self): return pd.DataFrame( @@ -161,10 +161,6 @@ def pdf(self): index=[0, 1, 3, 5, 6, 8, 9, 9, 9], ) - @property - def psdf(self): - return ps.from_pandas(self.pdf) - @property def pdf2(self): return pd.DataFrame( diff --git a/python/pyspark/pandas/tests/test_numpy_compat.py b/python/pyspark/pandas/tests/test_numpy_compat.py index 0d6a8fb682579..c6b6e5dba9201 100644 --- a/python/pyspark/pandas/tests/test_numpy_compat.py +++ b/python/pyspark/pandas/tests/test_numpy_compat.py @@ -21,11 +21,11 @@ from pyspark import pandas as ps from pyspark.pandas import set_option, reset_option from pyspark.pandas.numpy_compat import unary_np_spark_mappings, binary_np_spark_mappings -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.testing.pandasutils import ComparisonTestBase from pyspark.testing.sqlutils import SQLTestUtils -class NumPyCompatTest(PandasOnSparkTestCase, SQLTestUtils): +class NumPyCompatTest(ComparisonTestBase, SQLTestUtils): blacklist = [ # Koalas does not currently support "conj", @@ -55,10 +55,6 @@ def pdf(self): index=[0, 1, 3, 5, 6, 8, 9, 9, 9], ) - @property - def psdf(self): - return ps.from_pandas(self.pdf) - def test_np_add_series(self): psdf = self.psdf pdf = self.pdf From c7c51bcab5cb067d36bccf789e0e4ad7f37ffb7c Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sat, 15 Jan 2022 08:54:16 -0600 Subject: [PATCH 028/513] [SPARK-37854][CORE] Replace type check with pattern matching in Spark code ### What changes were proposed in this pull request? There are many method use `isInstanceOf + asInstanceOf` for type conversion in Spark code now, the main change of this pr is replace `type check` with `pattern matching` for code simplification. ### Why are the changes needed? Code simplification ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35154 from LuciferYang/SPARK-37854. 
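To illustrate the idiom this patch applies throughout, here is a minimal, self-contained sketch; the `Shape`/`Circle`/`Square` types are hypothetical and not Spark code, they only show how an `isInstanceOf` check plus an `asInstanceOf` cast collapses into a single pattern match:

```scala
sealed trait Shape
case class Circle(radius: Double) extends Shape
case class Square(side: Double) extends Shape

object PatternMatchSketch {
  // Before: explicit type check followed by a cast that must stay in sync with it.
  def areaWithCasts(shape: Shape): Double = {
    if (shape.isInstanceOf[Circle]) {
      val c = shape.asInstanceOf[Circle]
      math.Pi * c.radius * c.radius
    } else {
      val s = shape.asInstanceOf[Square]
      s.side * s.side
    }
  }

  // After: one pattern match binds the narrowed value directly, so the check
  // and the cast cannot drift apart.
  def areaWithMatch(shape: Shape): Double = shape match {
    case Circle(radius) => math.Pi * radius * radius
    case Square(side)   => side * side
  }

  def main(args: Array[String]): Unit = {
    println(areaWithCasts(Circle(1.0))) // 3.141592653589793
    println(areaWithMatch(Square(2.0))) // 4.0
  }
}
```

Because `Shape` is sealed, no default arm is needed here; the Spark changes below instead add a `case _ => // do nothing` (or fallback) arm wherever only some subtypes need special handling.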
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../scala/org/apache/spark/TestUtils.scala | 36 ++++++----- .../scala/org/apache/spark/api/r/SerDe.scala | 12 ++-- .../spark/internal/config/ConfigBuilder.scala | 18 +++--- .../org/apache/spark/rdd/HadoopRDD.scala | 64 ++++++++++--------- .../scala/org/apache/spark/rdd/PipedRDD.scala | 7 +- .../main/scala/org/apache/spark/rdd/RDD.scala | 8 ++- .../scala/org/apache/spark/util/Utils.scala | 38 +++++------ .../ShuffleBlockFetcherIteratorSuite.scala | 10 +-- .../apache/spark/util/FileAppenderSuite.scala | 17 ++--- .../org/apache/spark/util/UtilsSuite.scala | 19 +++--- .../spark/examples/mllib/LDAExample.scala | 11 ++-- .../mllib/api/python/PythonMLLibAPI.scala | 12 ++-- .../expressions/aggregate/Percentile.scala | 14 ++-- .../spark/sql/catalyst/trees/TreeNode.scala | 7 +- .../catalyst/encoders/RowEncoderSuite.scala | 11 ++-- .../execution/columnar/ColumnAccessor.scala | 10 +-- .../sql/execution/columnar/ColumnType.scala | 50 ++++++++------- .../execution/datasources/FileScanRDD.scala | 19 +++--- .../org/apache/spark/sql/jdbc/H2Dialect.scala | 30 +++++---- .../sql/SparkSessionExtensionSuite.scala | 57 ++++++++--------- .../execution/joins/BroadcastJoinSuite.scala | 13 ++-- .../spark/sql/streaming/StreamTest.scala | 6 +- .../hive/client/IsolatedClientLoader.scala | 12 ++-- .../streaming/scheduler/JobGenerator.scala | 10 +-- .../spark/streaming/util/StateMap.scala | 21 +++--- 25 files changed, 263 insertions(+), 249 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index 20159afc51a6c..d2af95554ffab 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -337,22 +337,26 @@ private[spark] object TestUtils { connection.setRequestMethod(method) headers.foreach { case (k, v) => connection.setRequestProperty(k, v) } - // Disable cert and host name validation for HTTPS tests. - if (connection.isInstanceOf[HttpsURLConnection]) { - val sslCtx = SSLContext.getInstance("SSL") - val trustManager = new X509TrustManager { - override def getAcceptedIssuers(): Array[X509Certificate] = null - override def checkClientTrusted(x509Certificates: Array[X509Certificate], - s: String): Unit = {} - override def checkServerTrusted(x509Certificates: Array[X509Certificate], - s: String): Unit = {} - } - val verifier = new HostnameVerifier() { - override def verify(hostname: String, session: SSLSession): Boolean = true - } - sslCtx.init(null, Array(trustManager), new SecureRandom()) - connection.asInstanceOf[HttpsURLConnection].setSSLSocketFactory(sslCtx.getSocketFactory()) - connection.asInstanceOf[HttpsURLConnection].setHostnameVerifier(verifier) + connection match { + // Disable cert and host name validation for HTTPS tests. 
+ case httpConnection: HttpsURLConnection => + val sslCtx = SSLContext.getInstance("SSL") + val trustManager = new X509TrustManager { + override def getAcceptedIssuers: Array[X509Certificate] = null + + override def checkClientTrusted(x509Certificates: Array[X509Certificate], + s: String): Unit = {} + + override def checkServerTrusted(x509Certificates: Array[X509Certificate], + s: String): Unit = {} + } + val verifier = new HostnameVerifier() { + override def verify(hostname: String, session: SSLSession): Boolean = true + } + sslCtx.init(null, Array(trustManager), new SecureRandom()) + httpConnection.setSSLSocketFactory(sslCtx.getSocketFactory) + httpConnection.setHostnameVerifier(verifier) + case _ => // do nothing } try { diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala index 917203831404f..f9f8c56eb86c4 100644 --- a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala +++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala @@ -22,7 +22,7 @@ import java.nio.charset.StandardCharsets import java.sql.{Date, Time, Timestamp} import scala.collection.JavaConverters._ -import scala.collection.mutable.WrappedArray +import scala.collection.mutable /** * Utility functions to serialize, deserialize objects to / from R @@ -303,12 +303,10 @@ private[spark] object SerDe { // Convert ArrayType collected from DataFrame to Java array // Collected data of ArrayType from a DataFrame is observed to be of // type "scala.collection.mutable.WrappedArray" - val value = - if (obj.isInstanceOf[WrappedArray[_]]) { - obj.asInstanceOf[WrappedArray[_]].toArray - } else { - obj - } + val value = obj match { + case wa: mutable.WrappedArray[_] => wa.array + case other => other + } value match { case v: java.lang.Character => diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala b/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala index 38e057b16dcc5..e3190269a5349 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala @@ -140,15 +140,15 @@ private[spark] class TypedConfigBuilder[T]( def createWithDefault(default: T): ConfigEntry[T] = { // Treat "String" as a special case, so that both createWithDefault and createWithDefaultString // behave the same w.r.t. variable expansion of default values. 
- if (default.isInstanceOf[String]) { - createWithDefaultString(default.asInstanceOf[String]) - } else { - val transformedDefault = converter(stringConverter(default)) - val entry = new ConfigEntryWithDefault[T](parent.key, parent._prependedKey, - parent._prependSeparator, parent._alternatives, transformedDefault, converter, - stringConverter, parent._doc, parent._public, parent._version) - parent._onCreate.foreach(_(entry)) - entry + default match { + case str: String => createWithDefaultString(str) + case _ => + val transformedDefault = converter(stringConverter(default)) + val entry = new ConfigEntryWithDefault[T](parent.key, parent._prependedKey, + parent._prependSeparator, parent._alternatives, transformedDefault, converter, + stringConverter, parent._doc, parent._public, parent._version) + parent._onCreate.foreach(_ (entry)) + entry } } diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index 701145107482e..fcc2275585e83 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -61,14 +61,14 @@ private[spark] class HadoopPartition(rddId: Int, override val index: Int, s: Inp * @return a Map with the environment variables and corresponding values, it could be empty */ def getPipeEnvVars(): Map[String, String] = { - val envVars: Map[String, String] = if (inputSplit.value.isInstanceOf[FileSplit]) { - val is: FileSplit = inputSplit.value.asInstanceOf[FileSplit] - // map_input_file is deprecated in favor of mapreduce_map_input_file but set both - // since it's not removed yet - Map("map_input_file" -> is.getPath().toString(), - "mapreduce_map_input_file" -> is.getPath().toString()) - } else { - Map() + val envVars: Map[String, String] = inputSplit.value match { + case is: FileSplit => + // map_input_file is deprecated in favor of mapreduce_map_input_file but set both + // since it's not removed yet + Map("map_input_file" -> is.getPath().toString(), + "mapreduce_map_input_file" -> is.getPath().toString()) + case _ => + Map() } envVars } @@ -161,29 +161,31 @@ class HadoopRDD[K, V]( newJobConf } } else { - if (conf.isInstanceOf[JobConf]) { - logDebug("Re-using user-broadcasted JobConf") - conf.asInstanceOf[JobConf] - } else { - Option(HadoopRDD.getCachedMetadata(jobConfCacheKey)) - .map { conf => - logDebug("Re-using cached JobConf") - conf.asInstanceOf[JobConf] - } - .getOrElse { - // Create a JobConf that will be cached and used across this RDD's getJobConf() calls in - // the local process. The local cache is accessed through HadoopRDD.putCachedMetadata(). - // The caching helps minimize GC, since a JobConf can contain ~10KB of temporary - // objects. Synchronize to prevent ConcurrentModificationException (SPARK-1097, - // HADOOP-10456). - HadoopRDD.CONFIGURATION_INSTANTIATION_LOCK.synchronized { - logDebug("Creating new JobConf and caching it for later re-use") - val newJobConf = new JobConf(conf) - initLocalJobConfFuncOpt.foreach(f => f(newJobConf)) - HadoopRDD.putCachedMetadata(jobConfCacheKey, newJobConf) - newJobConf - } - } + conf match { + case jobConf: JobConf => + logDebug("Re-using user-broadcasted JobConf") + jobConf + case _ => + Option(HadoopRDD.getCachedMetadata(jobConfCacheKey)) + .map { conf => + logDebug("Re-using cached JobConf") + conf.asInstanceOf[JobConf] + } + .getOrElse { + // Create a JobConf that will be cached and used across this RDD's getJobConf() + // calls in the local process. 
The local cache is accessed through + // HadoopRDD.putCachedMetadata(). + // The caching helps minimize GC, since a JobConf can contain ~10KB of temporary + // objects. Synchronize to prevent ConcurrentModificationException (SPARK-1097, + // HADOOP-10456). + HadoopRDD.CONFIGURATION_INSTANTIATION_LOCK.synchronized { + logDebug("Creating new JobConf and caching it for later re-use") + val newJobConf = new JobConf(conf) + initLocalJobConfFuncOpt.foreach(f => f(newJobConf)) + HadoopRDD.putCachedMetadata(jobConfCacheKey, newJobConf) + newJobConf + } + } } } } diff --git a/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala index 285da043c0b9a..7e121e9a7ef2c 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala @@ -72,9 +72,10 @@ private[spark] class PipedRDD[T: ClassTag]( // for compatibility with Hadoop which sets these env variables // so the user code can access the input filename - if (split.isInstanceOf[HadoopPartition]) { - val hadoopSplit = split.asInstanceOf[HadoopPartition] - currentEnvVars.putAll(hadoopSplit.getPipeEnvVars().asJava) + split match { + case hadoopSplit: HadoopPartition => + currentEnvVars.putAll(hadoopSplit.getPipeEnvVars().asJava) + case _ => // do nothing } // When spark.worker.separated.working.directory option is turned on, each diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 4c39d178d38e8..71885664513ac 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1764,9 +1764,11 @@ abstract class RDD[T: ClassTag]( * Clean the shuffles & all of its parents. */ def cleanEagerly(dep: Dependency[_]): Unit = { - if (dep.isInstanceOf[ShuffleDependency[_, _, _]]) { - val shuffleId = dep.asInstanceOf[ShuffleDependency[_, _, _]].shuffleId - cleaner.doCleanupShuffle(shuffleId, blocking) + dep match { + case dependency: ShuffleDependency[_, _, _] => + val shuffleId = dependency.shuffleId + cleaner.doCleanupShuffle(shuffleId, blocking) + case _ => // do nothing } val rdd = dep.rdd val rddDepsOpt = rdd.internalDependencies diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 8f3d1de33cef3..a9d6180d2fd7b 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -355,26 +355,26 @@ private[spark] object Utils extends Logging { closeStreams: Boolean = false, transferToEnabled: Boolean = false): Long = { tryWithSafeFinally { - if (in.isInstanceOf[FileInputStream] && out.isInstanceOf[FileOutputStream] - && transferToEnabled) { - // When both streams are File stream, use transferTo to improve copy performance. - val inChannel = in.asInstanceOf[FileInputStream].getChannel() - val outChannel = out.asInstanceOf[FileOutputStream].getChannel() - val size = inChannel.size() - copyFileStreamNIO(inChannel, outChannel, 0, size) - size - } else { - var count = 0L - val buf = new Array[Byte](8192) - var n = 0 - while (n != -1) { - n = in.read(buf) - if (n != -1) { - out.write(buf, 0, n) - count += n + (in, out) match { + case (input: FileInputStream, output: FileOutputStream) if transferToEnabled => + // When both streams are File stream, use transferTo to improve copy performance. 
+ val inChannel = input.getChannel + val outChannel = output.getChannel + val size = inChannel.size() + copyFileStreamNIO(inChannel, outChannel, 0, size) + size + case (input, output) => + var count = 0L + val buf = new Array[Byte](8192) + var n = 0 + while (n != -1) { + n = input.read(buf) + if (n != -1) { + output.write(buf, 0, n) + count += n + } } - } - count + count } } { if (closeStreams) { diff --git a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala index afb9a862b113c..56043ea901906 100644 --- a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala @@ -160,10 +160,12 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT verify(buffer, times(0)).release() val delegateAccess = PrivateMethod[InputStream](Symbol("delegate")) var in = wrappedInputStream.invokePrivate(delegateAccess()) - if (in.isInstanceOf[CheckedInputStream]) { - val underlyingInputFiled = classOf[CheckedInputStream].getSuperclass.getDeclaredField("in") - underlyingInputFiled.setAccessible(true) - in = underlyingInputFiled.get(in.asInstanceOf[CheckedInputStream]).asInstanceOf[InputStream] + in match { + case stream: CheckedInputStream => + val underlyingInputFiled = classOf[CheckedInputStream].getSuperclass.getDeclaredField("in") + underlyingInputFiled.setAccessible(true) + in = underlyingInputFiled.get(stream).asInstanceOf[InputStream] + case _ => // do nothing } verify(in, times(0)).close() wrappedInputStream.close() diff --git a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala index 1a2eb6950c403..8ca4bc9a1527b 100644 --- a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala @@ -222,14 +222,15 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging { // assert(appender.getClass === classTag[ExpectedAppender].getClass) assert(appender.getClass.getSimpleName === classTag[ExpectedAppender].runtimeClass.getSimpleName) - if (appender.isInstanceOf[RollingFileAppender]) { - val rollingPolicy = appender.asInstanceOf[RollingFileAppender].rollingPolicy - val policyParam = if (rollingPolicy.isInstanceOf[TimeBasedRollingPolicy]) { - rollingPolicy.asInstanceOf[TimeBasedRollingPolicy].rolloverIntervalMillis - } else { - rollingPolicy.asInstanceOf[SizeBasedRollingPolicy].rolloverSizeBytes - } - assert(policyParam === expectedRollingPolicyParam) + appender match { + case rfa: RollingFileAppender => + val rollingPolicy = rfa.rollingPolicy + val policyParam = rollingPolicy match { + case timeBased: TimeBasedRollingPolicy => timeBased.rolloverIntervalMillis + case sizeBased: SizeBasedRollingPolicy => sizeBased.rolloverSizeBytes + } + assert(policyParam === expectedRollingPolicyParam) + case _ => // do nothing } testOutputStream.close() appender.awaitTermination() diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 6117decbf47eb..62cd819177663 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -227,15 +227,16 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { try { // 
Get a handle on the buffered data, to make sure memory gets freed once we read past the // end of it. Need to use reflection to get handle on inner structures for this check - val byteBufferInputStream = if (mergedStream.isInstanceOf[ChunkedByteBufferInputStream]) { - assert(inputLength < limit) - mergedStream.asInstanceOf[ChunkedByteBufferInputStream] - } else { - assert(inputLength >= limit) - val sequenceStream = mergedStream.asInstanceOf[SequenceInputStream] - val fieldValue = getFieldValue(sequenceStream, "in") - assert(fieldValue.isInstanceOf[ChunkedByteBufferInputStream]) - fieldValue.asInstanceOf[ChunkedByteBufferInputStream] + val byteBufferInputStream = mergedStream match { + case stream: ChunkedByteBufferInputStream => + assert(inputLength < limit) + stream + case _ => + assert(inputLength >= limit) + val sequenceStream = mergedStream.asInstanceOf[SequenceInputStream] + val fieldValue = getFieldValue(sequenceStream, "in") + assert(fieldValue.isInstanceOf[ChunkedByteBufferInputStream]) + fieldValue.asInstanceOf[ChunkedByteBufferInputStream] } (0 until inputLength).foreach { idx => assert(bytes(idx) === mergedStream.read().asInstanceOf[Byte]) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala index a3006a1fa2be0..afd529c2c4c13 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala @@ -158,11 +158,12 @@ object LDAExample { println(s"Finished training LDA model. Summary:") println(s"\t Training time: $elapsed sec") - if (ldaModel.isInstanceOf[DistributedLDAModel]) { - val distLDAModel = ldaModel.asInstanceOf[DistributedLDAModel] - val avgLogLikelihood = distLDAModel.logLikelihood / actualCorpusSize.toDouble - println(s"\t Training data average log likelihood: $avgLogLikelihood") - println() + ldaModel match { + case distLDAModel: DistributedLDAModel => + val avgLogLikelihood = distLDAModel.logLikelihood / actualCorpusSize.toDouble + println(s"\t Training data average log likelihood: $avgLogLikelihood") + println() + case _ => // do nothing } // Print the topics, showing the top-weighted terms for each topic. 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 80707f0f95a75..56aaaa31a9ed4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -90,12 +90,12 @@ private[python] class PythonMLLibAPI extends Serializable { initialWeights: Vector): JList[Object] = { try { val model = learner.run(data.rdd.persist(StorageLevel.MEMORY_AND_DISK), initialWeights) - if (model.isInstanceOf[LogisticRegressionModel]) { - val lrModel = model.asInstanceOf[LogisticRegressionModel] - List(lrModel.weights, lrModel.intercept, lrModel.numFeatures, lrModel.numClasses) - .map(_.asInstanceOf[Object]).asJava - } else { - List(model.weights, model.intercept).map(_.asInstanceOf[Object]).asJava + model match { + case lrModel: LogisticRegressionModel => + List(lrModel.weights, lrModel.intercept, lrModel.numFeatures, lrModel.numClasses) + .map(_.asInstanceOf[Object]).asJava + case _ => + List(model.weights, model.intercept).map(_.asInstanceOf[Object]).asJava } } finally { data.rdd.unpersist() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala index 7d3dd0ae1c52e..a98585e0ff1e7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala @@ -198,14 +198,12 @@ case class Percentile( return Seq.empty } - val ordering = - if (child.dataType.isInstanceOf[NumericType]) { - child.dataType.asInstanceOf[NumericType].ordering - } else if (child.dataType.isInstanceOf[YearMonthIntervalType]) { - child.dataType.asInstanceOf[YearMonthIntervalType].ordering - } else if (child.dataType.isInstanceOf[DayTimeIntervalType]) { - child.dataType.asInstanceOf[DayTimeIntervalType].ordering - } + val ordering = child.dataType match { + case numericType: NumericType => numericType.ordering + case intervalType: YearMonthIntervalType => intervalType.ordering + case intervalType: DayTimeIntervalType => intervalType.ordering + case otherType => QueryExecutionErrors.unsupportedTypeError(otherType) + } val sortedCounts = buffer.toSeq.sortBy(_._1)(ordering.asInstanceOf[Ordering[AnyRef]]) val accumulatedCounts = sortedCounts.scanLeft((sortedCounts.head._1, 0L)) { case ((key1, count1), (key2, count2)) => (key2, count1 + count2) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index f78bbbf6c7516..9e50be36a9f23 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -341,10 +341,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product with Tre // This is a temporary solution, we will change the type of children to IndexedSeq in a // followup PR private def asIndexedSeq(seq: Seq[BaseType]): IndexedSeq[BaseType] = { - if (seq.isInstanceOf[IndexedSeq[BaseType]]) { - seq.asInstanceOf[IndexedSeq[BaseType]] - } else { - seq.toIndexedSeq + seq match { + case types: IndexedSeq[BaseType] => types + case other => other.toIndexedSeq } } diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala index 1a427848fa11b..44b06d9b3471e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala @@ -31,12 +31,11 @@ import org.apache.spark.sql.types.DataTypeTestUtils.{dayTimeIntervalTypes, yearM class ExamplePoint(val x: Double, val y: Double) extends Serializable { override def hashCode: Int = 41 * (41 + x.toInt) + y.toInt override def equals(that: Any): Boolean = { - if (that.isInstanceOf[ExamplePoint]) { - val e = that.asInstanceOf[ExamplePoint] - (this.x == e.x || (this.x.isNaN && e.x.isNaN) || (this.x.isInfinity && e.x.isInfinity)) && - (this.y == e.y || (this.y.isNaN && e.y.isNaN) || (this.y.isInfinity && e.y.isInfinity)) - } else { - false + that match { + case e: ExamplePoint => + (this.x == e.x || (this.x.isNaN && e.x.isNaN) || (this.x.isInfinity && e.x.isInfinity)) && + (this.y == e.y || (this.y.isNaN && e.y.isNaN) || (this.y.isInfinity && e.y.isInfinity)) + case _ => false } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala index 2f68e89d9c1f9..fa7140be7f326 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala @@ -158,11 +158,11 @@ private[sql] object ColumnAccessor { def decompress(columnAccessor: ColumnAccessor, columnVector: WritableColumnVector, numRows: Int): Unit = { - if (columnAccessor.isInstanceOf[NativeColumnAccessor[_]]) { - val nativeAccessor = columnAccessor.asInstanceOf[NativeColumnAccessor[_]] - nativeAccessor.decompress(columnVector, numRows) - } else { - throw QueryExecutionErrors.notSupportNonPrimitiveTypeError() + columnAccessor match { + case nativeAccessor: NativeColumnAccessor[_] => + nativeAccessor.decompress(columnVector, numRows) + case _ => + throw QueryExecutionErrors.notSupportNonPrimitiveTypeError() } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala index 419dcc6cdeca7..9b4c136273451 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala @@ -473,23 +473,25 @@ private[columnar] trait DirectCopyColumnType[JvmType] extends ColumnType[JvmType // copy the bytes from ByteBuffer to UnsafeRow override def extract(buffer: ByteBuffer, row: InternalRow, ordinal: Int): Unit = { - if (row.isInstanceOf[MutableUnsafeRow]) { - val numBytes = buffer.getInt - val cursor = buffer.position() - buffer.position(cursor + numBytes) - row.asInstanceOf[MutableUnsafeRow].writer.write(ordinal, buffer.array(), - buffer.arrayOffset() + cursor, numBytes) - } else { - setField(row, ordinal, extract(buffer)) + row match { + case mutable: MutableUnsafeRow => + val numBytes = buffer.getInt + val cursor = buffer.position() + buffer.position(cursor + numBytes) + mutable.writer.write(ordinal, buffer.array(), + buffer.arrayOffset() + cursor, numBytes) + case _ => + setField(row, ordinal, extract(buffer)) } } // copy the bytes from UnsafeRow to ByteBuffer 
override def append(row: InternalRow, ordinal: Int, buffer: ByteBuffer): Unit = { - if (row.isInstanceOf[UnsafeRow]) { - row.asInstanceOf[UnsafeRow].writeFieldTo(ordinal, buffer) - } else { - super.append(row, ordinal, buffer) + row match { + case unsafe: UnsafeRow => + unsafe.writeFieldTo(ordinal, buffer) + case _ => + super.append(row, ordinal, buffer) } } } @@ -514,10 +516,11 @@ private[columnar] object STRING } override def setField(row: InternalRow, ordinal: Int, value: UTF8String): Unit = { - if (row.isInstanceOf[MutableUnsafeRow]) { - row.asInstanceOf[MutableUnsafeRow].writer.write(ordinal, value) - } else { - row.update(ordinal, value.clone()) + row match { + case mutable: MutableUnsafeRow => + mutable.writer.write(ordinal, value) + case _ => + row.update(ordinal, value.clone()) } } @@ -792,13 +795,14 @@ private[columnar] object CALENDAR_INTERVAL extends ColumnType[CalendarInterval] // copy the bytes from ByteBuffer to UnsafeRow override def extract(buffer: ByteBuffer, row: InternalRow, ordinal: Int): Unit = { - if (row.isInstanceOf[MutableUnsafeRow]) { - val cursor = buffer.position() - buffer.position(cursor + defaultSize) - row.asInstanceOf[MutableUnsafeRow].writer.write(ordinal, buffer.array(), - buffer.arrayOffset() + cursor, defaultSize) - } else { - setField(row, ordinal, extract(buffer)) + row match { + case mutable: MutableUnsafeRow => + val cursor = buffer.position() + buffer.position(cursor + defaultSize) + mutable.writer.write(ordinal, buffer.array(), + buffer.arrayOffset() + cursor, defaultSize) + case _ => + setField(row, ordinal, extract(buffer)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index 47f279babef58..5baa597582553 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -214,16 +214,17 @@ class FileScanRDD( val nextElement = currentIterator.next() // TODO: we should have a better separation of row based and batch based scan, so that we // don't need to run this `if` for every record. 
- if (nextElement.isInstanceOf[ColumnarBatch]) { - incTaskInputMetricsBytesRead() - inputMetrics.incRecordsRead(nextElement.asInstanceOf[ColumnarBatch].numRows()) - } else { - // too costly to update every record - if (inputMetrics.recordsRead % - SparkHadoopUtil.UPDATE_INPUT_METRICS_INTERVAL_RECORDS == 0) { + nextElement match { + case batch: ColumnarBatch => incTaskInputMetricsBytesRead() - } - inputMetrics.incRecordsRead(1) + inputMetrics.incRecordsRead(batch.numRows()) + case _ => + // too costly to update every record + if (inputMetrics.recordsRead % + SparkHadoopUtil.UPDATE_INPUT_METRICS_INTERVAL_RECORDS == 0) { + incTaskInputMetricsBytesRead() + } + inputMetrics.incRecordsRead(1) } addMetadataColumnsIfNeeded(nextElement) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala index 1f422e5a59cf8..7bd51f809cd04 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala @@ -65,20 +65,22 @@ private object H2Dialect extends JdbcDialect { } override def classifyException(message: String, e: Throwable): AnalysisException = { - if (e.isInstanceOf[SQLException]) { - // Error codes are from https://www.h2database.com/javadoc/org/h2/api/ErrorCode.html - e.asInstanceOf[SQLException].getErrorCode match { - // TABLE_OR_VIEW_ALREADY_EXISTS_1 - case 42101 => - throw new TableAlreadyExistsException(message, cause = Some(e)) - // TABLE_OR_VIEW_NOT_FOUND_1 - case 42102 => - throw new NoSuchTableException(message, cause = Some(e)) - // SCHEMA_NOT_FOUND_1 - case 90079 => - throw new NoSuchNamespaceException(message, cause = Some(e)) - case _ => - } + e match { + case exception: SQLException => + // Error codes are from https://www.h2database.com/javadoc/org/h2/api/ErrorCode.html + exception.getErrorCode match { + // TABLE_OR_VIEW_ALREADY_EXISTS_1 + case 42101 => + throw new TableAlreadyExistsException(message, cause = Some(e)) + // TABLE_OR_VIEW_NOT_FOUND_1 + case 42102 => + throw NoSuchTableException(message, cause = Some(e)) + // SCHEMA_NOT_FOUND_1 + case 90079 => + throw NoSuchNamespaceException(message, cause = Some(e)) + case _ => // do nothing + } + case _ => // do nothing } super.classifyException(message, e) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 4994968fdd6ba..3577812ac6f37 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -725,37 +725,32 @@ class BrokenColumnarAdd( lhs = left.columnarEval(batch) rhs = right.columnarEval(batch) - if (lhs == null || rhs == null) { - ret = null - } else if (lhs.isInstanceOf[ColumnVector] && rhs.isInstanceOf[ColumnVector]) { - val l = lhs.asInstanceOf[ColumnVector] - val r = rhs.asInstanceOf[ColumnVector] - val result = new OnHeapColumnVector(batch.numRows(), dataType) - ret = result - - for (i <- 0 until batch.numRows()) { - result.appendLong(l.getLong(i) + r.getLong(i) + 1) // BUG to show we replaced Add - } - } else if (rhs.isInstanceOf[ColumnVector]) { - val l = lhs.asInstanceOf[Long] - val r = rhs.asInstanceOf[ColumnVector] - val result = new OnHeapColumnVector(batch.numRows(), dataType) - ret = result - - for (i <- 0 until batch.numRows()) { - result.appendLong(l + r.getLong(i) + 1) // BUG to show we replaced Add 
- } - } else if (lhs.isInstanceOf[ColumnVector]) { - val l = lhs.asInstanceOf[ColumnVector] - val r = rhs.asInstanceOf[Long] - val result = new OnHeapColumnVector(batch.numRows(), dataType) - ret = result - - for (i <- 0 until batch.numRows()) { - result.appendLong(l.getLong(i) + r + 1) // BUG to show we replaced Add - } - } else { - ret = nullSafeEval(lhs, rhs) + (lhs, rhs) match { + case (null, null) => + ret = null + case (l: ColumnVector, r: ColumnVector) => + val result = new OnHeapColumnVector(batch.numRows(), dataType) + ret = result + + for (i <- 0 until batch.numRows()) { + result.appendLong(l.getLong(i) + r.getLong(i) + 1) // BUG to show we replaced Add + } + case (l: Long, r: ColumnVector) => + val result = new OnHeapColumnVector(batch.numRows(), dataType) + ret = result + + for (i <- 0 until batch.numRows()) { + result.appendLong(l + r.getLong(i) + 1) // BUG to show we replaced Add + } + case (l: ColumnVector, r: Long) => + val result = new OnHeapColumnVector(batch.numRows(), dataType) + ret = result + + for (i <- 0 until batch.numRows()) { + result.appendLong(l.getLong(i) + r + 1) // BUG to show we replaced Add + } + case (l, r) => + ret = nullSafeEval(l, r) } } finally { if (lhs != null && lhs.isInstanceOf[ColumnVector]) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala index a8b4856261d83..f27a249c8f753 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala @@ -402,13 +402,12 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils assert(b.buildSide === buildSide) case w: WholeStageCodegenExec => assert(w.children.head.getClass.getSimpleName === joinMethod) - if (w.children.head.isInstanceOf[BroadcastNestedLoopJoinExec]) { - assert( - w.children.head.asInstanceOf[BroadcastNestedLoopJoinExec].buildSide === buildSide) - } else if (w.children.head.isInstanceOf[BroadcastHashJoinExec]) { - assert(w.children.head.asInstanceOf[BroadcastHashJoinExec].buildSide === buildSide) - } else { - fail() + w.children.head match { + case bnlj: BroadcastNestedLoopJoinExec => + assert(bnlj.buildSide === buildSide) + case bhj: BroadcastHashJoinExec => + assert(bhj.buildSide === buildSide) + case _ => fail() } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index ff182b524be70..2bb43ec930760 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -528,8 +528,10 @@ trait StreamTest extends QueryTest with SharedSparkSession with TimeLimits with verify(triggerClock.isInstanceOf[SystemClock] || triggerClock.isInstanceOf[StreamManualClock], "Use either SystemClock or StreamManualClock to start the stream") - if (triggerClock.isInstanceOf[StreamManualClock]) { - manualClockExpectedTime = triggerClock.asInstanceOf[StreamManualClock].getTimeMillis() + triggerClock match { + case clock: StreamManualClock => + manualClockExpectedTime = clock.getTimeMillis() + case _ => } val metadataRoot = Option(checkpointLocation).getOrElse(defaultCheckpointLocation) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 828f9872eb159..671b80f4b8abe 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -316,12 +316,12 @@ private[hive] class IsolatedClientLoader( .asInstanceOf[HiveClient] } catch { case e: InvocationTargetException => - if (e.getCause().isInstanceOf[NoClassDefFoundError]) { - val cnf = e.getCause().asInstanceOf[NoClassDefFoundError] - throw QueryExecutionErrors.loadHiveClientCausesNoClassDefFoundError( - cnf, execJars, HiveUtils.HIVE_METASTORE_JARS.key, e) - } else { - throw e + e.getCause match { + case cnf: NoClassDefFoundError => + throw QueryExecutionErrors.loadHiveClientCausesNoClassDefFoundError( + cnf, execJars, HiveUtils.HIVE_METASTORE_JARS.key, e) + case _ => + throw e } } finally { Thread.currentThread.setContextClassLoader(origLoader) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala index 8008a5c495e9d..282946dd8ef4b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala @@ -204,10 +204,12 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { // If manual clock is being used for testing, then // either set the manual clock to the last checkpointed time, // or if the property is defined set it to that time - if (clock.isInstanceOf[ManualClock]) { - val lastTime = ssc.initialCheckpoint.checkpointTime.milliseconds - val jumpTime = ssc.sc.conf.get(StreamingConf.MANUAL_CLOCK_JUMP) - clock.asInstanceOf[ManualClock].setTime(lastTime + jumpTime) + clock match { + case manualClock: ManualClock => + val lastTime = ssc.initialCheckpoint.checkpointTime.milliseconds + val jumpTime = ssc.sc.conf.get(StreamingConf.MANUAL_CLOCK_JUMP) + manualClock.setTime(lastTime + jumpTime) + case _ => // do nothing } val batchDuration = ssc.graph.batchDuration diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala index 4224cef1cbae1..8069e7915b1d1 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala @@ -296,16 +296,17 @@ private[streaming] class OpenHashMapBasedStateMap[K, S]( var parentSessionLoopDone = false while(!parentSessionLoopDone) { val obj = inputStream.readObject() - if (obj.isInstanceOf[LimitMarker]) { - parentSessionLoopDone = true - val expectedCount = obj.asInstanceOf[LimitMarker].num - assert(expectedCount == newParentSessionStore.deltaMap.size) - } else { - val key = obj.asInstanceOf[K] - val state = inputStream.readObject().asInstanceOf[S] - val updateTime = inputStream.readLong() - newParentSessionStore.deltaMap.update( - key, StateInfo(state, updateTime, deleted = false)) + obj match { + case marker: LimitMarker => + parentSessionLoopDone = true + val expectedCount = marker.num + assert(expectedCount == newParentSessionStore.deltaMap.size) + case _ => + val key = obj.asInstanceOf[K] + val state = inputStream.readObject().asInstanceOf[S] + val updateTime = inputStream.readLong() + newParentSessionStore.deltaMap.update( + key, StateInfo(state, updateTime, deleted = false)) } } parentStateMap = 
newParentSessionStore From 8ae970790814a0080713857261a3b1c2e2b01dd7 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Sat, 15 Jan 2022 08:59:56 -0600 Subject: [PATCH 029/513] [SPARK-37862][SQL] RecordBinaryComparator should fast skip the check of aligning with unaligned platform ### What changes were proposed in this pull request? `RecordBinaryComparator` compare the entire row, so it need to check if the platform is unaligned. #35078 had given the perf number to show the benefits. So this PR aims to do the same thing that fast skip the check of aligning with unaligned platform. ### Why are the changes needed? Improve the performance. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? Pass CI. And the perf number should be same with #35078 Closes #35161 from ulysses-you/unaligned. Authored-by: ulysses-you Signed-off-by: Sean Owen --- .../apache/spark/sql/execution/RecordBinaryComparator.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java b/sql/core/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java index 1f243406c77e0..e91873a008860 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/RecordBinaryComparator.java @@ -24,6 +24,7 @@ public final class RecordBinaryComparator extends RecordComparator { + private static final boolean UNALIGNED = Platform.unaligned(); private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN); @@ -41,7 +42,7 @@ public int compare( // we have guaranteed `leftLen` == `rightLen`. // check if stars align and we can get both offsets to be aligned - if ((leftOff % 8) == (rightOff % 8)) { + if (!UNALIGNED && ((leftOff % 8) == (rightOff % 8))) { while ((leftOff + i) % 8 != 0 && i < leftLen) { final int v1 = Platform.getByte(leftObj, leftOff + i); final int v2 = Platform.getByte(rightObj, rightOff + i); @@ -52,7 +53,7 @@ public int compare( } } // for architectures that support unaligned accesses, chew it up 8 bytes at a time - if (Platform.unaligned() || (((leftOff + i) % 8 == 0) && ((rightOff + i) % 8 == 0))) { + if (UNALIGNED || (((leftOff + i) % 8 == 0) && ((rightOff + i) % 8 == 0))) { while (i <= leftLen - 8) { long v1 = Platform.getLong(leftObj, leftOff + i); long v2 = Platform.getLong(rightObj, rightOff + i); From 7614472950cb57ffefa0a51dd1163103c5d42df6 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sat, 15 Jan 2022 09:01:55 -0600 Subject: [PATCH 030/513] [SPARK-37876][CORE][SQL] Move `SpecificParquetRecordReaderBase.listDirectory` to `TestUtils` ### What changes were proposed in this pull request? `SpecificParquetRecordReaderBase.listDirectory` is used to return the list of files at `path` recursively and the result will skips files that are ignored normally by MapReduce. This method is only used by tests in Spark now and the tests also includes non-parquet test scenario, such as `OrcColumnarBatchReaderSuite`. So this pr move this method from `SpecificParquetRecordReaderBase` to `TestUtils` to make it as a test method. ### Why are the changes needed? Refactoring: move test method to `TestUtils`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35177 from LuciferYang/list-directory. 
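For readers skimming the diff below, a minimal usage sketch of the relocated helper; the object name and directory path are made up, only `TestUtils.listDirectory` itself comes from this patch:

```scala
// The sketch lives in the org.apache.spark package because TestUtils is
// declared private[spark]; the directory path below is hypothetical.
package org.apache.spark

import java.io.File

object ListDirectorySketch {
  def main(args: Array[String]): Unit = {
    // Recursively collect the leaf files under a test output directory, skipping
    // names that start with '.' or '_' (e.g. the _SUCCESS marker), exactly as the
    // old SpecificParquetRecordReaderBase.listDirectory did.
    val files: Array[String] = TestUtils.listDirectory(new File("/tmp/spark-test-output/parquet"))
    files.foreach(println)
  }
}
```

The updated suites call it the same way, e.g. `TestUtils.listDirectory(dir).head` to pick a single data file to feed into a vectorized reader.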
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../scala/org/apache/spark/TestUtils.scala | 15 +++++++++++++ .../SpecificParquetRecordReaderBase.java | 21 ------------------- .../benchmark/DataSourceReadBenchmark.scala | 11 +++++----- .../orc/OrcColumnarBatchReaderSuite.scala | 4 ++-- .../parquet/ParquetEncodingSuite.scala | 11 +++++----- .../datasources/parquet/ParquetIOSuite.scala | 6 +++--- .../datasources/parquet/ParquetTest.scala | 3 ++- .../sql/test/DataFrameReaderWriterSuite.scala | 5 ++--- 8 files changed, 36 insertions(+), 40 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index d2af95554ffab..505b3ab3a783a 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -446,6 +446,21 @@ private[spark] object TestUtils { current ++ current.filter(_.isDirectory).flatMap(recursiveList) } + /** + * Returns the list of files at 'path' recursively. This skips files that are ignored normally + * by MapReduce. + */ + def listDirectory(path: File): Array[String] = { + val result = ArrayBuffer.empty[String] + if (path.isDirectory) { + path.listFiles.foreach(f => result.appendAll(listDirectory(f))) + } else { + val c = path.getName.charAt(0) + if (c != '.' && c != '_') result.append(path.getAbsolutePath) + } + result.toArray + } + /** Creates a temp JSON file that contains the input JSON record. */ def createTempJsonFile(dir: File, prefix: String, jsonValue: JValue): String = { val file = File.createTempFile(prefix, ".json", dir) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java index e1a0607d37c2c..07e35c158c8cb 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java @@ -19,10 +19,8 @@ package org.apache.spark.sql.execution.datasources.parquet; import java.io.Closeable; -import java.io.File; import java.io.IOException; import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -121,25 +119,6 @@ public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptCont } } - /** - * Returns the list of files at 'path' recursively. This skips files that are ignored normally - * by MapReduce. - */ - public static List listDirectory(File path) { - List result = new ArrayList<>(); - if (path.isDirectory()) { - for (File f: path.listFiles()) { - result.addAll(listDirectory(f)); - } - } else { - char c = path.getName().charAt(0); - if (c != '.' && c != '_') { - result.add(path.getAbsolutePath()); - } - } - return result; - } - /** * Initializes the reader to read the file at `path` with `columns` projected. If columns is * null, all the columns are projected. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala index 31cee48c1787d..5094cdf2296e0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala @@ -25,10 +25,11 @@ import org.apache.parquet.column.ParquetProperties import org.apache.parquet.hadoop.ParquetOutputFormat import org.apache.spark.SparkConf +import org.apache.spark.TestUtils import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.{DataFrame, DataFrameWriter, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.execution.datasources.parquet.{SpecificParquetRecordReaderBase, VectorizedParquetRecordReader} +import org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.vectorized.ColumnVector @@ -167,7 +168,7 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { sqlBenchmark.run() // Driving the parquet reader in batch mode directly. - val files = SpecificParquetRecordReaderBase.listDirectory(new File(dir, "parquet")).toArray + val files = TestUtils.listDirectory(new File(dir, "parquet")) val enableOffHeapColumnVector = spark.sessionState.conf.offHeapColumnVectorEnabled val vectorizedReaderBatchSize = spark.sessionState.conf.parquetVectorizedReaderBatchSize parquetReaderBenchmark.addCase("ParquetReader Vectorized") { _ => @@ -183,7 +184,7 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { case DoubleType => (col: ColumnVector, i: Int) => doubleSum += col.getDouble(i) } - files.map(_.asInstanceOf[String]).foreach { p => + files.foreach { p => val reader = new VectorizedParquetRecordReader( enableOffHeapColumnVector, vectorizedReaderBatchSize) try { @@ -468,12 +469,12 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { } } - val files = SpecificParquetRecordReaderBase.listDirectory(new File(dir, "parquet")).toArray + val files = TestUtils.listDirectory(new File(dir, "parquet")) val enableOffHeapColumnVector = spark.sessionState.conf.offHeapColumnVectorEnabled val vectorizedReaderBatchSize = spark.sessionState.conf.parquetVectorizedReaderBatchSize benchmark.addCase("ParquetReader Vectorized") { num => var sum = 0 - files.map(_.asInstanceOf[String]).foreach { p => + files.foreach { p => val reader = new VectorizedParquetRecordReader( enableOffHeapColumnVector, vectorizedReaderBatchSize) try { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala index bfcef46339908..4ff9612ab4847 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala @@ -25,11 +25,11 @@ import org.apache.hadoop.mapreduce.lib.input.FileSplit import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl import org.apache.orc.TypeDescription +import org.apache.spark.TestUtils import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.InternalRow import 
org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase import org.apache.spark.sql.execution.vectorized.{OnHeapColumnVector, WritableColumnVector} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -117,7 +117,7 @@ class OrcColumnarBatchReaderSuite extends QueryTest with SharedSparkSession { dataTypes.zip(constantValues).foreach { case (dt, v) => val schema = StructType(StructField("col1", IntegerType) :: StructField("pcol", dt) :: Nil) val partitionValues = new GenericInternalRow(Array(v)) - val file = new File(SpecificParquetRecordReaderBase.listDirectory(dir).get(0)) + val file = new File(TestUtils.listDirectory(dir).head) val fileSplit = new FileSplit(new Path(file.getCanonicalPath), 0L, file.length, Array.empty) val taskConf = sqlContext.sessionState.newHadoopConf() val orcFileSchema = TypeDescription.fromString(schema.simpleString) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala index 746d9c6358083..f7100a53444aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path import org.apache.parquet.column.{Encoding, ParquetProperties} import org.apache.parquet.hadoop.ParquetOutputFormat +import org.apache.spark.TestUtils import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.internal.SQLConf @@ -50,12 +51,12 @@ class ParquetEncodingSuite extends ParquetCompatibilityTest with SharedSparkSess (1 :: 1000 :: Nil).foreach { n => { withTempPath { dir => List.fill(n)(ROW).toDF.repartition(1).write.parquet(dir.getCanonicalPath) - val file = SpecificParquetRecordReaderBase.listDirectory(dir).toArray.head + val file = TestUtils.listDirectory(dir).head val conf = sqlContext.conf val reader = new VectorizedParquetRecordReader( conf.offHeapColumnVectorEnabled, conf.parquetVectorizedReaderBatchSize) - reader.initialize(file.asInstanceOf[String], null) + reader.initialize(file, null) val batch = reader.resultBatch() assert(reader.nextBatch()) assert(batch.numRows() == n) @@ -80,12 +81,12 @@ class ParquetEncodingSuite extends ParquetCompatibilityTest with SharedSparkSess withTempPath { dir => val data = List.fill(n)(NULL_ROW).toDF data.repartition(1).write.parquet(dir.getCanonicalPath) - val file = SpecificParquetRecordReaderBase.listDirectory(dir).toArray.head + val file = TestUtils.listDirectory(dir).head val conf = sqlContext.conf val reader = new VectorizedParquetRecordReader( conf.offHeapColumnVectorEnabled, conf.parquetVectorizedReaderBatchSize) - reader.initialize(file.asInstanceOf[String], null) + reader.initialize(file, null) val batch = reader.resultBatch() assert(reader.nextBatch()) assert(batch.numRows() == n) @@ -114,7 +115,7 @@ class ParquetEncodingSuite extends ParquetCompatibilityTest with SharedSparkSess // first page is dictionary encoded and the remaining two are plain encoded. 
val data = (0 until 512).flatMap(i => Seq.fill(3)(i.toString)) data.toDF("f").coalesce(1).write.parquet(dir.getCanonicalPath) - val file = SpecificParquetRecordReaderBase.listDirectory(dir).asScala.head + val file = TestUtils.listDirectory(dir).head val conf = sqlContext.conf val reader = new VectorizedParquetRecordReader( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 0966319f53fc7..1e2bb9104cfc0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -38,7 +38,7 @@ import org.apache.parquet.hadoop.metadata.CompressionCodecName import org.apache.parquet.hadoop.metadata.CompressionCodecName.GZIP import org.apache.parquet.schema.{MessageType, MessageTypeParser} -import org.apache.spark.{SPARK_VERSION_SHORT, SparkException} +import org.apache.spark.{SPARK_VERSION_SHORT, SparkException, TestUtils} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.{InternalRow, ScalaReflection} import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeRow} @@ -928,7 +928,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession val data = (0 to 10).map(i => (i, (i + 'a').toChar.toString)) withTempPath { dir => spark.createDataFrame(data).repartition(1).write.parquet(dir.getCanonicalPath) - val file = SpecificParquetRecordReaderBase.listDirectory(dir).get(0); + val file = TestUtils.listDirectory(dir).head; { val conf = sqlContext.conf val reader = new VectorizedParquetRecordReader( @@ -1032,7 +1032,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession val vectorizedReader = new VectorizedParquetRecordReader( conf.offHeapColumnVectorEnabled, conf.parquetVectorizedReaderBatchSize) val partitionValues = new GenericInternalRow(Array(v)) - val file = SpecificParquetRecordReaderBase.listDirectory(dir).get(0) + val file = TestUtils.listDirectory(dir).head try { vectorizedReader.initialize(file, null) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala index 47723166213dd..7a7957c67dce1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala @@ -33,6 +33,7 @@ import org.apache.parquet.hadoop.metadata.{BlockMetaData, FileMetaData, ParquetM import org.apache.parquet.hadoop.util.HadoopInputFile import org.apache.parquet.schema.MessageType +import org.apache.spark.TestUtils import org.apache.spark.sql.DataFrame import org.apache.spark.sql.execution.datasources.FileBasedDataSourceTest import org.apache.spark.sql.internal.SQLConf @@ -179,7 +180,7 @@ private[sql] trait ParquetTest extends FileBasedDataSourceTest { } def getMetaData(dir: java.io.File): Map[String, String] = { - val file = SpecificParquetRecordReaderBase.listDirectory(dir).get(0) + val file = TestUtils.listDirectory(dir).head val conf = new Configuration() val hadoopInputFile = HadoopInputFile.fromPath(new Path(file), conf) val parquetReadOptions = HadoopReadOptions.builder(conf).build() diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index ea007c149dd8e..cb3bd29c27991 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -32,7 +32,7 @@ import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName import org.apache.parquet.schema.Type.Repetition import org.scalatest.BeforeAndAfter -import org.apache.spark.SparkContext +import org.apache.spark.{SparkContext, TestUtils} import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage import org.apache.spark.internal.io.HadoopMapReduceCommitProtocol import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} @@ -42,7 +42,6 @@ import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, Ove import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.{DataSourceUtils, HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.noop.NoopDataSource -import org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ @@ -764,7 +763,7 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with withTempPath { dir => val path = dir.getAbsolutePath df.write.mode("overwrite").parquet(path) - val file = SpecificParquetRecordReaderBase.listDirectory(dir).get(0) + val file = TestUtils.listDirectory(dir).head val hadoopInputFile = HadoopInputFile.fromPath(new Path(file), new Configuration()) val f = ParquetFileReader.open(hadoopInputFile) From 482439ff4620be9d30b36aa32a26722be9f4a30e Mon Sep 17 00:00:00 2001 From: stczwd Date: Sat, 15 Jan 2022 15:58:56 -0800 Subject: [PATCH 031/513] [SPARK-37920][BUILD] Remove tab character and trailing space in pom.xml ### What changes were proposed in this pull request? Replace the tab characters in pom.xml with spaces and remove trailing whitespace. ### Why are the changes needed? pom.xml contains tab characters and trailing spaces, which do not match the indentation style used in the rest of the file. This PR normalizes that whitespace. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35218 from stczwd/SPARK-37920.
Authored-by: stczwd Signed-off-by: Dongjoon Hyun --- pom.xml | 70 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/pom.xml b/pom.xml index 61d576c390b8f..07a8861c6132f 100644 --- a/pom.xml +++ b/pom.xml @@ -2756,39 +2756,39 @@ - - org.codehaus.mojo - build-helper-maven-plugin - 3.2.0 - - - module-timestamp-property - validate - - timestamp-property - - - module.build.timestamp - ${maven.build.timestamp.format} - current - America/Los_Angeles - - - - local-timestamp-property - validate - - timestamp-property - - - local.build.timestamp - ${maven.build.timestamp.format} - build - America/Los_Angeles - - - - + + org.codehaus.mojo + build-helper-maven-plugin + 3.2.0 + + + module-timestamp-property + validate + + timestamp-property + + + module.build.timestamp + ${maven.build.timestamp.format} + current + America/Los_Angeles + + + + local-timestamp-property + validate + + timestamp-property + + + local.build.timestamp + ${maven.build.timestamp.format} + build + America/Los_Angeles + + + + net.alchim31.maven scala-maven-plugin @@ -3564,9 +3564,9 @@ scala-2.12 - 2.12.15 From 4c59a830a6a235400d0184fb6ce24c9e054d3e4b Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sat, 15 Jan 2022 21:52:31 -0800 Subject: [PATCH 032/513] [SPARK-37921][TESTS] Update OrcReadBenchmark to use Hive ORC reader as the basis ### What changes were proposed in this pull request? This PR aims to update `OrcReadBenchmark` to use Hive ORC reader as the basis for comparison. ### Why are the changes needed? This will improve the visibility of native ORC reader's improvement because currently the new improvements are shown as `1.0x`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually review. Closes #35219 from williamhyun/benchmark. 
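For illustration, a self-contained sketch (not the benchmark harness itself) of why the ordering change matters: the `Relative` column is the first case's best time divided by each case's best time, so whichever reader is registered first becomes the `1.0X` baseline. The timings below are copied from the updated jdk11 `SQL Single TINYINT Column Scan` table; everything else is a hypothetical stand-in.

```scala
object RelativeSpeedupExample {
  def main(args: Array[String]): Unit = {
    // Best times (ms) in registration order, taken from the regenerated jdk11
    // "SQL Single TINYINT Column Scan" results; Hive built-in ORC now comes first.
    val cases = Seq(
      "Hive built-in ORC" -> 1137.0,
      "Native ORC MR" -> 962.0,
      "Native ORC Vectorized" -> 225.0)

    // Relative = best time of the first (baseline) case / best time of this case.
    val baseline = cases.head._2
    cases.foreach { case (name, bestMs) =>
      println(f"$name%-25s ${baseline / bestMs}%.1fX")
    }
    // Prints 1.0X, 1.2X and 5.1X, matching the regenerated result file.
  }
}
```

Before this change `Native ORC MR` was registered first, so improvements to the native readers could only ever appear as roughly `1.0X` against themselves; with Hive built-in ORC as the baseline, the native speedups are visible directly.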
Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- .../OrcReadBenchmark-jdk11-results.txt | 188 +++++++------- .../OrcReadBenchmark-jdk17-results.txt | 188 +++++++------- .../benchmarks/OrcReadBenchmark-results.txt | 232 +++++++++--------- .../spark/sql/hive/orc/OrcReadBenchmark.scala | 74 +++--- 4 files changed, 341 insertions(+), 341 deletions(-) diff --git a/sql/hive/benchmarks/OrcReadBenchmark-jdk11-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-jdk11-results.txt index 3f9e63f9b8f2d..f9ab5dd5d51ae 100644 --- a/sql/hive/benchmarks/OrcReadBenchmark-jdk11-results.txt +++ b/sql/hive/benchmarks/OrcReadBenchmark-jdk11-results.txt @@ -2,221 +2,221 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1064 1070 9 14.8 67.6 1.0X -Native ORC Vectorized 237 326 73 66.3 15.1 4.5X -Hive built-in ORC 1232 1330 139 12.8 78.3 0.9X +Hive built-in ORC 1137 1138 1 13.8 72.3 1.0X +Native ORC MR 962 982 17 16.3 61.2 1.2X +Native ORC Vectorized 225 298 65 69.9 14.3 5.1X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 947 1056 155 16.6 60.2 1.0X -Native ORC Vectorized 232 311 56 67.7 14.8 4.1X -Hive built-in ORC 1317 1330 19 11.9 83.7 0.7X +Hive built-in ORC 1250 1253 4 12.6 79.5 1.0X +Native ORC MR 1038 1135 136 15.1 66.0 1.2X +Native ORC Vectorized 232 307 47 67.9 14.7 5.4X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 964 1070 150 16.3 61.3 1.0X -Native ORC Vectorized 275 304 32 57.2 17.5 3.5X -Hive built-in ORC 1328 1336 11 11.8 84.4 0.7X +Hive built-in ORC 1360 1399 55 11.6 86.5 1.0X +Native ORC MR 1047 1107 85 15.0 66.5 1.3X +Native ORC Vectorized 273 291 20 57.7 17.3 5.0X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1006 1066 84 15.6 64.0 1.0X -Native ORC Vectorized 342 353 12 46.0 21.7 2.9X -Hive built-in ORC 1361 1386 36 11.6 86.5 0.7X +Hive built-in ORC 1381 1425 62 11.4 87.8 1.0X +Native ORC MR 1136 1138 4 13.9 72.2 1.2X +Native ORC Vectorized 336 377 31 46.8 21.4 4.1X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on 
Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1020 1026 8 15.4 64.8 1.0X -Native ORC Vectorized 352 381 23 44.7 22.4 2.9X -Hive built-in ORC 1457 1457 0 10.8 92.7 0.7X +Hive built-in ORC 1425 1425 1 11.0 90.6 1.0X +Native ORC MR 1090 1093 4 14.4 69.3 1.3X +Native ORC Vectorized 349 381 47 45.1 22.2 4.1X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1036 1056 28 15.2 65.9 1.0X -Native ORC Vectorized 387 403 15 40.6 24.6 2.7X -Hive built-in ORC 1409 1417 11 11.2 89.6 0.7X +Hive built-in ORC 1434 1477 61 11.0 91.2 1.0X +Native ORC MR 1116 1125 12 14.1 71.0 1.3X +Native ORC Vectorized 366 388 18 43.0 23.2 3.9X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1993 2094 144 5.3 190.0 1.0X -Native ORC Vectorized 1290 1348 83 8.1 123.0 1.5X -Hive built-in ORC 2336 2426 127 4.5 222.8 0.9X +Hive built-in ORC 2442 2543 143 4.3 232.8 1.0X +Native ORC MR 2030 2048 25 5.2 193.6 1.2X +Native ORC Vectorized 1261 1266 8 8.3 120.2 1.9X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Data column - Native ORC MR 1369 1384 22 11.5 87.0 1.0X -Data column - Native ORC Vectorized 406 428 20 38.7 25.8 3.4X -Data column - Hive built-in ORC 1444 1527 118 10.9 91.8 0.9X -Partition column - Native ORC MR 745 796 45 21.1 47.4 1.8X -Partition column - Native ORC Vectorized 70 96 28 223.2 4.5 19.4X -Partition column - Hive built-in ORC 1035 1063 39 15.2 65.8 1.3X -Both columns - Native ORC MR 1245 1306 86 12.6 79.2 1.1X -Both columns - Native ORC Vectorized 385 424 35 40.9 24.5 3.6X -Both columns - Hive built-in ORC 1481 1566 120 10.6 94.2 0.9X +Data column - Hive built-in ORC 1615 1617 3 9.7 102.7 1.0X +Data column - Native ORC MR 1330 1373 61 11.8 84.6 1.2X +Data column - Native ORC Vectorized 343 404 83 45.8 21.8 4.7X 
+Partition column - Hive built-in ORC 1087 1099 18 14.5 69.1 1.5X +Partition column - Native ORC MR 912 922 12 17.2 58.0 1.8X +Partition column - Native ORC Vectorized 67 94 33 234.6 4.3 24.1X +Both columns - Hive built-in ORC 1743 1748 7 9.0 110.8 0.9X +Both columns - Native ORC MR 1454 1459 6 10.8 92.5 1.1X +Both columns - Native ORC Vectorized 354 414 57 44.4 22.5 4.6X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1102 1261 224 9.5 105.1 1.0X -Native ORC Vectorized 216 260 55 48.5 20.6 5.1X -Hive built-in ORC 1299 1427 181 8.1 123.9 0.8X +Hive built-in ORC 1331 1342 16 7.9 126.9 1.0X +Native ORC MR 901 910 12 11.6 85.9 1.5X +Native ORC Vectorized 228 291 72 45.9 21.8 5.8X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1632 1653 30 6.4 155.6 1.0X -Native ORC Vectorized 689 698 8 15.2 65.7 2.4X -Hive built-in ORC 2224 2254 43 4.7 212.1 0.7X +Hive built-in ORC 2295 2298 4 4.6 218.9 1.0X +Native ORC MR 1711 1743 46 6.1 163.1 1.3X +Native ORC Vectorized 686 692 8 15.3 65.4 3.3X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1516 1555 54 6.9 144.6 1.0X -Native ORC Vectorized 782 801 19 13.4 74.6 1.9X -Hive built-in ORC 2023 2110 123 5.2 192.9 0.7X +Hive built-in ORC 2045 2107 88 5.1 195.0 1.0X +Native ORC MR 1577 1585 11 6.6 150.4 1.3X +Native ORC Vectorized 801 804 5 13.1 76.4 2.6X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 879 931 48 11.9 83.8 1.0X -Native ORC Vectorized 250 342 85 42.0 23.8 3.5X -Hive built-in ORC 1204 1219 20 8.7 114.9 0.7X +Hive built-in ORC 1254 1261 10 8.4 119.6 1.0X +Native ORC MR 944 962 15 11.1 90.1 1.3X +Native ORC Vectorized 262 334 103 40.1 25.0 4.8X 
================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 159 192 24 6.6 151.4 1.0X -Native ORC Vectorized 85 116 32 12.3 81.0 1.9X -Hive built-in ORC 790 853 99 1.3 753.9 0.2X +Hive built-in ORC 954 1002 68 1.1 909.8 1.0X +Native ORC MR 149 188 30 7.0 141.9 6.4X +Native ORC Vectorized 83 108 30 12.7 78.7 11.6X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 161 196 40 6.5 153.9 1.0X -Native ORC Vectorized 110 139 28 9.6 104.6 1.5X -Hive built-in ORC 1549 1585 51 0.7 1476.8 0.1X +Hive built-in ORC 1939 1994 78 0.5 1848.9 1.0X +Native ORC MR 187 259 57 5.6 178.2 10.4X +Native ORC Vectorized 117 193 46 9.0 111.2 16.6X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 201 221 14 5.2 191.8 1.0X -Native ORC Vectorized 135 163 23 7.8 128.6 1.5X -Hive built-in ORC 2166 2172 8 0.5 2065.6 0.1X +Hive built-in ORC 2759 2827 96 0.4 2631.6 1.0X +Native ORC MR 328 368 50 3.2 312.5 8.4X +Native ORC Vectorized 149 210 68 7.0 141.9 18.5X ================================================================================================ Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 473 522 41 2.2 451.4 1.0X -Native ORC Vectorized 234 351 58 4.5 222.9 2.0X -Hive built-in ORC 472 601 116 2.2 449.8 1.0X +Hive built-in ORC 681 696 17 1.5 649.0 1.0X +Native ORC MR 484 497 9 2.2 461.7 1.4X +Native ORC Vectorized 303 371 59 3.5 289.3 2.2X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 100 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 3238 3394 221 0.3 3087.5 1.0X -Native ORC Vectorized 2724 2844 169 0.4 2598.2 1.2X -Hive built-in ORC 3898 3934 52 0.3 3717.0 0.8X +Hive built-in ORC 3762 4091 465 0.3 3588.1 1.0X +Native ORC MR 3503 3577 104 0.3 3340.7 1.1X +Native ORC Vectorized 2296 2415 168 0.5 2189.9 1.6X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 300 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 10723 10890 236 0.1 10226.4 1.0X -Native ORC Vectorized 9966 10091 177 0.1 9503.9 1.1X -Hive built-in ORC 12360 12482 172 0.1 11787.4 0.9X +Hive built-in ORC 11058 11109 72 0.1 10545.5 1.0X +Native ORC MR 11323 11354 44 0.1 10798.4 1.0X +Native ORC Vectorized 11246 11315 97 0.1 10725.2 1.0X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 600 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 24875 25382 717 0.0 23722.6 1.0X -Native ORC Vectorized 22763 22830 95 0.0 21708.5 1.1X -Hive built-in ORC 27783 28079 419 0.0 26496.0 0.9X +Hive built-in ORC 25265 29571 441 0.0 24094.4 1.0X +Native ORC MR 26980 27178 280 0.0 25730.4 0.9X +Native ORC Vectorized 26603 26976 527 0.0 25370.3 0.9X ================================================================================================ Nested Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 10 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 4175 4184 12 0.3 3982.0 1.0X -Native ORC Vectorized 1476 1483 9 0.7 1407.9 2.8X -Hive built-in ORC 4128 4150 31 0.3 3936.6 1.0X +Hive built-in ORC 4354 4453 140 0.2 4152.1 1.0X +Native ORC MR 3674 4025 497 0.3 3503.4 1.2X +Native ORC Vectorized 1000 1014 21 1.0 953.4 4.4X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 30 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 9819 9945 178 0.1 9364.0 1.0X -Native ORC Vectorized 3771 3809 54 0.3 3596.0 2.6X -Hive built-in ORC 11067 11090 32 0.1 10554.8 0.9X +Hive built-in ORC 11727 11762 50 0.1 11183.8 1.0X +Native ORC MR 8861 8862 1 0.1 8450.8 1.3X +Native ORC Vectorized 2441 2497 79 0.4 2327.9 4.8X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 
5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 10 Elements, 30 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 10779 10781 3 0.1 10279.7 1.0X -Native ORC Vectorized 7162 7392 325 0.1 6830.7 1.5X -Hive built-in ORC 8417 8553 192 0.1 8027.5 1.3X +Hive built-in ORC 9604 9616 17 0.1 9159.4 1.0X +Native ORC MR 9501 9535 47 0.1 9061.0 1.0X +Native ORC Vectorized 4418 4582 232 0.2 4213.6 2.2X diff --git a/sql/hive/benchmarks/OrcReadBenchmark-jdk17-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-jdk17-results.txt index 836b563063fa7..b24cef4ef4953 100644 --- a/sql/hive/benchmarks/OrcReadBenchmark-jdk17-results.txt +++ b/sql/hive/benchmarks/OrcReadBenchmark-jdk17-results.txt @@ -2,221 +2,221 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 803 838 38 19.6 51.1 1.0X -Native ORC Vectorized 147 173 21 107.1 9.3 5.5X -Hive built-in ORC 1098 1115 23 14.3 69.8 0.7X +Hive built-in ORC 933 962 48 16.9 59.3 1.0X +Native ORC MR 864 910 76 18.2 54.9 1.1X +Native ORC Vectorized 144 172 22 108.9 9.2 6.5X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 856 927 81 18.4 54.4 1.0X -Native ORC Vectorized 136 161 15 115.3 8.7 6.3X -Hive built-in ORC 1188 1328 198 13.2 75.5 0.7X +Hive built-in ORC 1203 1301 139 13.1 76.5 1.0X +Native ORC MR 848 875 27 18.5 53.9 1.4X +Native ORC Vectorized 117 139 17 134.3 7.4 10.3X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 813 875 105 19.3 51.7 1.0X -Native ORC Vectorized 138 158 15 113.9 8.8 5.9X -Hive built-in ORC 1158 1158 0 13.6 73.6 0.7X +Hive built-in ORC 1252 1257 6 12.6 79.6 1.0X +Native ORC MR 873 939 92 18.0 55.5 1.4X +Native ORC Vectorized 127 146 17 124.0 8.1 9.9X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 839 844 
7 18.8 53.3 1.0X -Native ORC Vectorized 180 207 30 87.4 11.4 4.7X -Hive built-in ORC 1358 1394 52 11.6 86.3 0.6X +Hive built-in ORC 1286 1299 19 12.2 81.8 1.0X +Native ORC MR 948 966 17 16.6 60.3 1.4X +Native ORC Vectorized 171 203 24 91.9 10.9 7.5X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 906 968 58 17.4 57.6 1.0X -Native ORC Vectorized 237 292 56 66.3 15.1 3.8X -Hive built-in ORC 1395 1416 30 11.3 88.7 0.6X +Hive built-in ORC 1234 1243 13 12.7 78.4 1.0X +Native ORC MR 1019 1048 41 15.4 64.8 1.2X +Native ORC Vectorized 219 235 15 71.8 13.9 5.6X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1041 1060 27 15.1 66.2 1.0X -Native ORC Vectorized 265 320 44 59.4 16.8 3.9X -Hive built-in ORC 1339 1374 49 11.7 85.2 0.8X +Hive built-in ORC 1304 1309 6 12.1 82.9 1.0X +Native ORC MR 1007 1022 22 15.6 64.0 1.3X +Native ORC Vectorized 253 274 16 62.2 16.1 5.2X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 2091 2136 63 5.0 199.5 1.0X -Native ORC Vectorized 1253 1260 10 8.4 119.5 1.7X -Hive built-in ORC 2384 2391 9 4.4 227.4 0.9X +Hive built-in ORC 2178 2250 102 4.8 207.7 1.0X +Native ORC MR 1816 1821 7 5.8 173.2 1.2X +Native ORC Vectorized 1003 1025 31 10.5 95.6 2.2X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Data column - Native ORC MR 1549 1631 116 10.2 98.5 1.0X -Data column - Native ORC Vectorized 295 346 45 53.3 18.8 5.3X -Data column - Hive built-in ORC 1851 1896 64 8.5 117.7 0.8X -Partition column - Native ORC MR 850 868 19 18.5 54.1 1.8X -Partition column - Native ORC Vectorized 54 67 9 288.7 3.5 28.4X -Partition column - Hive built-in ORC 1131 1174 60 13.9 71.9 1.4X -Both columns - Native ORC MR 1069 1077 10 14.7 68.0 1.4X -Both columns - Native ORC 
Vectorized 208 226 18 75.6 13.2 7.4X -Both columns - Hive built-in ORC 1811 1812 1 8.7 115.2 0.9X +Data column - Hive built-in ORC 1442 1449 9 10.9 91.7 1.0X +Data column - Native ORC MR 1171 1186 20 13.4 74.5 1.2X +Data column - Native ORC Vectorized 179 197 20 87.8 11.4 8.1X +Partition column - Hive built-in ORC 1022 1045 32 15.4 65.0 1.4X +Partition column - Native ORC MR 848 887 43 18.5 53.9 1.7X +Partition column - Native ORC Vectorized 54 64 8 293.9 3.4 26.9X +Both columns - Hive built-in ORC 1513 1548 50 10.4 96.2 1.0X +Both columns - Native ORC MR 1189 1204 21 13.2 75.6 1.2X +Both columns - Native ORC Vectorized 197 225 24 79.7 12.6 7.3X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 825 830 5 12.7 78.6 1.0X -Native ORC Vectorized 199 207 10 52.8 18.9 4.2X -Hive built-in ORC 1206 1210 6 8.7 115.0 0.7X +Hive built-in ORC 1259 1271 17 8.3 120.1 1.0X +Native ORC MR 842 864 21 12.5 80.3 1.5X +Native ORC Vectorized 187 199 13 56.2 17.8 6.7X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1542 1572 42 6.8 147.1 1.0X -Native ORC Vectorized 523 582 66 20.1 49.8 3.0X -Hive built-in ORC 2190 2190 0 4.8 208.9 0.7X +Hive built-in ORC 2140 2155 21 4.9 204.1 1.0X +Native ORC MR 1559 1563 6 6.7 148.7 1.4X +Native ORC Vectorized 512 535 34 20.5 48.9 4.2X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1490 1499 13 7.0 142.1 1.0X -Native ORC Vectorized 630 695 97 16.7 60.1 2.4X -Hive built-in ORC 2112 2121 13 5.0 201.4 0.7X +Hive built-in ORC 1880 1920 56 5.6 179.3 1.0X +Native ORC MR 1467 1484 24 7.1 139.9 1.3X +Native ORC Vectorized 608 624 11 17.2 58.0 3.1X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 815 830 23 12.9 77.7 1.0X -Native 
ORC Vectorized 225 249 26 46.5 21.5 3.6X -Hive built-in ORC 1247 1259 16 8.4 119.0 0.7X +Hive built-in ORC 1195 1209 20 8.8 113.9 1.0X +Native ORC MR 857 895 34 12.2 81.7 1.4X +Native ORC Vectorized 218 233 15 48.1 20.8 5.5X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 141 173 19 7.5 134.0 1.0X -Native ORC Vectorized 77 91 9 13.7 73.2 1.8X -Hive built-in ORC 758 776 16 1.4 722.9 0.2X +Hive built-in ORC 884 924 43 1.2 842.7 1.0X +Native ORC MR 122 145 18 8.6 116.7 7.2X +Native ORC Vectorized 67 82 13 15.7 63.9 13.2X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 190 232 29 5.5 181.4 1.0X -Native ORC Vectorized 118 149 41 8.9 112.7 1.6X -Hive built-in ORC 1537 1558 30 0.7 1465.7 0.1X +Hive built-in ORC 1473 1520 67 0.7 1404.6 1.0X +Native ORC MR 161 177 16 6.5 153.4 9.2X +Native ORC Vectorized 107 126 14 9.8 102.0 13.8X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 237 268 28 4.4 226.0 1.0X -Native ORC Vectorized 165 188 17 6.4 157.2 1.4X -Hive built-in ORC 2103 2171 96 0.5 2005.3 0.1X +Hive built-in ORC 1988 2050 87 0.5 1896.3 1.0X +Native ORC MR 210 237 27 5.0 199.9 9.5X +Native ORC Vectorized 149 166 16 7.0 142.0 13.4X ================================================================================================ Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 278 294 12 3.8 265.5 1.0X -Native ORC Vectorized 213 246 41 4.9 202.9 1.3X -Hive built-in ORC 536 586 40 2.0 511.0 0.5X +Hive built-in ORC 477 498 14 2.2 454.9 1.0X +Native ORC MR 323 329 5 3.2 307.7 1.5X +Native ORC Vectorized 169 206 49 6.2 161.6 2.8X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU 
@ 2.60GHz Single Struct Column Scan with 100 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 2235 2244 13 0.5 2131.8 1.0X -Native ORC Vectorized 3154 3159 7 0.3 3007.6 0.7X -Hive built-in ORC 3740 4089 493 0.3 3567.0 0.6X +Hive built-in ORC 3006 3007 1 0.3 2867.0 1.0X +Native ORC MR 2469 2707 337 0.4 2354.2 1.2X +Native ORC Vectorized 1407 1422 22 0.7 1341.4 2.1X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 300 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 7350 8577 1735 0.1 7009.2 1.0X -Native ORC Vectorized 7161 8481 1867 0.1 6829.0 1.0X -Hive built-in ORC 10307 10909 851 0.1 9829.6 0.7X +Hive built-in ORC 8820 8867 67 0.1 8411.4 1.0X +Native ORC MR 7301 7422 171 0.1 6962.8 1.2X +Native ORC Vectorized 7286 7300 20 0.1 6948.6 1.2X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 600 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 15931 18238 NaN 0.1 15192.6 1.0X -Native ORC Vectorized 15192 16500 1851 0.1 14487.9 1.0X -Hive built-in ORC 29853 30027 247 0.0 28469.9 0.5X +Hive built-in ORC 24634 27218 NaN 0.0 23492.4 1.0X +Native ORC MR 19304 19441 195 0.1 18409.3 1.3X +Native ORC Vectorized 19081 19091 14 0.1 18197.3 1.3X ================================================================================================ Nested Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 10 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 3399 3463 90 0.3 3241.5 1.0X -Native ORC Vectorized 1513 1630 166 0.7 1442.7 2.2X -Hive built-in ORC 3953 3960 10 0.3 3770.0 0.9X +Hive built-in ORC 4044 4112 96 0.3 3857.0 1.0X +Native ORC MR 4086 4092 8 0.3 3897.0 1.0X +Native ORC Vectorized 977 1007 43 1.1 931.5 4.1X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 30 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 7667 7684 24 0.1 7311.9 1.0X -Native ORC Vectorized 3865 3881 22 0.3 3685.8 2.0X -Hive built-in ORC 11223 11246 32 0.1 10703.5 0.7X +Hive built-in ORC 10733 10785 73 0.1 10236.0 1.0X +Native ORC MR 7707 7707 0 0.1 7349.8 1.4X 
+Native ORC Vectorized 2260 2318 82 0.5 2155.3 4.7X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 10 Elements, 30 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 9506 9633 181 0.1 9065.4 1.0X -Native ORC Vectorized 4170 4320 212 0.3 3976.4 2.3X -Hive built-in ORC 12756 13821 1506 0.1 12164.7 0.7X +Hive built-in ORC 7851 8136 403 0.1 7487.6 1.0X +Native ORC MR 9074 9180 150 0.1 8653.9 0.9X +Native ORC Vectorized 2485 2588 146 0.4 2369.7 3.2X diff --git a/sql/hive/benchmarks/OrcReadBenchmark-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-results.txt index a08c34968c87b..137bfcc148927 100644 --- a/sql/hive/benchmarks/OrcReadBenchmark-results.txt +++ b/sql/hive/benchmarks/OrcReadBenchmark-results.txt @@ -2,221 +2,221 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1016 1068 74 15.5 64.6 1.0X -Native ORC Vectorized 220 252 33 71.4 14.0 4.6X -Hive built-in ORC 1274 1290 22 12.3 81.0 0.8X +Hive built-in ORC 1138 1191 76 13.8 72.3 1.0X +Native ORC MR 999 1115 164 15.7 63.5 1.1X +Native ORC Vectorized 155 183 23 101.7 9.8 7.4X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1117 1142 36 14.1 71.0 1.0X -Native ORC Vectorized 157 189 20 100.4 10.0 7.1X -Hive built-in ORC 1369 1399 42 11.5 87.1 0.8X +Hive built-in ORC 1034 1056 30 15.2 65.8 1.0X +Native ORC MR 859 878 19 18.3 54.6 1.2X +Native ORC Vectorized 130 155 22 121.1 8.3 8.0X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1064 1189 177 14.8 67.6 1.0X -Native ORC Vectorized 179 204 25 87.9 11.4 5.9X -Hive built-in ORC 1454 1468 20 10.8 92.4 0.7X +Hive built-in ORC 1056 1081 35 14.9 67.1 1.0X +Native ORC MR 946 1015 96 16.6 60.2 1.1X +Native ORC Vectorized 152 173 25 103.5 9.7 6.9X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure 
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1070 1196 177 14.7 68.1 1.0X -Native ORC Vectorized 216 232 14 72.8 13.7 5.0X -Hive built-in ORC 1484 1533 69 10.6 94.4 0.7X +Hive built-in ORC 1619 1776 222 9.7 103.0 1.0X +Native ORC MR 913 1015 145 17.2 58.0 1.8X +Native ORC Vectorized 187 207 19 84.3 11.9 8.7X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1164 1181 24 13.5 74.0 1.0X -Native ORC Vectorized 264 290 24 59.6 16.8 4.4X -Hive built-in ORC 1536 1572 51 10.2 97.7 0.8X +Hive built-in ORC 1117 1138 30 14.1 71.0 1.0X +Native ORC MR 909 921 20 17.3 57.8 1.2X +Native ORC Vectorized 202 224 36 78.0 12.8 5.5X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1127 1174 67 14.0 71.7 1.0X -Native ORC Vectorized 285 302 17 55.2 18.1 4.0X -Hive built-in ORC 1571 1582 16 10.0 99.9 0.7X +Hive built-in ORC 1123 1124 2 14.0 71.4 1.0X +Native ORC MR 933 951 22 16.9 59.3 1.2X +Native ORC Vectorized 231 247 34 68.1 14.7 4.9X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 2329 2413 119 4.5 222.1 1.0X -Native ORC Vectorized 1274 1282 12 8.2 121.5 1.8X -Hive built-in ORC 2622 2692 99 4.0 250.0 0.9X +Hive built-in ORC 2149 2163 21 4.9 204.9 1.0X +Native ORC MR 1844 1863 27 5.7 175.9 1.2X +Native ORC Vectorized 1059 1071 18 9.9 101.0 2.0X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Data column - Native ORC MR 1304 1309 8 12.1 82.9 1.0X -Data column - Native ORC Vectorized 221 259 25 71.1 14.1 5.9X -Data column - Hive built-in ORC 1586 1606 28 9.9 100.8 0.8X -Partition column - Native ORC MR 868 889 29 18.1 55.2 1.5X -Partition column - Native ORC Vectorized 71 85 18 222.3 4.5 18.4X -Partition column - Hive built-in ORC 1210 1241 43 13.0 77.0 1.1X -Both columns - Native ORC MR 1397 1435 54 11.3 88.8 0.9X -Both columns - Native ORC Vectorized 236 257 22 66.5 15.0 5.5X -Both columns - Hive built-in ORC 1723 1726 4 9.1 109.6 0.8X +Data column - Hive built-in ORC 1218 1220 3 12.9 77.4 1.0X +Data column - Native ORC MR 1110 1113 4 14.2 70.6 1.1X +Data column - Native ORC Vectorized 185 205 19 85.1 11.7 6.6X +Partition column - Hive built-in ORC 884 897 18 17.8 56.2 1.4X +Partition column - Native ORC MR 701 745 71 22.4 44.6 1.7X +Partition column - Native ORC Vectorized 56 65 6 281.7 3.5 21.8X +Both columns - Hive built-in ORC 1206 1225 26 13.0 76.7 1.0X +Both columns - Native ORC MR 1103 1164 86 14.3 70.1 1.1X +Both columns - Native ORC Vectorized 201 240 47 78.4 12.8 6.1X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1074 1089 21 9.8 102.4 1.0X -Native ORC Vectorized 221 254 33 47.5 21.0 4.9X -Hive built-in ORC 1435 1437 2 7.3 136.9 0.7X +Hive built-in ORC 1124 1136 17 9.3 107.2 1.0X +Native ORC MR 854 867 17 12.3 81.5 1.3X +Native ORC Vectorized 173 179 6 60.5 16.5 6.5X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1948 1964 21 5.4 185.8 1.0X -Native ORC Vectorized 666 687 31 15.7 63.5 2.9X -Hive built-in ORC 2454 2489 50 4.3 234.0 0.8X +Hive built-in ORC 1985 1985 0 5.3 189.3 1.0X +Native ORC MR 1557 1561 5 6.7 148.5 1.3X +Native ORC Vectorized 470 486 22 22.3 44.8 4.2X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native 
ORC MR 1744 1756 16 6.0 166.4 1.0X -Native ORC Vectorized 707 736 38 14.8 67.4 2.5X -Hive built-in ORC 2225 2259 48 4.7 212.2 0.8X +Hive built-in ORC 1857 1891 49 5.6 177.1 1.0X +Native ORC MR 1508 1518 14 7.0 143.8 1.2X +Native ORC Vectorized 646 660 11 16.2 61.6 2.9X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 996 1101 149 10.5 95.0 1.0X -Native ORC Vectorized 282 311 18 37.1 26.9 3.5X -Hive built-in ORC 1405 1420 20 7.5 134.0 0.7X +Hive built-in ORC 1066 1084 25 9.8 101.7 1.0X +Native ORC MR 834 851 14 12.6 79.6 1.3X +Native ORC Vectorized 242 269 36 43.3 23.1 4.4X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 153 180 17 6.8 146.2 1.0X -Native ORC Vectorized 85 99 18 12.3 81.4 1.8X -Hive built-in ORC 912 971 97 1.2 869.4 0.2X +Hive built-in ORC 912 1006 133 1.2 869.3 1.0X +Native ORC MR 125 144 19 8.4 119.4 7.3X +Native ORC Vectorized 74 83 14 14.2 70.3 12.4X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 254 272 15 4.1 242.5 1.0X -Native ORC Vectorized 122 138 15 8.6 116.6 2.1X -Hive built-in ORC 1772 1819 67 0.6 1689.5 0.1X +Hive built-in ORC 1502 1531 40 0.7 1432.7 1.0X +Native ORC MR 160 174 17 6.6 152.3 9.4X +Native ORC Vectorized 110 125 20 9.5 105.3 13.6X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 233 271 31 4.5 222.5 1.0X -Native ORC Vectorized 162 184 25 6.5 154.8 1.4X -Hive built-in ORC 2591 2602 16 0.4 2470.6 0.1X +Hive built-in ORC 2184 2191 9 0.5 2082.9 1.0X +Native ORC MR 215 233 19 4.9 204.6 10.2X +Native ORC Vectorized 160 172 18 6.5 152.7 13.6X ================================================================================================ Struct scan 
================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 369 415 54 2.8 351.7 1.0X -Native ORC Vectorized 201 214 9 5.2 191.3 1.8X -Hive built-in ORC 712 719 6 1.5 679.0 0.5X +Hive built-in ORC 513 558 70 2.0 489.3 1.0X +Native ORC MR 316 327 11 3.3 301.6 1.6X +Native ORC Vectorized 171 189 28 6.1 163.3 3.0X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 100 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 2764 2834 99 0.4 2636.2 1.0X -Native ORC Vectorized 1651 1669 26 0.6 1574.2 1.7X -Hive built-in ORC 3957 3998 58 0.3 3774.0 0.7X +Hive built-in ORC 3081 3260 254 0.3 2938.2 1.0X +Native ORC MR 2552 2627 105 0.4 2434.1 1.2X +Native ORC Vectorized 1473 1610 193 0.7 1404.8 2.1X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 300 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 9368 11693 NaN 0.1 8934.4 1.0X -Native ORC Vectorized 9324 9737 585 0.1 8891.6 1.0X -Hive built-in ORC 13303 13665 512 0.1 12687.2 0.7X +Hive built-in ORC 9531 10232 991 0.1 9089.8 1.0X +Native ORC MR 9412 9496 119 0.1 8975.6 1.0X +Native ORC Vectorized 9434 9483 69 0.1 8997.0 1.0X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 600 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 32403 35146 NaN 0.0 30902.3 1.0X -Native ORC Vectorized 38268 39336 1511 0.0 36495.2 0.8X -Hive built-in ORC 47590 48669 1525 0.0 45385.7 0.7X +Hive built-in ORC 34314 35490 1663 0.0 32724.4 1.0X +Native ORC MR 36051 36191 197 0.0 34381.3 1.0X +Native ORC Vectorized 36014 37273 1780 0.0 34346.1 1.0X ================================================================================================ Nested Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 
2.60GHz Nested Struct Scan with 10 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 5127 5720 838 0.2 4889.8 1.0X -Native ORC Vectorized 1064 1067 4 1.0 1014.8 4.8X -Hive built-in ORC 4622 4647 36 0.2 4407.6 1.1X +Hive built-in ORC 3492 3768 390 0.3 3330.1 1.0X +Native ORC MR 3918 3932 20 0.3 3736.1 0.9X +Native ORC Vectorized 893 911 17 1.2 851.7 3.9X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 30 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 11342 11343 2 0.1 10816.3 1.0X -Native ORC Vectorized 2889 2891 4 0.4 2755.1 3.9X -Hive built-in ORC 12754 12890 192 0.1 12163.6 0.9X +Hive built-in ORC 9499 10127 888 0.1 9058.7 1.0X +Native ORC MR 9227 9234 9 0.1 8799.9 1.0X +Native ORC Vectorized 2326 2389 89 0.5 2218.2 4.1X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 10 Elements, 30 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 12483 12602 167 0.1 11905.1 1.0X -Native ORC Vectorized 3522 3615 132 0.3 3358.5 3.5X -Hive built-in ORC 9775 9784 12 0.1 9322.4 1.3X +Hive built-in ORC 8315 8552 335 0.1 7929.5 1.0X +Native ORC MR 11559 12147 832 0.1 11023.1 0.7X +Native ORC Vectorized 2808 2965 222 0.4 2678.2 3.0X diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala index 990b34cda33a3..61a9360684166 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala @@ -90,6 +90,10 @@ object OrcReadBenchmark extends SqlBasedBenchmark { prepareTable(dir, spark.sql(s"SELECT CAST(value as ${dataType.sql}) id FROM t1")) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql("SELECT sum(id) FROM hiveOrcTable").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql("SELECT sum(id) FROM nativeOrcTable").noop() @@ -100,10 +104,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql("SELECT sum(id) FROM nativeOrcTable").noop() } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql("SELECT sum(id) FROM hiveOrcTable").noop() - } - benchmark.run() } } @@ -121,6 +121,10 @@ object OrcReadBenchmark extends SqlBasedBenchmark { dir, spark.sql("SELECT CAST(value AS INT) AS c1, CAST(value as STRING) AS c2 FROM t1")) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql("SELECT sum(c1), sum(length(c2)) FROM hiveOrcTable").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql("SELECT 
sum(c1), sum(length(c2)) FROM nativeOrcTable").noop() @@ -131,10 +135,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql("SELECT sum(c1), sum(length(c2)) FROM nativeOrcTable").noop() } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql("SELECT sum(c1), sum(length(c2)) FROM hiveOrcTable").noop() - } - benchmark.run() } } @@ -150,6 +150,10 @@ object OrcReadBenchmark extends SqlBasedBenchmark { prepareTable(dir, spark.sql("SELECT value % 2 AS p, value AS id FROM t1"), Some("p")) + benchmark.addCase("Data column - Hive built-in ORC") { _ => + spark.sql("SELECT sum(id) FROM hiveOrcTable").noop() + } + benchmark.addCase("Data column - Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql("SELECT sum(id) FROM nativeOrcTable").noop() @@ -160,8 +164,8 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql("SELECT sum(id) FROM nativeOrcTable").noop() } - benchmark.addCase("Data column - Hive built-in ORC") { _ => - spark.sql("SELECT sum(id) FROM hiveOrcTable").noop() + benchmark.addCase("Partition column - Hive built-in ORC") { _ => + spark.sql("SELECT sum(p) FROM hiveOrcTable").noop() } benchmark.addCase("Partition column - Native ORC MR") { _ => @@ -174,8 +178,8 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql("SELECT sum(p) FROM nativeOrcTable").noop() } - benchmark.addCase("Partition column - Hive built-in ORC") { _ => - spark.sql("SELECT sum(p) FROM hiveOrcTable").noop() + benchmark.addCase("Both columns - Hive built-in ORC") { _ => + spark.sql("SELECT sum(p), sum(id) FROM hiveOrcTable").noop() } benchmark.addCase("Both columns - Native ORC MR") { _ => @@ -188,10 +192,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql("SELECT sum(p), sum(id) FROM nativeOrcTable").noop() } - benchmark.addCase("Both columns - Hive built-in ORC") { _ => - spark.sql("SELECT sum(p), sum(id) FROM hiveOrcTable").noop() - } - benchmark.run() } } @@ -206,6 +206,10 @@ object OrcReadBenchmark extends SqlBasedBenchmark { prepareTable(dir, spark.sql("SELECT CAST((id % 200) + 10000 as STRING) AS c1 FROM t1")) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql("SELECT sum(length(c1)) FROM hiveOrcTable").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql("SELECT sum(length(c1)) FROM nativeOrcTable").noop() @@ -216,10 +220,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql("SELECT sum(length(c1)) FROM nativeOrcTable").noop() } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql("SELECT sum(length(c1)) FROM hiveOrcTable").noop() - } - benchmark.run() } } @@ -240,6 +240,11 @@ object OrcReadBenchmark extends SqlBasedBenchmark { val benchmark = new Benchmark(s"String with Nulls Scan ($percentageOfNulls%)", values, output = output) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql("SELECT SUM(LENGTH(c2)) FROM hiveOrcTable " + + "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql("SELECT SUM(LENGTH(c2)) FROM nativeOrcTable " + @@ -252,11 +257,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").noop() } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql("SELECT SUM(LENGTH(c2)) FROM hiveOrcTable " + - "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").noop() - } - benchmark.run() } } @@ -275,6 +275,10 @@ object OrcReadBenchmark 
extends SqlBasedBenchmark { prepareTable(dir, spark.sql("SELECT * FROM t1")) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql(s"SELECT sum(c$middle) FROM hiveOrcTable").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql(s"SELECT sum(c$middle) FROM nativeOrcTable").noop() @@ -285,10 +289,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql(s"SELECT sum(c$middle) FROM nativeOrcTable").noop() } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql(s"SELECT sum(c$middle) FROM hiveOrcTable").noop() - } - benchmark.run() } } @@ -307,6 +307,10 @@ object OrcReadBenchmark extends SqlBasedBenchmark { prepareTable(dir, spark.sql("SELECT * FROM t1")) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql(s"SELECT * FROM hiveOrcTable").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql(s"SELECT * FROM nativeOrcTable").noop() @@ -319,10 +323,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { } } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql(s"SELECT * FROM hiveOrcTable").noop() - } - benchmark.run() } } @@ -346,6 +346,10 @@ object OrcReadBenchmark extends SqlBasedBenchmark { prepareTable(dir, spark.sql("SELECT * FROM t1")) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql(s"SELECT * FROM hiveOrcTable").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql(s"SELECT * FROM nativeOrcTable").noop() @@ -358,10 +362,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { } } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql(s"SELECT * FROM hiveOrcTable").noop() - } - benchmark.run() } } From 0841579d704ebb340609a78ee4bd53099646c704 Mon Sep 17 00:00:00 2001 From: David Christle Date: Sun, 16 Jan 2022 15:58:20 -0600 Subject: [PATCH 033/513] [SPARK-37901] Upgrade Netty from 4.1.72 to 4.1.73 ### What changes were proposed in this pull request? Upgrade Netty dependency from 4.1.72 to 4.1.73. ### Why are the changes needed? Netty has a new release that upgrades log4j to 2.17.1. Although I didn't find obvious dependence on log4j via netty in my search of Spark's codebase, it would be good to pick up this specific version anyway. The Netty version Spark currently depends on is 4.1.72, which depends on log4j 2.15. Several CVE's have been fixed in log4j between 2.15 and 2.17.1. Besides this dependency update, several minor bugfixes have been made in this release, as described [here](https://netty.io/news/2022/01/12/4-1-73-Final.html). Associated JIRA is here: https://issues.apache.org/jira/browse/SPARK-37901 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Passes local tests and CI. Closes #35196 from dchristle/dchristle/netty_log4j_2.17. 
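As an illustrative aside (not part of this patch): one quick way to confirm which Netty build actually ends up on the classpath after a bump like this is Netty's own `io.netty.util.Version` helper. The object name below is made up; it is a sketch, not Spark code.

```scala
// Sketch: list the Netty artifacts resolved on the classpath, so a version bump
// (e.g. to 4.1.73.Final) is easy to verify at runtime. Requires netty-common.
import scala.collection.JavaConverters._

object NettyVersionCheck {
  def main(args: Array[String]): Unit = {
    io.netty.util.Version.identify().asScala.toSeq.sortBy(_._1).foreach {
      case (artifact, version) => println(s"$artifact -> ${version.artifactVersion()}")
    }
  }
}
```

Running it against a build that includes this change should report `4.1.73.Final` for the `netty-*` artifacts listed in the dependency manifests below.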
Authored-by: David Christle Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 28 +++++++++++++-------------- dev/deps/spark-deps-hadoop-3-hive-2.3 | 28 +++++++++++++-------------- pom.xml | 2 +- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index c35751c50622b..0227c7653a93b 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -200,21 +200,21 @@ metrics-jmx/4.2.2//metrics-jmx-4.2.2.jar metrics-json/4.2.2//metrics-json-4.2.2.jar metrics-jvm/4.2.2//metrics-jvm-4.2.2.jar minlog/1.3.0//minlog-1.3.0.jar -netty-all/4.1.72.Final//netty-all-4.1.72.Final.jar -netty-buffer/4.1.72.Final//netty-buffer-4.1.72.Final.jar -netty-codec/4.1.72.Final//netty-codec-4.1.72.Final.jar -netty-common/4.1.72.Final//netty-common-4.1.72.Final.jar -netty-handler/4.1.72.Final//netty-handler-4.1.72.Final.jar -netty-resolver/4.1.72.Final//netty-resolver-4.1.72.Final.jar +netty-all/4.1.73.Final//netty-all-4.1.73.Final.jar +netty-buffer/4.1.73.Final//netty-buffer-4.1.73.Final.jar +netty-codec/4.1.73.Final//netty-codec-4.1.73.Final.jar +netty-common/4.1.73.Final//netty-common-4.1.73.Final.jar +netty-handler/4.1.73.Final//netty-handler-4.1.73.Final.jar +netty-resolver/4.1.73.Final//netty-resolver-4.1.73.Final.jar netty-tcnative-classes/2.0.46.Final//netty-tcnative-classes-2.0.46.Final.jar -netty-transport-classes-epoll/4.1.72.Final//netty-transport-classes-epoll-4.1.72.Final.jar -netty-transport-classes-kqueue/4.1.72.Final//netty-transport-classes-kqueue-4.1.72.Final.jar -netty-transport-native-epoll/4.1.72.Final/linux-aarch_64/netty-transport-native-epoll-4.1.72.Final-linux-aarch_64.jar -netty-transport-native-epoll/4.1.72.Final/linux-x86_64/netty-transport-native-epoll-4.1.72.Final-linux-x86_64.jar -netty-transport-native-kqueue/4.1.72.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.72.Final-osx-aarch_64.jar -netty-transport-native-kqueue/4.1.72.Final/osx-x86_64/netty-transport-native-kqueue-4.1.72.Final-osx-x86_64.jar -netty-transport-native-unix-common/4.1.72.Final//netty-transport-native-unix-common-4.1.72.Final.jar -netty-transport/4.1.72.Final//netty-transport-4.1.72.Final.jar +netty-transport-classes-epoll/4.1.73.Final//netty-transport-classes-epoll-4.1.73.Final.jar +netty-transport-classes-kqueue/4.1.73.Final//netty-transport-classes-kqueue-4.1.73.Final.jar +netty-transport-native-epoll/4.1.73.Final/linux-aarch_64/netty-transport-native-epoll-4.1.73.Final-linux-aarch_64.jar +netty-transport-native-epoll/4.1.73.Final/linux-x86_64/netty-transport-native-epoll-4.1.73.Final-linux-x86_64.jar +netty-transport-native-kqueue/4.1.73.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.73.Final-osx-aarch_64.jar +netty-transport-native-kqueue/4.1.73.Final/osx-x86_64/netty-transport-native-kqueue-4.1.73.Final-osx-x86_64.jar +netty-transport-native-unix-common/4.1.73.Final//netty-transport-native-unix-common-4.1.73.Final.jar +netty-transport/4.1.73.Final//netty-transport-4.1.73.Final.jar objenesis/3.2//objenesis-3.2.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 51aaba30cf4e5..afa4ba5e1f28b 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -187,21 +187,21 @@ metrics-jmx/4.2.2//metrics-jmx-4.2.2.jar metrics-json/4.2.2//metrics-json-4.2.2.jar metrics-jvm/4.2.2//metrics-jvm-4.2.2.jar 
minlog/1.3.0//minlog-1.3.0.jar -netty-all/4.1.72.Final//netty-all-4.1.72.Final.jar -netty-buffer/4.1.72.Final//netty-buffer-4.1.72.Final.jar -netty-codec/4.1.72.Final//netty-codec-4.1.72.Final.jar -netty-common/4.1.72.Final//netty-common-4.1.72.Final.jar -netty-handler/4.1.72.Final//netty-handler-4.1.72.Final.jar -netty-resolver/4.1.72.Final//netty-resolver-4.1.72.Final.jar +netty-all/4.1.73.Final//netty-all-4.1.73.Final.jar +netty-buffer/4.1.73.Final//netty-buffer-4.1.73.Final.jar +netty-codec/4.1.73.Final//netty-codec-4.1.73.Final.jar +netty-common/4.1.73.Final//netty-common-4.1.73.Final.jar +netty-handler/4.1.73.Final//netty-handler-4.1.73.Final.jar +netty-resolver/4.1.73.Final//netty-resolver-4.1.73.Final.jar netty-tcnative-classes/2.0.46.Final//netty-tcnative-classes-2.0.46.Final.jar -netty-transport-classes-epoll/4.1.72.Final//netty-transport-classes-epoll-4.1.72.Final.jar -netty-transport-classes-kqueue/4.1.72.Final//netty-transport-classes-kqueue-4.1.72.Final.jar -netty-transport-native-epoll/4.1.72.Final/linux-aarch_64/netty-transport-native-epoll-4.1.72.Final-linux-aarch_64.jar -netty-transport-native-epoll/4.1.72.Final/linux-x86_64/netty-transport-native-epoll-4.1.72.Final-linux-x86_64.jar -netty-transport-native-kqueue/4.1.72.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.72.Final-osx-aarch_64.jar -netty-transport-native-kqueue/4.1.72.Final/osx-x86_64/netty-transport-native-kqueue-4.1.72.Final-osx-x86_64.jar -netty-transport-native-unix-common/4.1.72.Final//netty-transport-native-unix-common-4.1.72.Final.jar -netty-transport/4.1.72.Final//netty-transport-4.1.72.Final.jar +netty-transport-classes-epoll/4.1.73.Final//netty-transport-classes-epoll-4.1.73.Final.jar +netty-transport-classes-kqueue/4.1.73.Final//netty-transport-classes-kqueue-4.1.73.Final.jar +netty-transport-native-epoll/4.1.73.Final/linux-aarch_64/netty-transport-native-epoll-4.1.73.Final-linux-aarch_64.jar +netty-transport-native-epoll/4.1.73.Final/linux-x86_64/netty-transport-native-epoll-4.1.73.Final-linux-x86_64.jar +netty-transport-native-kqueue/4.1.73.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.73.Final-osx-aarch_64.jar +netty-transport-native-kqueue/4.1.73.Final/osx-x86_64/netty-transport-native-kqueue-4.1.73.Final-osx-x86_64.jar +netty-transport-native-unix-common/4.1.73.Final//netty-transport-native-unix-common-4.1.73.Final.jar +netty-transport/4.1.73.Final//netty-transport-4.1.73.Final.jar objenesis/3.2//objenesis-3.2.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar diff --git a/pom.xml b/pom.xml index 07a8861c6132f..50871131c1da2 100644 --- a/pom.xml +++ b/pom.xml @@ -811,7 +811,7 @@ io.netty netty-all - 4.1.72.Final + 4.1.73.Final io.netty From 72940b30acb9afc6bd8b12518aa3eb03ff0b84bb Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 17 Jan 2022 11:51:01 +0900 Subject: [PATCH 034/513] [SPARK-37924][SQL] Sort table properties by key in SHOW CREATE TABLE on VIEW (v1) ### What changes were proposed in this pull request? This PR is a sort of a followup of https://github.com/apache/spark/pull/34719. It added a test but it is flaky due to the order of `TABLPROPERTIES` in `SHOW CREATE TALBE` on `VIEW` in v1 code path. This PR proposes to have a deterministic order by sorting the table properties in the show command. This is already being sorted in v2 (see `ShowCreateTableExec`). ### Why are the changes needed? To have the deterministic order, and fix the flaky test. ### Does this PR introduce _any_ user-facing change? Virtually no. 
It might affect the order of TBLPROPERTIES in `SHOW CREATE TABLE`'s output on `VIEW` when users rely on it.

### How was this patch tested?

Fixed the flaky unit test to explicitly test the order.

Closes #35222 from HyukjinKwon/SPARK-37924.

Authored-by: Hyukjin Kwon
Signed-off-by: Hyukjin Kwon
---
 .../scala/org/apache/spark/sql/execution/command/tables.scala   | 2 +-
 .../scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index b989224d4e0f6..4234e373bd013 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -1043,7 +1043,7 @@ trait ShowCreateTableCommandBase {
   private def showViewProperties(metadata: CatalogTable, builder: StringBuilder): Unit = {
     val viewProps = metadata.properties.filterKeys(!_.startsWith(CatalogTable.VIEW_PREFIX))
     if (viewProps.nonEmpty) {
-      val props = viewProps.map { case (key, value) =>
+      val props = viewProps.toSeq.sortBy(_._1).map { case (key, value) =>
         s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
       }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
index 433264951acef..4ffe38abf3678 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
@@ -634,7 +634,7 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession {
     Seq(true, false).foreach { serde =>
       withView(viewName) {
         createView(viewName, "SELECT 1 AS c1, '2' AS c2", Seq("c1 COMMENT 'bla'", "c2"),
-          Seq("COMMENT 'table comment'", "TBLPROPERTIES ( 'prop1' = 'value1', 'prop2' = 'value2')"))
+          Seq("COMMENT 'table comment'", "TBLPROPERTIES ( 'prop2' = 'value2', 'prop1' = 'value1')"))
         val expected = "CREATE VIEW `default`.`v1` ( `c1` COMMENT 'bla', `c2`)" +
           " COMMENT 'table comment'" +

From 17aaf8ed29e24516ca460d92b481a0b2f0545ddf Mon Sep 17 00:00:00 2001
From: Yikun Jiang
Date: Mon, 17 Jan 2022 12:14:30 +0900
Subject: [PATCH 035/513] [SPARK-36885][PYTHON][FOLLOWUP] Fix drop subset inline type hint

### What changes were proposed in this pull request?

Fix the inline type hint of `drop`'s `subset` parameter.

### Why are the changes needed?

It should be the same as `DataFrame.dropna`:
https://github.com/apache/spark/blob/90003398745bfee78416074ed786e986fcb2c8cd/python/pyspark/sql/dataframe.py#L2359

See also: https://github.com/apache/spark/pull/35191#discussion_r784446470

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

UT

Closes #35201 from Yikun/SPARK-36885-FOLLOWUP.
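As a point of reference (not part of this patch), the Scala side of this API already accepts a sequence of column names, which is the shape the Python `subset` hint is being aligned with. A minimal sketch, assuming a local `SparkSession`:

```scala
// Minimal sketch, assuming a local SparkSession: the DataFrameNaFunctions.drop overload
// whose Python-side `subset` type hint is widened by this change. Not Spark test code.
import org.apache.spark.sql.SparkSession

object NaDropSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("na-drop-sketch").getOrCreate()
    import spark.implicits._
    val df = Seq[(Option[Int], Option[String])]((Some(1), None), (Some(2), Some("b")))
      .toDF("c1", "c2")
    // Drop rows that are null in any of the listed columns.
    df.na.drop("any", Seq("c1", "c2")).show()
    spark.stop()
  }
}
```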
Authored-by: Yikun Jiang Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/dataframe.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 9e75006723eba..ee68865c98e39 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -3310,7 +3310,10 @@ def __init__(self, df: DataFrame): self.df = df def drop( - self, how: str = "any", thresh: Optional[int] = None, subset: Optional[List[str]] = None + self, + how: str = "any", + thresh: Optional[int] = None, + subset: Optional[Union[str, Tuple[str, ...], List[str]]] = None, ) -> DataFrame: return self.df.dropna(how=how, thresh=thresh, subset=subset) From b50d4507f52315d5f6d75c617e845248a1c828a9 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Mon, 17 Jan 2022 16:23:27 +0800 Subject: [PATCH 036/513] [SPARK-37904][SQL] Improve RebalancePartitions in rules of Optimizer ### What changes were proposed in this pull request? Improve `RebalancePartitions` in following rules: - `NestedColumnAliasing` - `CollapseRepartition ` - `EliminateSorts` - `FoldablePropagation` - `PropagateEmptyRelationBase` ### Why are the changes needed? After SPARK-37267, we support do optimize rebalance partitions in everywhere of plan rather than limit to the root node. So It should make sense to also let `RebalancePartitions` work in all rules of Optimizer like `Repartition` and `RepartitionByExpression` did. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? Add test in: - `NestedColumnAliasingSuite` - `CollapseRepartitionSuite` - `EliminateSortsBeforeRepartitionSuite` - `FoldablePropagationSuite` - `PropagateEmptyRelationSuite` Closes #35208 from ulysses-you/rebalance. Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/dsl/package.scala | 3 +++ .../optimizer/NestedColumnAliasing.scala | 1 + .../sql/catalyst/optimizer/Optimizer.scala | 23 +++++++++++++------ .../optimizer/PropagateEmptyRelation.scala | 3 ++- .../sql/catalyst/optimizer/expressions.scala | 1 + .../plans/logical/basicLogicalOperators.scala | 1 + .../sql/catalyst/trees/TreePatterns.scala | 1 + .../optimizer/CollapseRepartitionSuite.scala | 14 +++++++++++ ...EliminateSortsBeforeRepartitionSuite.scala | 16 +++++++++++++ .../optimizer/FoldablePropagationSuite.scala | 8 +++++++ .../optimizer/NestedColumnAliasingSuite.scala | 22 ++++++++++++++++++ .../PropagateEmptyRelationSuite.scala | 15 ++++++++++++ 12 files changed, 100 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 1c95ec8d1a573..dda0d193e7483 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -490,6 +490,9 @@ package object dsl { def distribute(exprs: Expression*)(n: Int): LogicalPlan = RepartitionByExpression(exprs, logicalPlan, numPartitions = n) + def rebalance(exprs: Expression*): LogicalPlan = + RebalancePartitions(exprs, logicalPlan) + def analyze: LogicalPlan = { val analyzed = analysis.SimpleAnalyzer.execute(logicalPlan) analysis.SimpleAnalyzer.checkAnalysis(analyzed) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala index 
9d63f4e94647c..c8c67f5000942 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala @@ -210,6 +210,7 @@ object NestedColumnAliasing { case _: Repartition => true case _: Sample => true case _: RepartitionByExpression => true + case _: RebalancePartitions => true case _: Join => true case _: Window => true case _: Sort => true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 3d41953ebfb58..1c2f0afb9d41c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1049,11 +1049,11 @@ object CollapseProject extends Rule[LogicalPlan] with AliasHelper { } /** - * Combines adjacent [[RepartitionOperation]] operators + * Combines adjacent [[RepartitionOperation]] and [[RebalancePartitions]] operators */ object CollapseRepartition extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan.transformUpWithPruning( - _.containsPattern(REPARTITION_OPERATION), ruleId) { + _.containsAnyPattern(REPARTITION_OPERATION, REBALANCE_PARTITIONS), ruleId) { // Case 1: When a Repartition has a child of Repartition or RepartitionByExpression, // 1) When the top node does not enable the shuffle (i.e., coalesce API), but the child // enables the shuffle. Returns the child node if the last numPartitions is bigger; @@ -1067,6 +1067,14 @@ object CollapseRepartition extends Rule[LogicalPlan] { // RepartitionByExpression we can remove the child. case r @ RepartitionByExpression(_, child @ (Sort(_, true, _) | _: RepartitionOperation), _) => r.withNewChildren(child.children) + // Case 3: When a RebalancePartitions has a child of local or global Sort, Repartition or + // RepartitionByExpression we can remove the child. + case r @ RebalancePartitions(_, child @ (_: Sort | _: RepartitionOperation)) => + r.withNewChildren(child.children) + // Case 4: When a RebalancePartitions has a child of RebalancePartitions we can remove the + // child. 
+ case r @ RebalancePartitions(_, child: RebalancePartitions) => + r.withNewChildren(child.children) } } @@ -1363,13 +1371,13 @@ object CombineFilters extends Rule[LogicalPlan] with PredicateHelper { * 2) if the sort order is empty or the sort order does not have any reference * 3) if the Sort operator is a local sort and the child is already sorted * 4) if there is another Sort operator separated by 0...n Project, Filter, Repartition or - * RepartitionByExpression (with deterministic expressions) operators + * RepartitionByExpression, RebalancePartitions (with deterministic expressions) operators * 5) if the Sort operator is within Join separated by 0...n Project, Filter, Repartition or - * RepartitionByExpression (with deterministic expressions) operators only and the Join condition - * is deterministic + * RepartitionByExpression, RebalancePartitions (with deterministic expressions) operators only + * and the Join condition is deterministic * 6) if the Sort operator is within GroupBy separated by 0...n Project, Filter, Repartition or - * RepartitionByExpression (with deterministic expressions) operators only and the aggregate - * function is order irrelevant + * RepartitionByExpression, RebalancePartitions (with deterministic expressions) operators only + * and the aggregate function is order irrelevant */ object EliminateSorts extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning( @@ -1409,6 +1417,7 @@ object EliminateSorts extends Rule[LogicalPlan] { case p: Project => p.projectList.forall(_.deterministic) case f: Filter => f.condition.deterministic case r: RepartitionByExpression => r.partitionExpressions.forall(_.deterministic) + case r: RebalancePartitions => r.partitionExpressions.forall(_.deterministic) case _: Repartition => true case _ => false } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala index d02f12d67e19f..2c964fa6da3db 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.{LOCAL_RELATION, TRUE_OR_ * [[LocalRelation]]. * 3. Unary-node Logical Plans * - Project/Filter/Sample with all empty children. - * - Limit/Repartition with all empty children. + * - Limit/Repartition/RepartitionByExpression/Rebalance with all empty children. * - Aggregate with all empty children and at least one grouping expression. * - Generate(Explode) with all empty children. Others like Hive UDTF may return results. */ @@ -138,6 +138,7 @@ abstract class PropagateEmptyRelationBase extends Rule[LogicalPlan] with CastSup case _: LocalLimit if !p.isStreaming => empty(p) case _: Repartition => empty(p) case _: RepartitionByExpression => empty(p) + case _: RebalancePartitions => empty(p) // An aggregate with non-empty group expression will return one output row per group when the // input to the aggregate is not empty. If the input to the aggregate is empty then all groups // will be empty and thus the output will be empty. 
If we're working on batch data, we can diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index b002930391222..0753e066ae02e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -1023,6 +1023,7 @@ object FoldablePropagation extends Rule[LogicalPlan] { case _: AppendColumnsWithObject => true case _: RepartitionByExpression => true case _: Repartition => true + case _: RebalancePartitions => true case _: Sort => true case _: TypedFilter => true case _ => false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index e8a632d01598f..68b0f24f50145 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1462,6 +1462,7 @@ case class RebalancePartitions( child: LogicalPlan) extends UnaryNode { override def maxRows: Option[Long] = child.maxRows override def output: Seq[Attribute] = child.output + override val nodePatterns: Seq[TreePattern] = Seq(REBALANCE_PARTITIONS) def partitioning: Partitioning = if (partitionExpressions.isEmpty) { RoundRobinPartitioning(conf.numShufflePartitions) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala index e02bc475cfee0..8db2f55e0ce63 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala @@ -111,6 +111,7 @@ object TreePattern extends Enumeration { val PROJECT: Value = Value val RELATION_TIME_TRAVEL: Value = Value val REPARTITION_OPERATION: Value = Value + val REBALANCE_PARTITIONS: Value = Value val UNION: Value = Value val UNRESOLVED_RELATION: Value = Value val UNRESOLVED_WITH: Value = Value diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseRepartitionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseRepartitionSuite.scala index 177545faa212f..dd5d6d48bcd3e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseRepartitionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseRepartitionSuite.scala @@ -207,4 +207,18 @@ class CollapseRepartitionSuite extends PlanTest { .distribute('a)(20) comparePlans(Optimize.execute(originalQuery2.analyze), originalQuery2.analyze) } + + test("SPARK-37904: Improve rebalance in CollapseRepartition") { + Seq(testRelation.sortBy($"a".asc), + testRelation.orderBy($"a".asc), + testRelation.coalesce(1), + testRelation.repartition(1), + testRelation.distribute($"a")(1), + testRelation.rebalance($"a")).foreach { prefix => + val plan = prefix.rebalance($"a").analyze + val optimized = Optimize.execute(plan) + val expected = testRelation.rebalance($"a").analyze + comparePlans(optimized, expected) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsBeforeRepartitionSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsBeforeRepartitionSuite.scala index bbb860086557a..5927cc2dfff6d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsBeforeRepartitionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsBeforeRepartitionSuite.scala @@ -196,3 +196,19 @@ class EliminateSortsBeforeRepartitionByExprsSuite extends EliminateSortsBeforeRe class EliminateSortsBeforeCoalesceSuite extends EliminateSortsBeforeRepartitionSuite { override def repartition(plan: LogicalPlan): LogicalPlan = plan.coalesce(1) } + +class EliminateSortsBeforeRebalanceSuite extends EliminateSortsBeforeRepartitionSuite { + override def repartition(plan: LogicalPlan): LogicalPlan = plan.rebalance($"a") + + test("sortBy before rebalance with non-deterministic expressions") { + val plan = testRelation.sortBy($"a".asc, $"b".asc).limit(10) + val planWithRepartition = plan.rebalance(rand(1).asc, $"a".asc) + checkRepartitionCases(plan = planWithRepartition, optimizedPlan = planWithRepartition) + } + + test("orderBy before rebalance with non-deterministic expressions") { + val plan = testRelation.orderBy($"a".asc, $"b".asc).limit(10) + val planWithRebalance = plan.rebalance(rand(1).asc, $"a".asc) + checkRepartitionCases(plan = planWithRebalance, optimizedPlan = planWithRebalance) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala index 92e4fa345e2ad..732c50e225550 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala @@ -204,4 +204,12 @@ class FoldablePropagationSuite extends PlanTest { .select('a, 'b, Literal(1).as('c)).analyze comparePlans(optimized, correctAnswer) } + + test("SPARK-37904: Improve rebalance in FoldablePropagation") { + val foldableAttr = Literal(1).as("x") + val plan = testRelation.select(foldableAttr, $"a").rebalance($"x", $"a").analyze + val optimized = Optimize.execute(plan) + val expected = testRelation.select(foldableAttr, $"a").rebalance(foldableAttr, $"a").analyze + comparePlans(optimized, expected) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala index 40ab72c89f3bf..ff3414d901208 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala @@ -790,6 +790,28 @@ class NestedColumnAliasingSuite extends SchemaPruningTest { comparePlans(optimized, expected) } + + test("SPARK-37904: Improve rebalance in NestedColumnAliasing") { + // alias nested columns through rebalance + val plan1 = contact.rebalance($"id").select($"name.first").analyze + val optimized1 = Optimize.execute(plan1) + val expected1 = contact.select($"id", $"name.first".as("_extract_first")) + .rebalance($"id").select($"_extract_first".as("first")).analyze + comparePlans(optimized1, expected1) + + // also alias rebalance nested columns + val plan2 = contact.rebalance($"name.first").select($"name.first").analyze + val optimized2 = 
Optimize.execute(plan2) + val expected2 = contact.select($"name.first".as("_extract_first")) + .rebalance($"_extract_first".as("first")).select($"_extract_first".as("first")).analyze + comparePlans(optimized2, expected2) + + // do not alias nested columns if its child contains root reference + val plan3 = contact.rebalance($"name").select($"name.first").analyze + val optimized3 = Optimize.execute(plan3) + val expected3 = contact.select($"name").rebalance($"name").select($"name.first").analyze + comparePlans(optimized3, expected3) + } } object NestedColumnAliasingSuite { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala index 1aa4f4cbceae8..8277e44458bb1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala @@ -294,4 +294,19 @@ class PropagateEmptyRelationSuite extends PlanTest { val expected = LocalRelation.fromExternalRows(Seq('a.int, 'b.int, 'c.int), Nil) comparePlans(optimized, expected) } + + test("SPARK-37904: Improve rebalance in PropagateEmptyRelation") { + val emptyRelation = LocalRelation($"a".int) + val expected = emptyRelation.analyze + + // test root node + val plan1 = emptyRelation.rebalance($"a").analyze + val optimized1 = Optimize.execute(plan1) + comparePlans(optimized1, expected) + + // test non-root node + val plan2 = emptyRelation.rebalance($"a").where($"a" > 0).select($"a").analyze + val optimized2 = Optimize.execute(plan2) + comparePlans(optimized2, expected) + } } From 732477b28f36c86e8a6f99fbf6b831407ba5dbe9 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Tue, 18 Jan 2022 08:47:15 +0900 Subject: [PATCH 037/513] [SPARK-37498][PYTHON] Add eventually for test_reuse_worker_of_parallelize_range ### What changes were proposed in this pull request? Add eventually for test_reuse_worker_of_parallelize_range ### Why are the changes needed? Avoid test_reuse_worker_of_parallelize_range becoming flaky when resources are tight or some other reason ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT passed. Closes #35228 from Yikun/SPARK-37498. 
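The JVM-side tests lean on the same retry-until-stable idea through ScalaTest's `Eventually`. The sketch below is a generic ScalaTest example, not code from this patch; the suite name and timings are made up.

```scala
// Generic sketch of the retry pattern: re-run an assertion until it holds or a timeout
// expires, instead of failing on the first transient miss. Illustrative only.
import org.scalatest.concurrent.Eventually
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.time.{Seconds, Span}

class EventuallySketchSuite extends AnyFunSuite with Eventually {
  test("a flaky condition eventually holds") {
    val readyAt = System.nanoTime() + 2L * 1000 * 1000 * 1000 // becomes true after ~2s
    eventually(timeout(Span(10, Seconds))) {
      // Stand-in for "every current worker PID was already seen before".
      assert(System.nanoTime() > readyAt)
    }
  }
}
```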
Authored-by: Yikun Jiang
Signed-off-by: Hyukjin Kwon
---
 python/pyspark/tests/test_worker.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/python/pyspark/tests/test_worker.py b/python/pyspark/tests/test_worker.py
index 64e7b7d6a1bcf..0fdf6adb031bf 100644
--- a/python/pyspark/tests/test_worker.py
+++ b/python/pyspark/tests/test_worker.py
@@ -31,7 +31,7 @@ from py4j.protocol import Py4JJavaError
 from pyspark import SparkConf, SparkContext
-from pyspark.testing.utils import ReusedPySparkTestCase, PySparkTestCase, QuietTest
+from pyspark.testing.utils import ReusedPySparkTestCase, PySparkTestCase, QuietTest, eventually
 class WorkerTests(ReusedPySparkTestCase):
@@ -188,11 +188,15 @@ def f():
 class WorkerReuseTest(PySparkTestCase):
     def test_reuse_worker_of_parallelize_range(self):
-        rdd = self.sc.parallelize(range(20), 8)
-        previous_pids = rdd.map(lambda x: os.getpid()).collect()
-        current_pids = rdd.map(lambda x: os.getpid()).collect()
-        for pid in current_pids:
-            self.assertTrue(pid in previous_pids)
+        def check_reuse_worker_of_parallelize_range():
+            rdd = self.sc.parallelize(range(20), 8)
+            previous_pids = rdd.map(lambda x: os.getpid()).collect()
+            current_pids = rdd.map(lambda x: os.getpid()).collect()
+            for pid in current_pids:
+                self.assertTrue(pid in previous_pids)
+            return True
+
+        eventually(check_reuse_worker_of_parallelize_range, catch_assertions=True)
 @unittest.skipIf(

From df7447bc62052e3d7391ba23d7220fb8c9b923fd Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Mon, 17 Jan 2022 18:23:17 -0600
Subject: [PATCH 038/513] [SPARK-37712][YARN] Spark request yarn cluster metrics slow cause delay

### What changes were proposed in this pull request?

Spark requests the YARN cluster metrics and prints a log line with the NodeManager count when submitting an application. The log line is not very important, and the RPC behind it is often slow:

![image](https://user-images.githubusercontent.com/46485123/147055954-30698764-b313-419f-8759-772ad9f301ff.png)

We can log it at debug level instead.

### Why are the changes needed?

Avoid an unnecessary delay when submitting an application.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Not needed.

Closes #34982 from AngersZhuuuu/SPARK-37712.

Authored-by: Angerszhuuuu
Signed-off-by: Mridul Muralidharan
---
 .../main/scala/org/apache/spark/deploy/yarn/Client.scala | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index ca4fbbb97ad28..ae85ea8d6110a 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -183,8 +183,10 @@ private[spark] class Client(
     yarnClient.init(hadoopConf)
     yarnClient.start()
-    logInfo("Requesting a new application from cluster with %d NodeManagers"
-      .format(yarnClient.getYarnClusterMetrics.getNumNodeManagers))
+    if (log.isDebugEnabled) {
+      logDebug("Requesting a new application from cluster with %d NodeManagers"
+        .format(yarnClient.getYarnClusterMetrics.getNumNodeManagers))
+    }
     // Get a new application from our RM
     val newApp = yarnClient.createApplication()

From 2c825d190df7d9cb7d7e19d9eb2e57ee70cc6446 Mon Sep 17 00:00:00 2001
From: PengLei
Date: Tue, 18 Jan 2022 13:22:27 +0800
Subject: [PATCH 039/513] [SPARK-37878][SQL] Migrate SHOW CREATE TABLE to use v2 command by default

### What changes were proposed in this pull request?
1. Add `quoted(identifier: TableIdentifier)` to quote the table name for the V1 commands (`SHOW CREATE TABLE [AS SERDE]`) to match the V2 behavior. Quoting is only applied where `quoteIfNeeded` requires it.
2. Change `addV2TableProperties` of `V1Table`: the `location` property is only added when `external == true`.
3. Change `V1Table.schema` to re-construct the original schema from the string.
4. Use the V2 command as the default for `SHOW CREATE TABLE`.
5. Change the V2 partitioning output (`showTablePartitioning` in `ShowCreateTableExec`) to match the V1 behavior.

### Why are the changes needed?

It's been a while since we introduced the v2 commands, and it seems reasonable to use v2 commands by default even for the session catalog, with a legacy config to fall back to the v1 commands.

### Does this PR introduce _any_ user-facing change?

The V2 command is now used by default for `SHOW CREATE TABLE`; when LEGACY_USE_V1_COMMAND == true, the V1 command is used instead.

### How was this patch tested?

build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowCreateTableSuite"

Closes #35204 from Peng-Lei/SPARK-37878.

Authored-by: PengLei
Signed-off-by: Wenchen Fan
---
 .../catalog/CatalogV2Implicits.scala          | 12 +++++
 .../spark/sql/connector/catalog/V1Table.scala | 18 ++------
 .../analysis/ResolveSessionCatalog.scala      | 17 ++++---
 .../spark/sql/execution/command/tables.scala  |  7 +--
 .../datasources/v2/ShowCreateTableExec.scala  | 44 +++++++++++++++----
 .../sql-tests/results/charvarchar.sql.out     |  8 ++--
 .../results/show-create-table.sql.out         | 30 ++++++-------
 .../sql/connector/DataSourceV2SQLSuite.scala  |  1 +
 .../sql/execution/SQLViewTestSuite.scala      |  8 ++--
 .../command/v1/ShowCreateTableSuite.scala     | 33 +++++++++++++-
 .../command/v2/ShowCreateTableSuite.scala     |  4 +-
 11 files changed, 126 insertions(+), 56 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala
index 407f25ba20e5d..f4890cc3058d8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala
@@ -164,6 +164,18 @@ private[sql] object CatalogV2Implicits {
     def quoted: String = parts.map(quoteIfNeeded).mkString(".")
   }
+  implicit class TableIdentifierHelper(identifier: TableIdentifier) {
+    def quoted: String = {
+      identifier.database match {
+        case Some(db) =>
+          Seq(db, identifier.table).map(quoteIfNeeded).mkString(".")
+        case _ =>
+          quoteIfNeeded(identifier.table)
+
+      }
+    }
+  }
+
   def parseColumnPath(name: String): Seq[String] = {
     CatalystSqlParser.parseMultipartIdentifier(name)
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
index 07f66a614b2ad..bf92107f6ae2d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
@@ -22,9 +22,8 @@ import java.util
 import scala.collection.JavaConverters._
 import scala.collection.mutable
-import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType}
-import org.apache.spark.sql.catalyst.util.quoteIfNeeded
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TableIdentifierHelper
 import org.apache.spark.sql.connector.catalog.V1Table.addV2TableProperties
 import
org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform} import org.apache.spark.sql.types.StructType @@ -33,17 +32,6 @@ import org.apache.spark.sql.types.StructType * An implementation of catalog v2 `Table` to expose v1 table metadata. */ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { - implicit class IdentifierHelper(identifier: TableIdentifier) { - def quoted: String = { - identifier.database match { - case Some(db) => - Seq(db, identifier.table).map(quoteIfNeeded).mkString(".") - case _ => - quoteIfNeeded(identifier.table) - - } - } - } def catalogTable: CatalogTable = v1Table @@ -92,7 +80,9 @@ private[sql] object V1Table { TableCatalog.OPTION_PREFIX + key -> value } ++ v1Table.provider.map(TableCatalog.PROP_PROVIDER -> _) ++ v1Table.comment.map(TableCatalog.PROP_COMMENT -> _) ++ - v1Table.storage.locationUri.map(TableCatalog.PROP_LOCATION -> _.toString) ++ + (if (external) { + v1Table.storage.locationUri.map(TableCatalog.PROP_LOCATION -> _.toString) + } else None) ++ (if (external) Some(TableCatalog.PROP_EXTERNAL -> "true") else None) ++ Some(TableCatalog.PROP_OWNER -> v1Table.owner) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index aaf2ead592c98..3dde9985abbee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -266,12 +266,19 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) isOverwrite, partition) - case ShowCreateTable(ResolvedV1TableOrViewIdentifier(ident), asSerde, output) => - if (asSerde) { - ShowCreateTableAsSerdeCommand(ident.asTableIdentifier, output) - } else { + case ShowCreateTable(ResolvedV1TableOrViewIdentifier(ident), asSerde, output) if asSerde => + ShowCreateTableAsSerdeCommand(ident.asTableIdentifier, output) + + // If target is view, force use v1 command + case ShowCreateTable(ResolvedViewIdentifier(ident), _, output) => + ShowCreateTableCommand(ident.asTableIdentifier, output) + + case ShowCreateTable(ResolvedV1TableIdentifier(ident), _, output) + if conf.useV1Command => ShowCreateTableCommand(ident.asTableIdentifier, output) + + case ShowCreateTable(ResolvedTable(catalog, ident, table: V1Table, _), _, output) + if isSessionCatalog(catalog) && DDLUtils.isHiveTable(table.catalogTable) => ShowCreateTableCommand(ident.asTableIdentifier, output) - } case TruncateTable(ResolvedV1TableIdentifier(ident)) => TruncateTableCommand(ident.asTableIdentifier, None) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 4234e373bd013..7ae0e017b28c0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap, CharVarcharUtils} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TableIdentifierHelper import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import 
org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat @@ -1104,12 +1105,12 @@ case class ShowCreateTableCommand( val builder = StringBuilder.newBuilder val stmt = if (tableMetadata.tableType == VIEW) { - builder ++= s"CREATE VIEW ${table.quotedString} " + builder ++= s"CREATE VIEW ${table.quoted} " showCreateView(metadata, builder) builder.toString() } else { - builder ++= s"CREATE TABLE ${table.quotedString} " + builder ++= s"CREATE TABLE ${table.quoted} " showCreateDataSourceTable(metadata, builder) builder.toString() @@ -1247,7 +1248,7 @@ case class ShowCreateTableAsSerdeCommand( s"Unknown table type is found at showCreateHiveTable: $t") } - builder ++= s"CREATE$tableTypeString ${table.quotedString} " + builder ++= s"CREATE$tableTypeString ${table.quoted} " if (metadata.tableType == VIEW) { showCreateView(metadata, builder) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala index f21b9a5095a3b..8b3ad95216486 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala @@ -21,9 +21,11 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.util.escapeSingleQuotedString +import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, CharVarcharUtils} import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table, TableCatalog} +import org.apache.spark.sql.connector.expressions.BucketTransform import org.apache.spark.sql.execution.LeafExecNode import org.apache.spark.unsafe.types.UTF8String @@ -57,7 +59,7 @@ case class ShowCreateTableExec( } private def showTableDataColumns(table: Table, builder: StringBuilder): Unit = { - val columns = table.schema().fields.map(_.toDDL) + val columns = CharVarcharUtils.getRawSchema(table.schema(), conf).fields.map(_.toDDL) builder ++= concatByMultiLines(columns) } @@ -71,8 +73,9 @@ case class ShowCreateTableExec( builder: StringBuilder, tableOptions: Map[String, String]): Unit = { if (tableOptions.nonEmpty) { - val props = tableOptions.toSeq.sortBy(_._1).map { case (key, value) => - s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" + val props = conf.redactOptions(tableOptions).toSeq.sortBy(_._1).map { + case (key, value) => + s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" } builder ++= "OPTIONS " builder ++= concatByMultiLines(props) @@ -82,8 +85,31 @@ case class ShowCreateTableExec( private def showTablePartitioning(table: Table, builder: StringBuilder): Unit = { if (!table.partitioning.isEmpty) { val transforms = new ArrayBuffer[String] - table.partitioning.foreach(t => transforms += t.describe()) - builder ++= s"PARTITIONED BY ${transforms.mkString("(", ", ", ")")}\n" + var bucketSpec = Option.empty[BucketSpec] + table.partitioning.map { + case BucketTransform(numBuckets, col, sortCol) => + if (sortCol.isEmpty) { + bucketSpec = Some(BucketSpec(numBuckets, col.map(_.fieldNames.mkString(".")), Nil)) + } else { + bucketSpec = Some(BucketSpec(numBuckets, col.map(_.fieldNames.mkString(".")), 
+ sortCol.map(_.fieldNames.mkString(".")))) + } + case t => + transforms += t.describe() + } + if (transforms.nonEmpty) { + builder ++= s"PARTITIONED BY ${transforms.mkString("(", ", ", ")")}\n" + } + + // compatible with v1 + bucketSpec.map { bucket => + assert(bucket.bucketColumnNames.nonEmpty) + builder ++= s"CLUSTERED BY ${bucket.bucketColumnNames.mkString("(", ", ", ")")}\n" + if (bucket.sortColumnNames.nonEmpty) { + builder ++= s"SORTED BY ${bucket.sortColumnNames.mkString("(", ", ", ")")}\n" + } + builder ++= s"INTO ${bucket.numBuckets} BUCKETS\n" + } } } @@ -98,11 +124,12 @@ case class ShowCreateTableExec( builder: StringBuilder, tableOptions: Map[String, String]): Unit = { - val showProps = table.properties.asScala .filterKeys(key => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(key) && !key.startsWith(TableCatalog.OPTION_PREFIX) - && !tableOptions.contains(key)) + && !tableOptions.contains(key) + && !key.equals(TableCatalog.PROP_EXTERNAL) + ) if (showProps.nonEmpty) { val props = showProps.toSeq.sortBy(_._1).map { case (key, value) => @@ -123,5 +150,4 @@ case class ShowCreateTableExec( private def concatByMultiLines(iter: Iterable[String]): String = { iter.mkString("(\n ", ",\n ", ")\n") } - } diff --git a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out index fcd207cd15001..5c6b1a727705d 100644 --- a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out @@ -51,7 +51,7 @@ show create table char_tbl -- !query schema struct -- !query output -CREATE TABLE `default`.`char_tbl` ( +CREATE TABLE default.char_tbl ( `c` CHAR(5), `v` VARCHAR(6)) USING parquet @@ -70,7 +70,7 @@ show create table char_tbl2 -- !query schema struct -- !query output -CREATE TABLE `default`.`char_tbl2` ( +CREATE TABLE default.char_tbl2 ( `c` CHAR(5), `v` VARCHAR(6)) USING parquet @@ -161,7 +161,7 @@ show create table char_tbl3 -- !query schema struct -- !query output -CREATE TABLE `default`.`char_tbl3` ( +CREATE TABLE default.char_tbl3 ( `c` CHAR(5), `v` VARCHAR(6)) USING parquet @@ -218,7 +218,7 @@ show create table char_view -- !query schema struct -- !query output -CREATE VIEW `default`.`char_view` ( +CREATE VIEW default.char_view ( `c`, `v`) AS select * from char_tbl diff --git a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out index 49c27a2229c5b..ffcbb73458aa2 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out @@ -15,7 +15,7 @@ SHOW CREATE TABLE tbl -- !query schema struct -- !query output -CREATE TABLE `default`.`tbl` ( +CREATE TABLE default.tbl ( `a` INT, `b` STRING, `c` INT) @@ -44,7 +44,7 @@ SHOW CREATE TABLE tbl -- !query schema struct -- !query output -CREATE TABLE `default`.`tbl` ( +CREATE TABLE default.tbl ( `a` INT, `b` STRING, `c` INT) @@ -75,7 +75,7 @@ SHOW CREATE TABLE tbl -- !query schema struct -- !query output -CREATE TABLE `default`.`tbl` ( +CREATE TABLE default.tbl ( `a` INT, `b` STRING, `c` INT) @@ -105,7 +105,7 @@ SHOW CREATE TABLE tbl -- !query schema struct -- !query output -CREATE TABLE `default`.`tbl` ( +CREATE TABLE default.tbl ( `a` INT, `b` STRING, `c` INT) @@ -135,7 +135,7 @@ SHOW CREATE TABLE tbl -- !query schema struct -- !query output -CREATE TABLE `default`.`tbl` ( +CREATE TABLE default.tbl ( `b` 
STRING, `c` INT, `a` INT) @@ -165,7 +165,7 @@ SHOW CREATE TABLE tbl -- !query schema struct -- !query output -CREATE TABLE `default`.`tbl` ( +CREATE TABLE default.tbl ( `a` INT, `b` STRING, `c` INT) @@ -197,7 +197,7 @@ SHOW CREATE TABLE tbl -- !query schema struct -- !query output -CREATE TABLE `default`.`tbl` ( +CREATE TABLE default.tbl ( `a` INT, `b` STRING, `c` INT) @@ -227,7 +227,7 @@ SHOW CREATE TABLE tbl -- !query schema struct -- !query output -CREATE TABLE `default`.`tbl` ( +CREATE TABLE default.tbl ( `a` INT, `b` STRING, `c` INT) @@ -257,7 +257,7 @@ SHOW CREATE TABLE tbl -- !query schema struct -- !query output -CREATE TABLE `default`.`tbl` ( +CREATE TABLE default.tbl ( `a` FLOAT, `b` DECIMAL(10,0), `c` DECIMAL(10,0), @@ -295,7 +295,7 @@ SHOW CREATE TABLE view_SPARK_30302 AS SERDE -- !query schema struct -- !query output -CREATE VIEW `default`.`view_SPARK_30302` ( +CREATE VIEW default.view_SPARK_30302 ( `aaa`, `bbb`) AS SELECT a, b FROM tbl @@ -306,7 +306,7 @@ SHOW CREATE TABLE view_SPARK_30302 -- !query schema struct -- !query output -CREATE VIEW `default`.`view_SPARK_30302` ( +CREATE VIEW default.view_SPARK_30302 ( `aaa`, `bbb`) AS SELECT a, b FROM tbl @@ -335,7 +335,7 @@ SHOW CREATE TABLE view_SPARK_30302 AS SERDE -- !query schema struct -- !query output -CREATE VIEW `default`.`view_SPARK_30302` ( +CREATE VIEW default.view_SPARK_30302 ( `aaa` COMMENT 'comment with \'quoted text\' for aaa', `bbb`) COMMENT 'This is a comment with \'quoted text\' for view' @@ -347,7 +347,7 @@ SHOW CREATE TABLE view_SPARK_30302 -- !query schema struct -- !query output -CREATE VIEW `default`.`view_SPARK_30302` ( +CREATE VIEW default.view_SPARK_30302 ( `aaa` COMMENT 'comment with \'quoted text\' for aaa', `bbb`) COMMENT 'This is a comment with \'quoted text\' for view' @@ -377,7 +377,7 @@ SHOW CREATE TABLE view_SPARK_30302 AS SERDE -- !query schema struct -- !query output -CREATE VIEW `default`.`view_SPARK_30302` ( +CREATE VIEW default.view_SPARK_30302 ( `aaa`, `bbb`) TBLPROPERTIES ( @@ -391,7 +391,7 @@ SHOW CREATE TABLE view_SPARK_30302 -- !query schema struct -- !query output -CREATE VIEW `default`.`view_SPARK_30302` ( +CREATE VIEW default.view_SPARK_30302 ( `aaa`, `bbb`) TBLPROPERTIES ( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 3e0627c505341..d9e3342240bcf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2775,6 +2775,7 @@ class DataSourceV2SQLSuite assert(properties.get(TableCatalog.PROP_COMMENT) == "This is a comment") assert(properties.get(TableCatalog.PROP_LOCATION) == "file:/tmp") assert(properties.containsKey(TableCatalog.PROP_OWNER)) + assert(properties.get(TableCatalog.PROP_EXTERNAL) == "true") assert(properties.get(s"${TableCatalog.OPTION_PREFIX}from") == "0") assert(properties.get(s"${TableCatalog.OPTION_PREFIX}to") == "1") assert(properties.get("prop1") == "1") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index 4ffe38abf3678..da6826c7808aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -611,7 +611,7 @@ class PersistedViewTestSuite extends SQLViewTestSuite with 
SharedSparkSession { Seq(true, false).foreach { serde => withView(viewName) { createView(viewName, "SELECT 1 AS a") - val expected = "CREATE VIEW `default`.`v1` ( `a`) AS SELECT 1 AS a" + val expected = s"CREATE VIEW ${formattedViewName(viewName)} ( `a`) AS SELECT 1 AS a" assert(getShowCreateDDL(formattedViewName(viewName), serde) == expected) } } @@ -622,8 +622,8 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { Seq(true, false).foreach { serde => withView(viewName) { createView(viewName, "SELECT 1 AS a, 2 AS b", Seq("a", "b COMMENT 'b column'")) - val expected = "CREATE VIEW `default`.`v1` ( `a`, `b` COMMENT 'b column')" + - " AS SELECT 1 AS a, 2 AS b" + val expected = s"CREATE VIEW ${formattedViewName(viewName)}" + + s" ( `a`, `b` COMMENT 'b column') AS SELECT 1 AS a, 2 AS b" assert(getShowCreateDDL(formattedViewName(viewName), serde) == expected) } } @@ -636,7 +636,7 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { createView(viewName, "SELECT 1 AS c1, '2' AS c2", Seq("c1 COMMENT 'bla'", "c2"), Seq("COMMENT 'table comment'", "TBLPROPERTIES ( 'prop2' = 'value2', 'prop1' = 'value1')")) - val expected = "CREATE VIEW `default`.`v1` ( `c1` COMMENT 'bla', `c2`)" + + val expected = s"CREATE VIEW ${formattedViewName(viewName)} ( `c1` COMMENT 'bla', `c2`)" + " COMMENT 'table comment'" + " TBLPROPERTIES ( 'prop1' = 'value1', 'prop2' = 'value2')" + " AS SELECT 1 AS c1, '2' AS c2" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala index 208ed4c08afc8..023dfce3ba9c9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.execution.command */ trait ShowCreateTableSuiteBase extends command.ShowCreateTableSuiteBase with command.TestsV1AndV2Commands { - override def fullName: String = s"`$ns`.`$table`" + override def fullName: String = s"$ns.$table" test("show create table[simple]") { // todo After SPARK-37517 unify the testcase both v1 and v2 @@ -81,6 +81,21 @@ trait ShowCreateTableSuiteBase extends command.ShowCreateTableSuiteBase } test("bucketed data source table") { + withNamespaceAndTable(ns, table) { t => + sql( + s"""CREATE TABLE $t + |USING json + |CLUSTERED BY (a) INTO 2 BUCKETS + |AS SELECT 1 AS a, "foo" AS b + """.stripMargin + ) + val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING) USING json" + + s" CLUSTERED BY (a) INTO 2 BUCKETS" + assert(getShowCreateDDL(t).mkString(" ") == expected) + } + } + + test("sort bucketed data source table") { withNamespaceAndTable(ns, table) { t => sql( s"""CREATE TABLE $t @@ -96,6 +111,22 @@ trait ShowCreateTableSuiteBase extends command.ShowCreateTableSuiteBase } test("partitioned bucketed data source table") { + withNamespaceAndTable(ns, table) { t => + sql( + s"""CREATE TABLE $t + |USING json + |PARTITIONED BY (c) + |CLUSTERED BY (a) INTO 2 BUCKETS + |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c + """.stripMargin + ) + val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING, `c` DECIMAL(2,1)) USING json" + + s" PARTITIONED BY (c) CLUSTERED BY (a) INTO 2 BUCKETS" + assert(getShowCreateDDL(t).mkString(" ") == expected) + } + } + + test("partitioned sort bucketed data source table") { withNamespaceAndTable(ns, table) { t => sql( s"""CREATE 
TABLE $t diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala index 35b196fe0d8bb..47e59e965509a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala @@ -132,7 +132,9 @@ class ShowCreateTableSuite extends command.ShowCreateTableSuiteBase with Command "`b` STRING,", "`ts` TIMESTAMP)", defaultUsing, - "PARTITIONED BY (a, bucket(16, b), years(ts), months(ts), days(ts), hours(ts))" + "PARTITIONED BY (a, years(ts), months(ts), days(ts), hours(ts))", + "CLUSTERED BY (b)", + "INTO 16 BUCKETS" )) } }

From 450418bdfdc46c2569ff22c307de15c4fca76ebc Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Tue, 18 Jan 2022 13:42:33 +0800
Subject: [PATCH 040/513] [SPARK-37906][SQL] spark-sql should not pass last comment to backend

### What changes were proposed in this pull request?
In https://github.com/apache/spark/pull/34815 we changed back to passing unclosed bracketed comments to the backend, but we missed cases such as
```
SELECT 1; --comment
```
```
SELECT 1; /* comment */
```
This is a common pattern in SQL jobs, so we should ignore a comment at the end of a SQL script. Note that when `-e` is used, we pass the SQL directly to `splitSemiColon`; when `-f` is used, CliDriver adds a `\n` to the query:
```
public int processReader(BufferedReader r) throws IOException {
  StringBuilder qsb = new StringBuilder();
  String line;
  while((line = r.readLine()) != null) {
    if (!line.startsWith("--")) {
      qsb.append(line + "\n");
    }
  }
  return this.processLine(qsb.toString());
}
```
So `splitSemiColon` should handle both cases.
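To make the intended splitting behavior concrete, here is a minimal Python sketch of the logic described above (an illustrative approximation only, not the Scala implementation in `SparkSQLCLIDriver`; escape sequences and other corner cases are simplified). It splits on `;` outside quotes and comments, drops a trailing fragment that is nothing but a completed comment, and still passes through an unclosed bracketed comment:
```python
# Hypothetical sketch only: not the actual CLI code.
def split_semicolon(line: str) -> list:
    parts = []
    begin = 0
    in_single = in_double = in_simple_comment = False
    bracket_level = 0      # nesting depth of /* ... */ comments
    is_statement = False   # does the current fragment contain real SQL?
    i = 0
    while i < len(line):
        ch, nxt = line[i], line[i + 1] if i + 1 < len(line) else ""
        if in_simple_comment:
            in_simple_comment = ch != "\n"
        elif bracket_level > 0:
            if ch == "*" and nxt == "/":
                bracket_level -= 1
                i += 1
            elif ch == "/" and nxt == "*":
                bracket_level += 1
                i += 1
        elif in_single:
            in_single = ch != "'"
        elif in_double:
            in_double = ch != '"'
        elif ch == "-" and nxt == "-":
            in_simple_comment = True
            i += 1
        elif ch == "/" and nxt == "*":
            bracket_level += 1
            i += 1
        elif ch == "'":
            in_single, is_statement = True, True
        elif ch == '"':
            in_double, is_statement = True, True
        elif ch == ";":
            if is_statement:
                parts.append(line[begin:i])
            begin, is_statement = i + 1, False
        elif not ch.isspace():
            is_statement = True
        i += 1
    # Keep the tail only if it holds real SQL or an unclosed bracketed comment.
    if is_statement or bracket_level > 0:
        parts.append(line[begin:])
    return parts

print(split_semicolon("SELECT 1; --comment"))      # ['SELECT 1']
print(split_semicolon("SELECT 1; /* comment */"))  # ['SELECT 1']
print(split_semicolon("SELECT 1; /* comment"))     # ['SELECT 1', ' /* comment']
```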
In this PR, the final behavior is as below.

For `-e`:

| Query | Behavior before | Behavior now |
-------|---------|--------------|
| `SELECT 1; --comment` | Will pass both `SELECT 1` and `--comment` to the backend engine and throw an exception since `--comment` can't be executed | Only pass `SELECT 1` to the backend engine and ignore the simple comment |
| `SELECT 1; /* comment */ ` | Will pass both `SELECT 1` and `/* comment */` to the backend engine and throw an exception since `/* comment */` can't be executed | Only pass `SELECT 1` to the backend engine |
| `SELECT 1; /* comment ` | Will pass `SELECT 1` and `/* comment` to the backend engine | Will pass `SELECT 1` and `/* comment` to the backend engine |
| `SELECT 1; /* comment SELECT 1` | Will pass `SELECT 1` and `/* comment SELECT 1` to the backend engine | Will pass `SELECT 1` and `/* comment SELECT 1` to the backend engine |
| `/* comment SELECT 1;` | Will pass `/* comment SELECT 1;` to the backend engine and throw an `unclosed bracketed comment` exception | Will pass `/* comment SELECT 1;` to the backend engine and throw an `unclosed bracketed comment` exception |

For `-f`, since `-f` adds a `\n` at the end of each line that does not start with `--`:

| Query | Behavior before | Behavior now |
-------|---------|--------------|
| `SELECT 1; --comment\n` | Will pass both `SELECT 1` and `--comment` to the backend engine and throw an exception since `--comment` can't be executed | Only pass `SELECT 1` to the backend engine and ignore the simple comment |
| `SELECT 1; /* comment */ \n` | Will pass both `SELECT 1` and `/* comment */` to the backend engine and throw an exception since `/* comment */` can't be executed | Only pass `SELECT 1` to the backend engine |
| `SELECT 1; /* comment \n` | Will pass `SELECT 1` and `/* comment\n` to the backend engine | Will pass `SELECT 1` and `/* comment\n` to the backend engine |
| `SELECT 1; /* comment SELECT 1\n` | Will pass `SELECT 1` and `/* comment SELECT 1\n` to the backend engine | Will pass `SELECT 1` and `/* comment SELECT 1\n` to the backend engine |
| `/* comment SELECT 1;\n` | Will pass `/* comment SELECT 1;\n` to the backend engine and throw an `unclosed bracketed comment` exception | Will pass `/* comment SELECT 1;\n` to the backend engine and throw an `unclosed bracketed comment` exception |

### Why are the changes needed?
Spark SQL should not pass the last, entire comment to the backend.

### Does this PR introduce _any_ user-facing change?
Users can write SQL scripts that end with a comment:
```
SELECT 1; --comment
```
```
SELECT 1; /* comment */
```

### How was this patch tested?
Added UT

Closes #35206 from AngersZhuuuu/SPARK-37906.

Lead-authored-by: Angerszhuuuu Co-authored-by: AngersZhuuuu Signed-off-by: Wenchen Fan --- .../hive/thriftserver/SparkSQLCLIDriver.scala | 14 +++++- .../sql/hive/thriftserver/CliSuite.scala | 44 ++++++++++++++++++- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index e17b74873395e..4c26e93606083 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -527,7 +527,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { // string, the origin implementation from Hive will not drop the trailing semicolon as expected, // hence we refined this function a little bit.
// Note: [SPARK-33100] Ignore a semicolon inside a bracketed comment in spark-sql. - private def splitSemiColon(line: String): JList[String] = { + private[hive] def splitSemiColon(line: String): JList[String] = { var insideSingleQuote = false var insideDoubleQuote = false var insideSimpleComment = false @@ -613,7 +613,17 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { isStatement = statementInProgress(index) } - if (beginIndex < line.length()) { + // Check the last char is end of nested bracketed comment. + val endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1 + // Spark SQL support simple comment and nested bracketed comment in query body. + // But if Spark SQL receives a comment alone, it will throw parser exception. + // In Spark SQL CLI, if there is a completed comment in the end of whole query, + // since Spark SQL CLL use `;` to split the query, CLI will pass the comment + // to the backend engine and throw exception. CLI should ignore this comment, + // If there is an uncompleted statement or an uncompleted bracketed comment in the end, + // CLI should also pass this part to the backend engine, which may throw an exception + // with clear error message. + if (!endOfBracketedComment && (isStatement || insideBracketedComment)) { ret.add(line.substring(beginIndex)) } ret diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 234fb89b01a83..4af051746b96e 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -22,17 +22,23 @@ import java.nio.charset.StandardCharsets import java.sql.Timestamp import java.util.Date +import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.concurrent.Promise import scala.concurrent.duration._ +import org.apache.hadoop.hive.cli.CliSessionState import org.apache.hadoop.hive.conf.HiveConf.ConfVars +import org.apache.hadoop.hive.ql.session.SessionState import org.scalatest.BeforeAndAfterAll +import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.ProcessTestUtils.ProcessOutputCapturer -import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.HiveUtils._ +import org.apache.spark.sql.hive.client.HiveClientImpl import org.apache.spark.sql.hive.test.HiveTestJars import org.apache.spark.sql.internal.StaticSQLConf import org.apache.spark.util.{ThreadUtils, Utils} @@ -638,4 +644,40 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { runCliWithin(2.minute, errorResponses = Seq("ParseException"))( "delete jar dummy.jar;" -> "missing 'FROM' at 'jar'(line 1, pos 7)") } + + test("SPARK-37906: Spark SQL CLI should not pass final comment") { + val sparkConf = new SparkConf(loadDefaults = true) + .setMaster("local-cluster[1,1,1024]") + .setAppName("SPARK-37906") + val sparkContext = new SparkContext(sparkConf) + SparkSQLEnv.sparkContext = sparkContext + val hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf) + val extraConfigs = HiveUtils.formatTimeVarsForHiveClient(hadoopConf) + val cliConf = HiveClientImpl.newHiveConf(sparkConf, hadoopConf, extraConfigs) + val sessionState = new 
CliSessionState(cliConf) + SessionState.setCurrentSessionState(sessionState) + val cli = new SparkSQLCLIDriver + Seq("SELECT 1; --comment" -> Seq("SELECT 1"), + "SELECT 1; /* comment */" -> Seq("SELECT 1"), + "SELECT 1; /* comment" -> Seq("SELECT 1", " /* comment"), + "SELECT 1; /* comment select 1;" -> Seq("SELECT 1", " /* comment select 1;"), + "/* This is a comment without end symbol SELECT 1;" -> + Seq("/* This is a comment without end symbol SELECT 1;"), + "SELECT 1; --comment\n" -> Seq("SELECT 1"), + "SELECT 1; /* comment */\n" -> Seq("SELECT 1"), + "SELECT 1; /* comment\n" -> Seq("SELECT 1", " /* comment\n"), + "SELECT 1; /* comment select 1;\n" -> Seq("SELECT 1", " /* comment select 1;\n"), + "/* This is a comment without end symbol SELECT 1;\n" -> + Seq("/* This is a comment without end symbol SELECT 1;\n"), + "/* comment */ SELECT 1;" -> Seq("/* comment */ SELECT 1"), + "SELECT /* comment */ 1;" -> Seq("SELECT /* comment */ 1"), + "-- comment " -> Seq(), + "-- comment \nSELECT 1" -> Seq("-- comment \nSELECT 1"), + "/* comment */ " -> Seq() + ).foreach { case (query, ret) => + assert(cli.splitSemiColon(query).asScala === ret) + } + sessionState.close() + SparkSQLEnv.stop() + } } From 54f91d391acd2995defc1b5666dc0bb95100a575 Mon Sep 17 00:00:00 2001 From: yaohua Date: Tue, 18 Jan 2022 13:48:06 +0800 Subject: [PATCH 041/513] [SPARK-37768][SQL][FOLLOWUP] Schema pruning for the metadata struct ### What changes were proposed in this pull request? Follow-up PR of #34575. Support the metadata struct schema pruning for all file formats. ### Why are the changes needed? Performance improvements. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UTs and a new UT. Closes #35147 from Yaohua628/spark-37768. Authored-by: yaohua Signed-off-by: Wenchen Fan --- .../catalyst/expressions/SchemaPruning.scala | 8 +- .../expressions/namedExpressions.scala | 2 +- .../sql/catalyst/optimizer/objects.scala | 2 +- .../expressions/SchemaPruningSuite.scala | 4 +- .../datasources/FileSourceStrategy.scala | 3 +- .../execution/datasources/SchemaPruning.scala | 93 ++++++++++--------- .../datasources/v2/PushDownUtils.scala | 2 +- .../datasources/FileMetadataStructSuite.scala | 48 ++++++++++ 8 files changed, 107 insertions(+), 55 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SchemaPruning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SchemaPruning.scala index 2a182b6424db2..fd5b2db61f31e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SchemaPruning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SchemaPruning.scala @@ -33,8 +33,8 @@ object SchemaPruning extends SQLConfHelper { * 1. The schema field ordering at original schema is still preserved in pruned schema. * 2. The top-level fields are not pruned here. */ - def pruneDataSchema( - dataSchema: StructType, + def pruneSchema( + schema: StructType, requestedRootFields: Seq[RootField]): StructType = { val resolver = conf.resolver // Merge the requested root fields into a single schema. 
Note the ordering of the fields @@ -44,10 +44,10 @@ object SchemaPruning extends SQLConfHelper { .map { root: RootField => StructType(Array(root.field)) } .reduceLeft(_ merge _) val mergedDataSchema = - StructType(dataSchema.map(d => mergedSchema.find(m => resolver(m.name, d.name)).getOrElse(d))) + StructType(schema.map(d => mergedSchema.find(m => resolver(m.name, d.name)).getOrElse(d))) // Sort the fields of mergedDataSchema according to their order in dataSchema, // recursively. This makes mergedDataSchema a pruned schema of dataSchema - sortLeftFieldsByRight(mergedDataSchema, dataSchema).asInstanceOf[StructType] + sortLeftFieldsByRight(mergedDataSchema, schema).asInstanceOf[StructType] } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index c51030fdd6405..a099fadcec365 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -342,7 +342,7 @@ case class AttributeReference( AttributeReference(name, dataType, nullable, newMetadata)(exprId, qualifier) } - override def withDataType(newType: DataType): Attribute = { + override def withDataType(newType: DataType): AttributeReference = { AttributeReference(name, newType, nullable, metadata)(exprId, qualifier) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala index 52544ff3e241d..c347a2e807ef2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala @@ -222,7 +222,7 @@ object ObjectSerializerPruning extends Rule[LogicalPlan] { if (conf.serializerNestedSchemaPruningEnabled && rootFields.nonEmpty) { // Prunes nested fields in serializers. - val prunedSchema = SchemaPruning.pruneDataSchema( + val prunedSchema = SchemaPruning.pruneSchema( StructType.fromAttributes(prunedSerializer.map(_.toAttribute)), rootFields) val nestedPrunedSerializer = prunedSerializer.zipWithIndex.map { case (serializer, idx) => pruneSerializer(serializer, prunedSchema(idx).dataType) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SchemaPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SchemaPruningSuite.scala index c67a9622b61fd..b64bc49f95446 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SchemaPruningSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SchemaPruningSuite.scala @@ -31,7 +31,7 @@ class SchemaPruningSuite extends SparkFunSuite with SQLHelper { // `derivedFromAtt` doesn't affect the result of pruned schema. 
SchemaPruning.RootField(field = f, derivedFromAtt = true) } - val prunedSchema = SchemaPruning.pruneDataSchema(schema, requestedRootFields) + val prunedSchema = SchemaPruning.pruneSchema(schema, requestedRootFields) assert(prunedSchema === expectedSchema) } @@ -140,7 +140,7 @@ class SchemaPruningSuite extends SparkFunSuite with SQLHelper { assert(field.metadata.getString("foo") == "bar") val schema = StructType(Seq(field)) - val prunedSchema = SchemaPruning.pruneDataSchema(schema, rootFields) + val prunedSchema = SchemaPruning.pruneSchema(schema, rootFields) assert(prunedSchema.head.metadata.getString("foo") == "bar") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala index c1282fa69ca80..5df8057ea92fe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala @@ -213,11 +213,10 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging { val outputSchema = readDataColumns.toStructType logInfo(s"Output Data Schema: ${outputSchema.simpleString(5)}") - val metadataStructOpt = requiredAttributes.collectFirst { + val metadataStructOpt = l.output.collectFirst { case MetadataAttribute(attr) => attr } - // TODO (yaohua): should be able to prune the metadata struct only containing what needed val metadataColumns = metadataStructOpt.map { metadataStruct => metadataStruct.dataType.asInstanceOf[StructType].fields.map { field => MetadataAttribute(field.name, field.dataType) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala index 93bd1acc7377d..9dd2f40972ad8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala @@ -31,58 +31,68 @@ import org.apache.spark.sql.util.SchemaUtils._ * By "physical column", we mean a column as defined in the data source format like Parquet format * or ORC format. For example, in Spark SQL, a root-level Parquet column corresponds to a SQL * column, and a nested Parquet column corresponds to a [[StructField]]. + * + * Also prunes the unnecessary metadata columns if any for all file formats. 
*/ object SchemaPruning extends Rule[LogicalPlan] { import org.apache.spark.sql.catalyst.expressions.SchemaPruning._ override def apply(plan: LogicalPlan): LogicalPlan = - if (conf.nestedSchemaPruningEnabled) { - apply0(plan) - } else { - plan - } - - private def apply0(plan: LogicalPlan): LogicalPlan = plan transformDown { case op @ PhysicalOperation(projects, filters, - l @ LogicalRelation(hadoopFsRelation: HadoopFsRelation, _, _, _)) - if canPruneRelation(hadoopFsRelation) => - - prunePhysicalColumns(l.output, projects, filters, hadoopFsRelation.dataSchema, - prunedDataSchema => { + l @ LogicalRelation(hadoopFsRelation: HadoopFsRelation, _, _, _)) => + prunePhysicalColumns(l, projects, filters, hadoopFsRelation, + (prunedDataSchema, prunedMetadataSchema) => { val prunedHadoopRelation = hadoopFsRelation.copy(dataSchema = prunedDataSchema)(hadoopFsRelation.sparkSession) - buildPrunedRelation(l, prunedHadoopRelation) + buildPrunedRelation(l, prunedHadoopRelation, prunedMetadataSchema) }).getOrElse(op) } /** * This method returns optional logical plan. `None` is returned if no nested field is required or * all nested fields are required. + * + * This method will prune both the data schema and the metadata schema */ private def prunePhysicalColumns( - output: Seq[AttributeReference], + relation: LogicalRelation, projects: Seq[NamedExpression], filters: Seq[Expression], - dataSchema: StructType, - leafNodeBuilder: StructType => LeafNode): Option[LogicalPlan] = { + hadoopFsRelation: HadoopFsRelation, + leafNodeBuilder: (StructType, StructType) => LeafNode): Option[LogicalPlan] = { + val (normalizedProjects, normalizedFilters) = - normalizeAttributeRefNames(output, projects, filters) + normalizeAttributeRefNames(relation.output, projects, filters) val requestedRootFields = identifyRootFields(normalizedProjects, normalizedFilters) // If requestedRootFields includes a nested field, continue. Otherwise, // return op if (requestedRootFields.exists { root: RootField => !root.derivedFromAtt }) { - val prunedDataSchema = pruneDataSchema(dataSchema, requestedRootFields) - // If the data schema is different from the pruned data schema, continue. Otherwise, - // return op. We effect this comparison by counting the number of "leaf" fields in - // each schemata, assuming the fields in prunedDataSchema are a subset of the fields - // in dataSchema. - if (countLeaves(dataSchema) > countLeaves(prunedDataSchema)) { - val prunedRelation = leafNodeBuilder(prunedDataSchema) - val projectionOverSchema = ProjectionOverSchema(prunedDataSchema) + val prunedDataSchema = if (canPruneDataSchema(hadoopFsRelation)) { + pruneSchema(hadoopFsRelation.dataSchema, requestedRootFields) + } else { + hadoopFsRelation.dataSchema + } + + val metadataSchema = + relation.output.collect { case MetadataAttribute(attr) => attr }.toStructType + val prunedMetadataSchema = if (metadataSchema.nonEmpty) { + pruneSchema(metadataSchema, requestedRootFields) + } else { + metadataSchema + } + // If the data schema is different from the pruned data schema + // OR + // the metadata schema is different from the pruned metadata schema, continue. + // Otherwise, return None. 
+ if (countLeaves(hadoopFsRelation.dataSchema) > countLeaves(prunedDataSchema) || + countLeaves(metadataSchema) > countLeaves(prunedMetadataSchema)) { + val prunedRelation = leafNodeBuilder(prunedDataSchema, prunedMetadataSchema) + val projectionOverSchema = + ProjectionOverSchema(prunedDataSchema.merge(prunedMetadataSchema)) Some(buildNewProjection(projects, normalizedProjects, normalizedFilters, prunedRelation, projectionOverSchema)) } else { @@ -96,9 +106,10 @@ object SchemaPruning extends Rule[LogicalPlan] { /** * Checks to see if the given relation can be pruned. Currently we support Parquet and ORC v1. */ - private def canPruneRelation(fsRelation: HadoopFsRelation) = - fsRelation.fileFormat.isInstanceOf[ParquetFileFormat] || - fsRelation.fileFormat.isInstanceOf[OrcFileFormat] + private def canPruneDataSchema(fsRelation: HadoopFsRelation): Boolean = + conf.nestedSchemaPruningEnabled && ( + fsRelation.fileFormat.isInstanceOf[ParquetFileFormat] || + fsRelation.fileFormat.isInstanceOf[OrcFileFormat]) /** * Normalizes the names of the attribute references in the given projects and filters to reflect @@ -162,29 +173,25 @@ object SchemaPruning extends Rule[LogicalPlan] { */ private def buildPrunedRelation( outputRelation: LogicalRelation, - prunedBaseRelation: HadoopFsRelation) = { - val prunedOutput = getPrunedOutput(outputRelation.output, prunedBaseRelation.schema) - // also add the metadata output if any - // TODO: should be able to prune the metadata schema - val metaOutput = outputRelation.output.collect { - case MetadataAttribute(attr) => attr - } - outputRelation.copy(relation = prunedBaseRelation, output = prunedOutput ++ metaOutput) + prunedBaseRelation: HadoopFsRelation, + prunedMetadataSchema: StructType) = { + val finalSchema = prunedBaseRelation.schema.merge(prunedMetadataSchema) + val prunedOutput = getPrunedOutput(outputRelation.output, finalSchema) + outputRelation.copy(relation = prunedBaseRelation, output = prunedOutput) } // Prune the given output to make it consistent with `requiredSchema`. private def getPrunedOutput( output: Seq[AttributeReference], requiredSchema: StructType): Seq[AttributeReference] = { - // We need to replace the expression ids of the pruned relation output attributes - // with the expression ids of the original relation output attributes so that - // references to the original relation's output are not broken - val outputIdMap = output.map(att => (att.name, att.exprId)).toMap + // We need to update the data type of the output attributes to use the pruned ones. 
+ // so that references to the original relation's output are not broken + val nameAttributeMap = output.map(att => (att.name, att)).toMap requiredSchema .toAttributes .map { - case att if outputIdMap.contains(att.name) => - att.withExprId(outputIdMap(att.name)) + case att if nameAttributeMap.contains(att.name) => + nameAttributeMap(att.name).withDataType(att.dataType) case att => att } } @@ -203,6 +210,4 @@ object SchemaPruning extends Rule[LogicalPlan] { case _ => 1 } } - - } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala index db7b3dc7248f3..29d86b67b28ec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala @@ -187,7 +187,7 @@ object PushDownUtils extends PredicateHelper { case r: SupportsPushDownRequiredColumns if SQLConf.get.nestedSchemaPruningEnabled => val rootFields = SchemaPruning.identifyRootFields(projects, filters) val prunedSchema = if (rootFields.nonEmpty) { - SchemaPruning.pruneDataSchema(relation.schema, rootFields) + SchemaPruning.pruneSchema(relation.schema, rootFields) } else { new StructType() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala index fffac885da5fc..8bf5d6183925c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala @@ -22,6 +22,7 @@ import java.sql.Timestamp import java.text.SimpleDateFormat import org.apache.spark.sql.{AnalysisException, Column, DataFrame, QueryTest, Row} +import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -384,4 +385,51 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession { } } } + + metadataColumnsTest("prune metadata schema in projects", schema) { (df, f0, f1) => + val prunedDF = df.select("name", "age", "info.id", METADATA_FILE_NAME) + val fileSourceScanMetaCols = prunedDF.queryExecution.sparkPlan.collectFirst { + case p: FileSourceScanExec => p.metadataColumns + }.get + assert(fileSourceScanMetaCols.size == 1) + assert(fileSourceScanMetaCols.head.name == "file_name") + + checkAnswer( + prunedDF, + Seq(Row("jack", 24, 12345L, f0(METADATA_FILE_NAME)), + Row("lily", 31, 54321L, f1(METADATA_FILE_NAME))) + ) + } + + metadataColumnsTest("prune metadata schema in filters", schema) { (df, f0, f1) => + val prunedDF = df.select("name", "age", "info.id") + .where(col(METADATA_FILE_PATH).contains("data/f0")) + + val fileSourceScanMetaCols = prunedDF.queryExecution.sparkPlan.collectFirst { + case p: FileSourceScanExec => p.metadataColumns + }.get + assert(fileSourceScanMetaCols.size == 1) + assert(fileSourceScanMetaCols.head.name == "file_path") + + checkAnswer( + prunedDF, + Seq(Row("jack", 24, 12345L)) + ) + } + + metadataColumnsTest("prune metadata schema in projects and filters", schema) { (df, f0, f1) => + val prunedDF = df.select("name", "age", "info.id", METADATA_FILE_SIZE) + .where(col(METADATA_FILE_PATH).contains("data/f0")) + + val fileSourceScanMetaCols = 
prunedDF.queryExecution.sparkPlan.collectFirst { + case p: FileSourceScanExec => p.metadataColumns + }.get + assert(fileSourceScanMetaCols.size == 2) + assert(fileSourceScanMetaCols.map(_.name).toSet == Set("file_size", "file_path")) + + checkAnswer( + prunedDF, + Seq(Row("jack", 24, 12345L, f0(METADATA_FILE_SIZE))) + ) + } }

From 1f496fbea688c7082bad7e6280c8a949fbfd31b7 Mon Sep 17 00:00:00 2001
From: ulysses-you
Date: Tue, 18 Jan 2022 16:22:03 +0800
Subject: [PATCH 042/513] [SPARK-37949][SQL] Improve Rebalance statistics estimation

### What changes were proposed in this pull request?
Match `RebalancePartitions` in `SizeInBytesOnlyStatsPlanVisitor` and `BasicStatsPlanVisitor`.

### Why are the changes needed?
The default statistics estimation only considers the size in bytes, which may lose the row count and column statistics. `RebalancePartitions` does not actually change the statistics of the plan, so we can use the statistics of its child for a more accurate estimation.

### Does this PR introduce _any_ user-facing change?
No, it only affects the statistics of the plan.

### How was this patch tested?
Unified the test in `BasicStatsEstimationSuite`.

Closes #35235 from ulysses-you/SPARK-37949.

Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../sql/catalyst/plans/logical/LogicalPlanVisitor.scala | 3 +++ .../logical/statsEstimation/BasicStatsPlanVisitor.scala | 2 ++ .../statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala | 2 ++ .../statsEstimation/BasicStatsEstimationSuite.scala | 8 ++++++-- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala index ba927746bbf6a..fd5f9051719dc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala @@ -37,6 +37,7 @@ trait LogicalPlanVisitor[T] { case p: Project => visitProject(p) case p: Repartition => visitRepartition(p) case p: RepartitionByExpression => visitRepartitionByExpr(p) + case p: RebalancePartitions => visitRebalancePartitions(p) case p: Sample => visitSample(p) case p: ScriptTransformation => visitScriptTransform(p) case p: Union => visitUnion(p) @@ -77,6 +78,8 @@ trait LogicalPlanVisitor[T] { def visitRepartitionByExpr(p: RepartitionByExpression): T + def visitRebalancePartitions(p: RebalancePartitions): T + def visitSample(p: Sample): T def visitScriptTransform(p: ScriptTransformation): T diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala index 3f702724cca53..0f09022fb9c2f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala @@ -88,6 +88,8 @@ object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { override def visitRepartitionByExpr(p: RepartitionByExpression): Statistics = fallback(p) + override def visitRebalancePartitions(p: RebalancePartitions): Statistics = fallback(p) + override def visitSample(p: Sample): Statistics = fallback(p) override def visitScriptTransform(p: ScriptTransformation):
Statistics = default(p) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala index 73c1b9445f693..67a045fe5ec1a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala @@ -132,6 +132,8 @@ object SizeInBytesOnlyStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { override def visitRepartitionByExpr(p: RepartitionByExpression): Statistics = p.child.stats + override def visitRebalancePartitions(p: RebalancePartitions): Statistics = p.child.stats + override def visitSample(p: Sample): Statistics = { val ratio = p.upperBound - p.lowerBound var sizeInBytes = EstimationUtils.ceil(BigDecimal(p.child.stats.sizeInBytes) * ratio) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index 31e289e052586..bc61a76ecfc22 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -259,12 +259,16 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { expectedStatsCboOff = Statistics.DUMMY) } - test("SPARK-35203: Improve Repartition statistics estimation") { + test("Improve Repartition statistics estimation") { + // SPARK-35203 for repartition and repartitionByExpr + // SPARK-37949 for rebalance Seq( RepartitionByExpression(plan.output, plan, 10), RepartitionByExpression(Nil, plan, None), plan.repartition(2), - plan.coalesce(3)).foreach { rep => + plan.coalesce(3), + plan.rebalance(), + plan.rebalance(plan.output: _*)).foreach { rep => val expectedStats = Statistics(plan.size.get, Some(plan.rowCount), plan.attributeStats) checkStats( rep,

From eab2331ea2db10851492d44c35ab369c730527e1 Mon Sep 17 00:00:00 2001
From: Hyukjin Kwon
Date: Wed, 19 Jan 2022 08:34:13 +0900
Subject: [PATCH 043/513] [SPARK-37850][PYTHON][INFRA] Enable flake's E731 rule in PySpark

### What changes were proposed in this pull request?
This PR enables flake8's [E731](https://www.flake8rules.com/rules/E731.html) rule in the PySpark codebase to comply with PEP 8 (https://www.python.org/dev/peps/pep-0008/#programming-recommendations).

### Why are the changes needed?
To comply with PEP 8.

### Does this PR introduce _any_ user-facing change?
No, dev-only.

### How was this patch tested?
Existing test cases should cover this.

Closes #35150 from HyukjinKwon/SPARK-37850.
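E731 ("do not assign a lambda expression, use a def") drives the mechanical changes in this diff: each `name = lambda ...` assignment becomes an equivalent named function. A minimal, self-contained sketch of the before/after pattern, assuming a plain pandas DataFrame purely for illustration:
```python
import pandas as pd

df = pd.DataFrame({"b": [2, 1]}, index=[1, 0])

# Before (flagged by flake8 E731: "do not assign a lambda expression, use a def"):
#     sort = lambda frame: frame.sort_index()
# After, the equivalent named function, as applied throughout this patch:
def sort(frame: pd.DataFrame) -> pd.DataFrame:
    return frame.sort_index()

print(sort(df))  # same behavior, but `sort` now has a real __name__ and can carry type hints
```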
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- dev/tox.ini | 3 - python/pyspark/pandas/accessors.py | 38 +++++-- python/pyspark/pandas/frame.py | 39 +++++-- python/pyspark/pandas/groupby.py | 18 ++- python/pyspark/pandas/missing/common.py | 99 ++++++++++------- python/pyspark/pandas/namespace.py | 15 +-- python/pyspark/pandas/series.py | 7 +- python/pyspark/pandas/tests/test_groupby.py | 104 ++++++++++++++---- .../tests/test_ops_on_diff_frames_groupby.py | 28 ++++- .../pandas/tests/test_series_string.py | 5 +- python/pyspark/pandas/utils.py | 4 +- python/pyspark/rdd.py | 10 +- python/pyspark/sql/session.py | 4 +- .../sql/tests/test_pandas_udf_scalar.py | 14 ++- python/pyspark/sql/tests/test_udf.py | 3 +- python/pyspark/sql/types.py | 37 +++++-- python/pyspark/streaming/dstream.py | 25 ++++- python/pyspark/tests/test_rdd.py | 4 +- python/pyspark/tests/test_serializers.py | 5 +- python/pyspark/worker.py | 3 +- 20 files changed, 327 insertions(+), 138 deletions(-) diff --git a/dev/tox.ini b/dev/tox.ini index fbe44573a96b1..464b9b959fa14 100644 --- a/dev/tox.ini +++ b/dev/tox.ini @@ -24,9 +24,6 @@ ignore = # There are too many instances to fix. Ignored for now. W503, W504, - - # Below rules should be enabled in the future. - E731, per-file-ignores = # E501 is ignored as shared.py is auto-generated. python/pyspark/ml/param/shared.py: E501, diff --git a/python/pyspark/pandas/accessors.py b/python/pyspark/pandas/accessors.py index 22042491eb072..674fa436af301 100644 --- a/python/pyspark/pandas/accessors.py +++ b/python/pyspark/pandas/accessors.py @@ -335,14 +335,19 @@ def apply_batch( if not isinstance(func, FunctionType): assert callable(func), "the first argument should be a callable function." f = func - func = lambda *args, **kwargs: f(*args, **kwargs) + # Note that the return type hint specified here affects actual return + # type in Spark (e.g., infer_return_type). And, MyPy does not allow + # redefinition of a function. 
+ func = lambda *args, **kwargs: f(*args, **kwargs) # noqa: E731 spec = inspect.getfullargspec(func) return_sig = spec.annotations.get("return", None) should_infer_schema = return_sig is None original_func = func - func = lambda o: original_func(o, *args, **kwds) + + def new_func(o: Any) -> pd.DataFrame: + return original_func(o, *args, **kwds) self_applied: DataFrame = DataFrame(self._psdf._internal.resolved_copy) @@ -355,7 +360,7 @@ def apply_batch( ) limit = ps.get_option("compute.shortcut_limit") pdf = self_applied.head(limit + 1)._to_internal_pandas() - applied = func(pdf) + applied = new_func(pdf) if not isinstance(applied, pd.DataFrame): raise ValueError( "The given function should return a frame; however, " @@ -371,7 +376,7 @@ def apply_batch( return_schema = StructType([field.struct_field for field in index_fields + data_fields]) output_func = GroupBy._make_pandas_df_builder_func( - self_applied, func, return_schema, retain_index=True + self_applied, new_func, return_schema, retain_index=True ) sdf = self_applied._internal.spark_frame.mapInPandas( lambda iterator: map(output_func, iterator), schema=return_schema @@ -394,7 +399,7 @@ def apply_batch( return_schema = cast(DataFrameType, return_type).spark_type output_func = GroupBy._make_pandas_df_builder_func( - self_applied, func, return_schema, retain_index=should_retain_index + self_applied, new_func, return_schema, retain_index=should_retain_index ) sdf = self_applied._internal.to_internal_spark_frame.mapInPandas( lambda iterator: map(output_func, iterator), schema=return_schema @@ -570,10 +575,12 @@ def transform_batch( should_infer_schema = return_sig is None should_retain_index = should_infer_schema original_func = func - func = lambda o: original_func(o, *args, **kwargs) + + def new_func(o: Any) -> Union[pd.DataFrame, pd.Series]: + return original_func(o, *args, **kwargs) def apply_func(pdf: pd.DataFrame) -> pd.DataFrame: - return func(pdf).to_frame() + return new_func(pdf).to_frame() def pandas_series_func( f: Callable[[pd.DataFrame], pd.DataFrame], return_type: DataType @@ -595,7 +602,7 @@ def udf(pdf: pd.DataFrame) -> pd.Series: ) limit = ps.get_option("compute.shortcut_limit") pdf = self._psdf.head(limit + 1)._to_internal_pandas() - transformed = func(pdf) + transformed = new_func(pdf) if not isinstance(transformed, (pd.DataFrame, pd.Series)): raise ValueError( "The given function should return a frame; however, " @@ -644,7 +651,10 @@ def udf(pdf: pd.DataFrame) -> pd.Series: self_applied: DataFrame = DataFrame(self._psdf._internal.resolved_copy) output_func = GroupBy._make_pandas_df_builder_func( - self_applied, func, return_schema, retain_index=True # type: ignore[arg-type] + self_applied, + new_func, # type: ignore[arg-type] + return_schema, + retain_index=True, ) columns = self_applied._internal.spark_columns @@ -709,7 +719,10 @@ def udf(pdf: pd.DataFrame) -> pd.Series: self_applied = DataFrame(self._psdf._internal.resolved_copy) output_func = GroupBy._make_pandas_df_builder_func( - self_applied, func, return_schema, should_retain_index # type: ignore[arg-type] + self_applied, + new_func, # type: ignore[arg-type] + return_schema, + retain_index=should_retain_index, ) columns = self_applied._internal.spark_columns @@ -892,7 +905,10 @@ def _transform_batch( if not isinstance(func, FunctionType): f = func - func = lambda *args, **kwargs: f(*args, **kwargs) + # Note that the return type hint specified here affects actual return + # type in Spark (e.g., infer_return_type). 
And, MyPy does not allow + # redefinition of a function. + func = lambda *args, **kwargs: f(*args, **kwargs) # noqa: E731 if return_type is None: # TODO: In this case, it avoids the shortcut for now (but only infers schema) diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index d400a3701b2f2..d4803eb60261b 100644 --- a/python/pyspark/pandas/frame.py +++ b/python/pyspark/pandas/frame.py @@ -2438,7 +2438,10 @@ def apply( if not isinstance(func, types.FunctionType): assert callable(func), "the first argument should be a callable function." f = func - func = lambda *args, **kwargs: f(*args, **kwargs) + # Note that the return type hint specified here affects actual return + # type in Spark (e.g., infer_return_type). And, MyPy does not allow + # redefinition of a function. + func = lambda *args, **kwargs: f(*args, **kwargs) # noqa: E731 axis = validate_axis(axis) should_return_series = False @@ -2691,7 +2694,10 @@ def transform( if not isinstance(func, types.FunctionType): assert callable(func), "the first argument should be a callable function." f = func - func = lambda *args, **kwargs: f(*args, **kwargs) + # Note that the return type hint specified here affects actual return + # type in Spark (e.g., infer_return_type). And, MyPy does not allow + # redefinition of a function. + func = lambda *args, **kwargs: f(*args, **kwargs) # noqa: E731 axis = validate_axis(axis) if axis != 0: @@ -5468,9 +5474,15 @@ def op(psser: ps.Series) -> ps.Series: return psser else: - op = lambda psser: psser._fillna(value=value, method=method, axis=axis, limit=limit) + + def op(psser: ps.Series) -> ps.Series: + return psser._fillna(value=value, method=method, axis=axis, limit=limit) + elif method is not None: - op = lambda psser: psser._fillna(value=value, method=method, axis=axis, limit=limit) + + def op(psser: ps.Series) -> ps.Series: + return psser._fillna(value=value, method=method, axis=axis, limit=limit) + else: raise ValueError("Must specify a fillna 'value' or 'method' parameter.") @@ -5605,7 +5617,9 @@ def op(psser: ps.Series) -> ps.Series: return psser else: - op = lambda psser: psser.replace(to_replace=to_replace, value=value, regex=regex) + + def op(psser: ps.Series) -> ps.Series: + return psser.replace(to_replace=to_replace, value=value, regex=regex) psdf = self._apply_series_op(op) if inplace: @@ -7700,7 +7714,9 @@ def to_list(os: Optional[Union[Name, List[Name]]]) -> List[Label]: how = validate_how(how) def resolve(internal: InternalFrame, side: str) -> InternalFrame: - rename = lambda col: "__{}_{}".format(side, col) + def rename(col: str) -> str: + return "__{}_{}".format(side, col) + internal = internal.resolved_copy sdf = internal.spark_frame sdf = sdf.select( @@ -7752,12 +7768,11 @@ def resolve(internal: InternalFrame, side: str) -> InternalFrame: data_columns = [] column_labels = [] - left_scol_for = lambda label: scol_for( - left_table, left_internal.spark_column_name_for(label) - ) - right_scol_for = lambda label: scol_for( - right_table, right_internal.spark_column_name_for(label) - ) + def left_scol_for(label: Label) -> Column: + return scol_for(left_table, left_internal.spark_column_name_for(label)) + + def right_scol_for(label: Label) -> Column: + return scol_for(right_table, right_internal.spark_column_name_for(label)) for label in left_internal.column_labels: col = left_internal.spark_column_name_for(label) diff --git a/python/pyspark/pandas/groupby.py b/python/pyspark/pandas/groupby.py index b9b65208910f7..6d6f50c0a0b9d 100644 --- 
a/python/pyspark/pandas/groupby.py +++ b/python/pyspark/pandas/groupby.py @@ -2356,12 +2356,16 @@ def nunique(self, dropna: bool = True) -> FrameLike: Name: value1, dtype: int64 """ if dropna: - stat_function = lambda col: F.countDistinct(col) + + def stat_function(col: Column) -> Column: + return F.countDistinct(col) + else: - stat_function = lambda col: ( - F.countDistinct(col) - + F.when(F.count(F.when(col.isNull(), 1).otherwise(None)) >= 1, 1).otherwise(0) - ) + + def stat_function(col: Column) -> Column: + return F.countDistinct(col) + F.when( + F.count(F.when(col.isNull(), 1).otherwise(None)) >= 1, 1 + ).otherwise(0) return self._reduce_for_stat_function(stat_function, only_numeric=False) @@ -2563,7 +2567,9 @@ def median(self, numeric_only: bool = True, accuracy: int = 10000) -> FrameLike: "accuracy must be an integer; however, got [%s]" % type(accuracy).__name__ ) - stat_function = lambda col: F.percentile_approx(col, 0.5, accuracy) + def stat_function(col: Column) -> Column: + return F.percentile_approx(col, 0.5, accuracy) + return self._reduce_for_stat_function(stat_function, only_numeric=numeric_only) def _reduce_for_stat_function( diff --git a/python/pyspark/pandas/missing/common.py b/python/pyspark/pandas/missing/common.py index 1ebf28bb0bbf5..e6530a00bad14 100644 --- a/python/pyspark/pandas/missing/common.py +++ b/python/pyspark/pandas/missing/common.py @@ -16,44 +16,61 @@ # -memory_usage = lambda f: f( - "memory_usage", - reason="Unlike pandas, most DataFrames are not materialized in memory in Spark " - "(and pandas-on-Spark), and as a result memory_usage() does not do what you intend it " - "to do. Use Spark's web UI to monitor disk and memory usage of your application.", -) - -array = lambda f: f( - "array", reason="If you want to collect your data as an NumPy array, use 'to_numpy()' instead." -) - -to_pickle = lambda f: f( - "to_pickle", - reason="For storage, we encourage you to use Delta or Parquet, instead of Python pickle " - "format.", -) - -to_xarray = lambda f: f( - "to_xarray", - reason="If you want to collect your data as an NumPy array, use 'to_numpy()' instead.", -) - -to_list = lambda f: f( - "to_list", - reason="If you want to collect your data as an NumPy array, use 'to_numpy()' instead.", -) - -tolist = lambda f: f( - "tolist", reason="If you want to collect your data as an NumPy array, use 'to_numpy()' instead." -) - -__iter__ = lambda f: f( - "__iter__", - reason="If you want to collect your data as an NumPy array, use 'to_numpy()' instead.", -) - -duplicated = lambda f: f( - "duplicated", - reason="'duplicated' API returns np.ndarray and the data size is too large." - "You can just use DataFrame.deduplicated instead", -) +def memory_usage(f): + return f( + "memory_usage", + reason="Unlike pandas, most DataFrames are not materialized in memory in Spark " + "(and pandas-on-Spark), and as a result memory_usage() does not do what you intend it " + "to do. 
Use Spark's web UI to monitor disk and memory usage of your application.", + ) + + +def array(f): + return f( + "array", + reason="If you want to collect your data as an NumPy array, use 'to_numpy()' instead.", + ) + + +def to_pickle(f): + return f( + "to_pickle", + reason="For storage, we encourage you to use Delta or Parquet, instead of Python pickle " + "format.", + ) + + +def to_xarray(f): + return f( + "to_xarray", + reason="If you want to collect your data as an NumPy array, use 'to_numpy()' instead.", + ) + + +def to_list(f): + return f( + "to_list", + reason="If you want to collect your data as an NumPy array, use 'to_numpy()' instead.", + ) + + +def tolist(f): + return f( + "tolist", + reason="If you want to collect your data as an NumPy array, use 'to_numpy()' instead.", + ) + + +def __iter__(f): + return f( + "__iter__", + reason="If you want to collect your data as an NumPy array, use 'to_numpy()' instead.", + ) + + +def duplicated(f): + return f( + "duplicated", + reason="'duplicated' API returns np.ndarray and the data size is too large." + "You can just use DataFrame.deduplicated instead", + ) diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py index 5cf639a947de7..340e270ace551 100644 --- a/python/pyspark/pandas/namespace.py +++ b/python/pyspark/pandas/namespace.py @@ -3364,7 +3364,9 @@ def to_list(os: Optional[Union[Name, List[Name]]]) -> List[Label]: right_as_of_name = right_as_of_names[0] def resolve(internal: InternalFrame, side: str) -> InternalFrame: - rename = lambda col: "__{}_{}".format(side, col) + def rename(col: str) -> str: + return "__{}_{}".format(side, col) + internal = internal.resolved_copy sdf = internal.spark_frame sdf = sdf.select( @@ -3431,12 +3433,11 @@ def resolve(internal: InternalFrame, side: str) -> InternalFrame: data_columns = [] column_labels = [] - left_scol_for = lambda label: scol_for( - as_of_joined_table, left_internal.spark_column_name_for(label) - ) - right_scol_for = lambda label: scol_for( - as_of_joined_table, right_internal.spark_column_name_for(label) - ) + def left_scol_for(label: Label) -> Column: + return scol_for(as_of_joined_table, left_internal.spark_column_name_for(label)) + + def right_scol_for(label: Label) -> Column: + return scol_for(as_of_joined_table, right_internal.spark_column_name_for(label)) for label in left_internal.column_labels: col = left_internal.spark_column_name_for(label) diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py index d403d871d3f47..e2df500ca3dee 100644 --- a/python/pyspark/pandas/series.py +++ b/python/pyspark/pandas/series.py @@ -3210,7 +3210,8 @@ def apply(self, func: Callable, args: Sequence[Any] = (), **kwds: Any) -> "Serie # Falls back to schema inference if it fails to get signature. 
should_infer_schema = True - apply_each = lambda s: s.apply(func, args=args, **kwds) + def apply_each(s: Any) -> pd.Series: + return s.apply(func, args=args, **kwds) if should_infer_schema: return self.pandas_on_spark._transform_batch(apply_each, None) @@ -3611,9 +3612,9 @@ def _rank( raise NotImplementedError("rank do not support MultiIndex now") if ascending: - asc_func = lambda scol: scol.asc() + asc_func = Column.asc else: - asc_func = lambda scol: scol.desc() + asc_func = Column.desc if method == "first": window = ( diff --git a/python/pyspark/pandas/tests/test_groupby.py b/python/pyspark/pandas/tests/test_groupby.py index ec6d761dddd42..661526b160050 100644 --- a/python/pyspark/pandas/tests/test_groupby.py +++ b/python/pyspark/pandas/tests/test_groupby.py @@ -49,9 +49,15 @@ def test_groupby_simple(self): for as_index in [True, False]: if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values("a").reset_index(drop=True) + + def sort(df): + return df.sort_values("a").reset_index(drop=True) + self.assert_eq( sort(psdf.groupby("a", as_index=as_index).sum()), sort(pdf.groupby("a", as_index=as_index).sum()), @@ -156,9 +162,15 @@ def test_groupby_simple(self): for as_index in [True, False]: if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values(10).reset_index(drop=True) + + def sort(df): + return df.sort_values(10).reset_index(drop=True) + self.assert_eq( sort(psdf.groupby(10, as_index=as_index).sum()), sort(pdf.groupby(10, as_index=as_index).sum()), @@ -244,9 +256,14 @@ def test_split_apply_combine_on_series(self): for as_index in [True, False]: if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values(list(df.columns)).reset_index(drop=True) + + def sort(df): + return df.sort_values(list(df.columns)).reset_index(drop=True) for check_exact, almost, func in funcs: for kkey, pkey in [("b", "b"), (psdf.b, pdf.b)]: @@ -351,9 +368,14 @@ def test_aggregate(self): for as_index in [True, False]: if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values(list(df.columns)).reset_index(drop=True) + + def sort(df): + return df.sort_values(list(df.columns)).reset_index(drop=True) for kkey, pkey in [("A", "A"), (psdf.A, pdf.A)]: with self.subTest(as_index=as_index, key=pkey): @@ -564,9 +586,14 @@ def test_dropna(self): for dropna in [True, False]: for as_index in [True, False]: if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values("A").reset_index(drop=True) + + def sort(df): + return df.sort_values("A").reset_index(drop=True) self.assert_eq( sort(psdf.groupby("A", as_index=as_index, dropna=dropna).std()), @@ -598,9 +625,14 @@ def test_dropna(self): for dropna in [True, False]: for as_index in [True, False]: if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values(["A", "B"]).reset_index(drop=True) + + def sort(df): + return df.sort_values(["A", "B"]).reset_index(drop=True) self.assert_eq( sort( @@ -624,9 +656,15 @@ def test_dropna(self): for dropna in [True, False]: for as_index in [True, False]: if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values(("X", 
"A")).reset_index(drop=True) + + def sort(df): + return df.sort_values(("X", "A")).reset_index(drop=True) + sorted_stats_psdf = sort( psdf.groupby(("X", "A"), as_index=as_index, dropna=dropna).agg( {("X", "B"): "min", ("Y", "C"): "std"} @@ -642,9 +680,14 @@ def test_dropna(self): # Testing dropna=True (pandas default behavior) for as_index in [True, False]: if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values("A").reset_index(drop=True) + + def sort(df): + return df.sort_values("A").reset_index(drop=True) self.assert_eq( sort(psdf.groupby("A", as_index=as_index, dropna=True)["B"].min()), @@ -652,9 +695,14 @@ def test_dropna(self): ) if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values(["A", "B"]).reset_index(drop=True) + + def sort(df): + return df.sort_values(["A", "B"]).reset_index(drop=True) self.assert_eq( sort( @@ -847,9 +895,15 @@ def test_all_any(self): for as_index in [True, False]: if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values("A").reset_index(drop=True) + + def sort(df): + return df.sort_values("A").reset_index(drop=True) + self.assert_eq( sort(psdf.groupby("A", as_index=as_index).all()), sort(pdf.groupby("A", as_index=as_index).all()), @@ -882,9 +936,15 @@ def test_all_any(self): for as_index in [True, False]: if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values(("X", "A")).reset_index(drop=True) + + def sort(df): + return df.sort_values(("X", "A")).reset_index(drop=True) + self.assert_eq( sort(psdf.groupby(("X", "A"), as_index=as_index).all()), sort(pdf.groupby(("X", "A"), as_index=as_index).all()), diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py index 3e8bcff8579f9..69621e49301f6 100644 --- a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py @@ -54,9 +54,15 @@ def test_groupby_different_lengths(self): for as_index in [True, False]: if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values("c").reset_index(drop=True) + + def sort(df): + return df.sort_values("c").reset_index(drop=True) + self.assert_eq( sort(psdf1.groupby(psdf2.a, as_index=as_index).sum()), sort(pdf1.groupby(pdf2.a, as_index=as_index).sum()), @@ -112,9 +118,14 @@ def test_split_apply_combine_on_series(self): for as_index in [True, False]: if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values(list(df.columns)).reset_index(drop=True) + + def sort(df): + return df.sort_values(list(df.columns)).reset_index(drop=True) with self.subTest(as_index=as_index): self.assert_eq( @@ -164,9 +175,14 @@ def test_aggregate(self): for as_index in [True, False]: if as_index: - sort = lambda df: df.sort_index() + + def sort(df): + return df.sort_index() + else: - sort = lambda df: df.sort_values(list(df.columns)).reset_index(drop=True) + + def sort(df): + return df.sort_values(list(df.columns)).reset_index(drop=True) with self.subTest(as_index=as_index): self.assert_eq( diff --git a/python/pyspark/pandas/tests/test_series_string.py 
b/python/pyspark/pandas/tests/test_series_string.py index 832cc0bbfeb46..0b778583e735a 100644 --- a/python/pyspark/pandas/tests/test_series_string.py +++ b/python/pyspark/pandas/tests/test_series_string.py @@ -248,8 +248,11 @@ def test_string_replace(self): self.check_func(lambda x: x.str.replace("a.", "xx", regex=True)) self.check_func(lambda x: x.str.replace("a.", "xx", regex=False)) self.check_func(lambda x: x.str.replace("ing", "0", flags=re.IGNORECASE)) + # reverse every lowercase word - repl = lambda m: m.group(0)[::-1] + def repl(m): + return m.group(0)[::-1] + self.check_func(lambda x: x.str.replace(r"[a-z]+", repl)) # compiled regex with flags regex_pat = re.compile(r"WHITESPACE", flags=re.IGNORECASE) diff --git a/python/pyspark/pandas/utils.py b/python/pyspark/pandas/utils.py index 43c203f6de5d6..cd79acc22a225 100644 --- a/python/pyspark/pandas/utils.py +++ b/python/pyspark/pandas/utils.py @@ -149,7 +149,9 @@ def combine_frames( if get_option("compute.ops_on_diff_frames"): def resolve(internal: InternalFrame, side: str) -> InternalFrame: - rename = lambda col: "__{}_{}".format(side, col) + def rename(col: str) -> str: + return "__{}_{}".format(side, col) + internal = internal.resolved_copy sdf = internal.spark_frame sdf = internal.spark_frame.select( diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 27b6665ecf1ce..97b87ea87e834 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -2362,7 +2362,10 @@ def flatMapValues(self, f): >>> x.flatMapValues(f).collect() [('a', 'x'), ('a', 'y'), ('a', 'z'), ('b', 'p'), ('b', 'r')] """ - flat_map_fn = lambda kv: ((kv[0], x) for x in f(kv[1])) + + def flat_map_fn(kv): + return ((kv[0], x) for x in f(kv[1])) + return self.flatMap(flat_map_fn, preservesPartitioning=True) def mapValues(self, f): @@ -2378,7 +2381,10 @@ def mapValues(self, f): >>> x.mapValues(f).collect() [('a', 3), ('b', 1)] """ - map_values_fn = lambda kv: (kv[0], f(kv[1])) + + def map_values_fn(kv): + return kv[0], f(kv[1]) + return self.map(map_values_fn, preservesPartitioning=True) def groupWith(self, other, *others): diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 58621491dfb9a..233d5298c4494 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -927,7 +927,9 @@ def prepare(obj): return (obj,) else: - prepare = lambda obj: obj + + def prepare(obj: Any) -> Any: + return obj if isinstance(data, RDD): rdd, struct = self._createFromRDD(data.map(prepare), schema, samplingRatio) diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/test_pandas_udf_scalar.py index bee9cff525717..08fba7cea01fc 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/test_pandas_udf_scalar.py @@ -145,7 +145,10 @@ def test_vectorized_udf_basic(self): col("id").cast("boolean").alias("bool"), array(col("id")).alias("array_long"), ) - f = lambda x: x + + def f(x): + return x + for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: str_f = pandas_udf(f, StringType(), udf_type) int_f = pandas_udf(f, IntegerType(), udf_type) @@ -283,7 +286,9 @@ def test_vectorized_udf_null_string(self): def test_vectorized_udf_string_in_udf(self): df = self.spark.range(10) - scalar_f = lambda x: pd.Series(map(str, x)) + + def scalar_f(x): + return pd.Series(map(str, x)) def iter_f(it): for i in it: @@ -305,7 +310,10 @@ def test_vectorized_udf_datatype_string(self): col("id").cast("decimal").alias("decimal"), col("id").cast("boolean").alias("bool"), ) - 
f = lambda x: x + + def f(x): + return x + for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: str_f = pandas_udf(f, "string", udf_type) int_f = pandas_udf(f, "integer", udf_type) diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index 7f421aaea892c..a092d67df17de 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -747,7 +747,8 @@ def f(*a): self.assertEqual(r.first()[0], "success") def test_udf_cache(self): - func = lambda x: x + def func(x): + return x df = self.spark.range(1) df.select(udf(func)("id")).cache() diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index aad225aefcfae..52a9b89ec3df1 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -1355,11 +1355,20 @@ def _merge_type( name: Optional[str] = None, ) -> Union[StructType, ArrayType, MapType, DataType]: if name is None: - new_msg = lambda msg: msg - new_name = lambda n: "field %s" % n + + def new_msg(msg: str) -> str: + return msg + + def new_name(n: str) -> str: + return "field %s" % n + else: - new_msg = lambda msg: "%s: %s" % (name, msg) - new_name = lambda n: "field %s in %s" % (n, name) + + def new_msg(msg: str) -> str: + return "%s: %s" % (name, msg) + + def new_name(n: str) -> str: + return "field %s in %s" % (n, name) if isinstance(a, NullType): return b @@ -1551,11 +1560,20 @@ def _make_type_verifier( """ if name is None: - new_msg = lambda msg: msg - new_name = lambda n: "field %s" % n + + def new_msg(msg: str) -> str: + return msg + + def new_name(n: str) -> str: + return "field %s" % n + else: - new_msg = lambda msg: "%s: %s" % (name, msg) - new_name = lambda n: "field %s in %s" % (n, name) + + def new_msg(msg: str) -> str: + return "%s: %s" % (name, msg) + + def new_name(n: str) -> str: + return "field %s in %s" % (n, name) def verify_nullability(obj: Any) -> bool: if obj is None: @@ -1582,7 +1600,8 @@ def verify_acceptable_types(obj: Any) -> None: if isinstance(dataType, StringType): # StringType can work with any types - verify_value = lambda _: _ + def verify_value(obj: Any) -> None: + pass elif isinstance(dataType, UserDefinedType): verifier = _make_type_verifier(dataType.sqlType(), name=name) diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py index 0c1aa19fdc29b..f445a78bd9530 100644 --- a/python/pyspark/streaming/dstream.py +++ b/python/pyspark/streaming/dstream.py @@ -161,7 +161,10 @@ def foreachRDD(self, func): """ if func.__code__.co_argcount == 1: old_func = func - func = lambda t, rdd: old_func(rdd) + + def func(_, rdd): + return old_func(rdd) + jfunc = TransformFunction(self._sc, func, self._jrdd_deserializer) api = self._ssc._jvm.PythonDStream api.callForeachRDD(self._jdstream, jfunc) @@ -194,7 +197,10 @@ def mapValues(self, f): Return a new DStream by applying a map function to the value of each key-value pairs in this DStream without changing the key. """ - map_values_fn = lambda kv: (kv[0], f(kv[1])) + + def map_values_fn(kv): + return kv[0], f(kv[1]) + return self.map(map_values_fn, preservesPartitioning=True) def flatMapValues(self, f): @@ -202,7 +208,10 @@ def flatMapValues(self, f): Return a new DStream by applying a flatmap function to the value of each key-value pairs in this DStream without changing the key. 
""" - flat_map_fn = lambda kv: ((kv[0], x) for x in f(kv[1])) + + def flat_map_fn(kv): + return ((kv[0], x) for x in f(kv[1])) + return self.flatMap(flat_map_fn, preservesPartitioning=True) def glom(self): @@ -308,7 +317,10 @@ def transform(self, func): """ if func.__code__.co_argcount == 1: oldfunc = func - func = lambda t, rdd: oldfunc(rdd) + + def func(_, rdd): + return oldfunc(rdd) + assert func.__code__.co_argcount == 2, "func should take one or two arguments" return TransformedDStream(self, func) @@ -322,7 +334,10 @@ def transformWith(self, func, other, keepSerializer=False): """ if func.__code__.co_argcount == 2: oldfunc = func - func = lambda t, a, b: oldfunc(a, b) + + def func(_, a, b): + return oldfunc(a, b) + assert func.__code__.co_argcount == 3, "func should take two or three arguments" jfunc = TransformFunction(self._sc, func, self._jrdd_deserializer, other._jrdd_deserializer) dstream = self._sc._jvm.PythonTransformed2DStream( diff --git a/python/pyspark/tests/test_rdd.py b/python/pyspark/tests/test_rdd.py index 5790cae616aca..bf066e80b6b3b 100644 --- a/python/pyspark/tests/test_rdd.py +++ b/python/pyspark/tests/test_rdd.py @@ -37,7 +37,7 @@ from pyspark.testing.utils import ReusedPySparkTestCase, SPARK_HOME, QuietTest -global_func = lambda: "Hi" +global_func = lambda: "Hi" # noqa: E731 class RDDTests(ReusedPySparkTestCase): @@ -764,7 +764,7 @@ def test_overwritten_global_func(self): # Regression test for SPARK-27000 global global_func self.assertEqual(self.sc.parallelize([1]).map(lambda _: global_func()).first(), "Hi") - global_func = lambda: "Yeah" + global_func = lambda: "Yeah" # noqa: E731 self.assertEqual(self.sc.parallelize([1]).map(lambda _: global_func()).first(), "Yeah") def test_to_local_iterator_failure(self): diff --git a/python/pyspark/tests/test_serializers.py b/python/pyspark/tests/test_serializers.py index 1c04295213c77..e2fb5ed894e3b 100644 --- a/python/pyspark/tests/test_serializers.py +++ b/python/pyspark/tests/test_serializers.py @@ -72,7 +72,10 @@ def test_itemgetter(self): def test_function_module_name(self): ser = CloudPickleSerializer() - func = lambda x: x + + def func(x): + return x + func2 = ser.loads(ser.dumps(func)) self.assertEqual(func.__module__, func2.__module__) diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 1935e27d66363..b00dc75e3d6e9 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -507,7 +507,8 @@ def mapper(a): else: return result - func = lambda _, it: map(mapper, it) + def func(_, it): + return map(mapper, it) # profiling is not supported for UDF return func, None, ser, ser From fc877e9a7cc60378c28891faf65eb3401ef28b0d Mon Sep 17 00:00:00 2001 From: wangshengjie3 Date: Wed, 19 Jan 2022 12:49:26 +0800 Subject: [PATCH 044/513] [SPARK-37580][CORE] Reset numFailures when one of task attempts succeeds ### What changes were proposed in this pull request? When a task failed count reach the max threshold, abort check if another attempt succeed. ### Why are the changes needed? In extreme situation, if one task has failed 3 times(max failed threshold is 4 in default), and there is a retry task and speculative task both in running state, then one of these 2 task attempts succeed and to cancel another. But executor which task need to be cancelled lost(oom in our situcation), this task marked as failed, and TaskSetManager handle this failed task attempt, it has failed 4 times so abort this stage and cause job failed. ### Does this PR introduce _any_ user-facing change? 
Yes, the meaning of `spark.task.maxFailures` has changed, from total count to continuous count of one particular task ### How was this patch tested? Unit test. Closes #34834 from wangshengjie123/fix_taskset_manager_abort_stage. Lead-authored-by: wangshengjie3 Co-authored-by: wangshengjie Signed-off-by: yi.wu --- .../spark/scheduler/TaskSetManager.scala | 2 + .../spark/scheduler/TaskSetManagerSuite.scala | 75 +++++++++++++++++++ docs/configuration.md | 5 +- 3 files changed, 80 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 68b7065002f4e..b7fae2a533f0e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -820,6 +820,7 @@ private[spark] class TaskSetManager( s"on ${info.host} (executor ${info.executorId}) ($tasksSuccessful/$numTasks)") // Mark successful and stop if all the tasks have succeeded. successful(index) = true + numFailures(index) = 0 if (tasksSuccessful == numTasks) { isZombie = true } @@ -843,6 +844,7 @@ private[spark] class TaskSetManager( if (!successful(index)) { tasksSuccessful += 1 successful(index) = true + numFailures(index) = 0 if (tasksSuccessful == numTasks) { isZombie = true } diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index 3d80a69246cc0..360a14b031139 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -2244,6 +2244,81 @@ class TaskSetManagerSuite // After 3s have elapsed now the task is marked as speculative task assert(sched.speculativeTasks.size == 1) } + + test("SPARK-37580: Reset numFailures when one of task attempts succeeds") { + sc = new SparkContext("local", "test") + // Set the speculation multiplier to be 0 so speculative tasks are launched immediately + sc.conf.set(config.SPECULATION_MULTIPLIER, 0.0) + sc.conf.set(config.SPECULATION_QUANTILE, 0.6) + sc.conf.set(config.SPECULATION_ENABLED, true) + + sched = new FakeTaskScheduler(sc, ("exec1", "host1"), ("exec2", "host2"), ("exec3", "host3")) + sched.backend = mock(classOf[SchedulerBackend]) + val taskSet = FakeTask.createTaskSet(3) + val clock = new ManualClock() + val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES, clock = clock) + + // Offer resources for 3 task to start + val tasks = new ArrayBuffer[TaskDescription]() + for ((k, v) <- List("exec1" -> "host1", "exec2" -> "host2", "exec3" -> "host3")) { + val taskOption = manager.resourceOffer(k, v, NO_PREF)._1 + assert(taskOption.isDefined) + val task = taskOption.get + assert(task.executorId === k) + tasks += task + } + assert(sched.startedTasks.toSet === (0 until 3).toSet) + + def runningTaskForIndex(index: Int): TaskDescription = { + tasks.find { task => + task.index == index && !sched.endedTasks.contains(task.taskId) + }.getOrElse { + throw new RuntimeException(s"couldn't find index $index in " + + s"tasks: ${tasks.map { t => t.index -> t.taskId }} with endedTasks:" + + s" ${sched.endedTasks.keys}") + } + } + clock.advance(1) + + // running task with index 1 fail 3 times (not enough to abort the stage) + (0 until 3).foreach { attempt => + val task = runningTaskForIndex(1) + val endReason = ExceptionFailure("a", "b", Array(), "c", None) + 
manager.handleFailedTask(task.taskId, TaskState.FAILED, endReason) + sched.endedTasks(task.taskId) = endReason + assert(!manager.isZombie) + val nextTask = manager.resourceOffer(s"exec2", s"host2", NO_PREF)._1 + assert(nextTask.isDefined, s"no offer for attempt $attempt of 1") + tasks += nextTask.get + } + + val numFailuresField = classOf[TaskSetManager].getDeclaredField("numFailures") + numFailuresField.setAccessible(true) + val numFailures = numFailuresField.get(manager).asInstanceOf[Array[Int]] + // numFailures(1) should be 3 + assert(numFailures(1) == 3) + + // make task(TID 2) success to speculative other tasks + manager.handleSuccessfulTask(2, createTaskResult(2)) + + val originalTask = runningTaskForIndex(1) + clock.advance(1) + assert(manager.checkSpeculatableTasks(0)) + assert(sched.speculativeTasks.toSet === Set(0, 1)) + + // make the speculative task(index 1) success + val speculativeTask = manager.resourceOffer("exec1", "host1", NO_PREF)._1 + assert(speculativeTask.isDefined) + manager.handleSuccessfulTask(speculativeTask.get.taskId, createTaskResult(1)) + // if task success, numFailures will be reset to 0 + assert(numFailures(1) == 0) + + // failed the originalTask(index 1) and check if the task manager is zombie + val failedReason = ExceptionFailure("a", "b", Array(), "c", None) + manager.handleFailedTask(originalTask.taskId, TaskState.FAILED, failedReason) + assert(!manager.isZombie) + } + } class FakeLongTasks(stageId: Int, partitionId: Int) extends FakeTask(stageId, partitionId) { diff --git a/docs/configuration.md b/docs/configuration.md index 80f17a839a0d5..818a2e556337f 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2461,9 +2461,10 @@ Apart from these, the following properties are also available, and may be useful spark.task.maxFailures 4 - Number of failures of any particular task before giving up on the job. + Number of continuous failures of any particular task before giving up on the job. The total number of failures spread across different tasks will not cause the job - to fail; a particular task has to fail this number of attempts. + to fail; a particular task has to fail this number of attempts continuously. + If any attempt succeeds, the failure count for the task will be reset. Should be greater than or equal to 1. Number of allowed retries = this value - 1. 0.8.0 From 61abae36eaccfa0ccb6bd2916e28164a96207e34 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 19 Jan 2022 13:23:15 +0800 Subject: [PATCH 045/513] [SPARK-37917][SQL] Push down limit 1 for right side of left semi/anti join if join condition is empty ### What changes were proposed in this pull request? It is safe to push down the limit 1 for the right side of left semi/anti join if the join condition is empty, since we only care if the right side is empty. 
For example: ```scala val numRows = 1024 * 1024 * 40 spark.sql(s"CREATE TABLE t1 using parquet AS SELECT id AS a, id AS b, id AS c FROM range(1, ${numRows}L, 1, 5)") spark.sql(s"CREATE TABLE t2 using parquet AS SELECT id AS a, id AS b, id AS c FROM range(1, ${numRows}L, 1, 5)") spark.sql("SELECT * FROM t1 LEFT SEMI JOIN t2 LIMIT 5").explain(true) ``` Before this pr: ``` == Optimized Logical Plan == GlobalLimit 5 +- LocalLimit 5 +- Join LeftSemi :- LocalLimit 5 : +- Relation default.t1[a#8L,b#9L,c#10L] parquet +- Project +- Relation default.t2[a#11L,b#12L,c#13L] parquet ``` After this pr: ``` == Optimized Logical Plan == GlobalLimit 5 +- LocalLimit 5 +- Join LeftSemi :- LocalLimit 5 : +- Relation default.t1[a#8L,b#9L,c#10L] parquet +- LocalLimit 1 +- Project +- Relation default.t2[a#11L,b#12L,c#13L] parquet ``` ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #35216 from wangyum/SPARK-37917. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 4 +++- .../spark/sql/catalyst/optimizer/LimitPushdownSuite.scala | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 1c2f0afb9d41c..357d11c39f4e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -684,7 +684,9 @@ object LimitPushDown extends Rule[LogicalPlan] { left = maybePushLocalLimit(limitExpr, join.left), right = maybePushLocalLimit(limitExpr, join.right)) case LeftSemi | LeftAnti if join.condition.isEmpty => - join.copy(left = maybePushLocalLimit(limitExpr, join.left)) + join.copy( + left = maybePushLocalLimit(limitExpr, join.left), + right = maybePushLocalLimit(Literal(1, IntegerType), join.right)) case _ => join } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index 848416b09813e..ee7f872514985 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -216,9 +216,9 @@ class LimitPushdownSuite extends PlanTest { test("SPARK-34514: Push down limit through LEFT SEMI and LEFT ANTI join") { // Push down when condition is empty Seq(LeftSemi, LeftAnti).foreach { joinType => - val originalQuery = x.join(y, joinType).limit(1) + val originalQuery = x.join(y, joinType).limit(5) val optimized = Optimize.execute(originalQuery.analyze) - val correctAnswer = Limit(1, LocalLimit(1, x).join(y, joinType)).analyze + val correctAnswer = Limit(5, LocalLimit(5, x).join(LocalLimit(1, y), joinType)).analyze comparePlans(optimized, correctAnswer) } From 817d1d7020e7a2229c1243a0bc556995e6ab43b9 Mon Sep 17 00:00:00 2001 From: yaohua Date: Wed, 19 Jan 2022 13:25:33 +0800 Subject: [PATCH 046/513] [SPARK-37769][SQL][FOLLOWUP] Filtering files if metadata columns are present in the data filter ### What changes were proposed in this pull request? Follow-up PR of #34575. Filtering files if metadata columns are present in the data filter. ### Why are the changes needed? 
Performance improvements. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UTs and a new UT. Closes #35055 from Yaohua628/spark-37769. Authored-by: yaohua Signed-off-by: Wenchen Fan --- .../execution/datasources/FileFormat.scala | 31 +++++++++++++ .../execution/datasources/FileScanRDD.scala | 15 +----- .../PartitioningAwareFileIndex.scala | 35 +++++++++++++- .../datasources/FileMetadataStructSuite.scala | 46 ++++++++++++++++++- 4 files changed, 110 insertions(+), 17 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala index c3bcf06b6e5fb..02d88e9ffa43c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.Filter import org.apache.spark.sql.types.{DataType, LongType, StringType, StructField, StructType, TimestampType} +import org.apache.spark.unsafe.types.UTF8String /** @@ -192,6 +193,36 @@ object FileFormat { // create a file metadata struct col def createFileMetadataCol: AttributeReference = MetadataAttribute(METADATA_NAME, METADATA_STRUCT) + + // create an internal row given required metadata fields and file information + def createMetadataInternalRow( + fieldNames: Seq[String], + filePath: Path, + fileSize: Long, + fileModificationTime: Long): InternalRow = + updateMetadataInternalRow(new GenericInternalRow(fieldNames.length), fieldNames, + filePath, fileSize, fileModificationTime) + + // update an internal row given required metadata fields and file information + def updateMetadataInternalRow( + row: InternalRow, + fieldNames: Seq[String], + filePath: Path, + fileSize: Long, + fileModificationTime: Long): InternalRow = { + fieldNames.zipWithIndex.foreach { case (name, i) => + name match { + case FILE_PATH => row.update(i, UTF8String.fromString(filePath.toString)) + case FILE_NAME => row.update(i, UTF8String.fromString(filePath.getName)) + case FILE_SIZE => row.update(i, fileSize) + case FILE_MODIFICATION_TIME => + // the modificationTime from the file is in millisecond, + // while internally, the TimestampType `file_modification_time` is stored in microsecond + row.update(i, fileModificationTime * 1000L) + } + } + row + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index 5baa597582553..b2c7931b661e6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -34,7 +34,6 @@ import org.apache.spark.sql.execution.datasources.FileFormat._ import org.apache.spark.sql.execution.vectorized.{OnHeapColumnVector, WritableColumnVector} import org.apache.spark.sql.types.{LongType, StringType, StructType} import org.apache.spark.sql.vectorized.ColumnarBatch -import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.NextIterator /** @@ -136,18 +135,8 @@ class FileScanRDD( */ private def updateMetadataRow(): Unit = { if (metadataColumns.nonEmpty && currentFile != null) { - val path = new Path(currentFile.filePath) - 
metadataColumns.zipWithIndex.foreach { case (attr, i) => - attr.name match { - case FILE_PATH => metadataRow.update(i, UTF8String.fromString(path.toString)) - case FILE_NAME => metadataRow.update(i, UTF8String.fromString(path.getName)) - case FILE_SIZE => metadataRow.update(i, currentFile.fileSize) - case FILE_MODIFICATION_TIME => - // the modificationTime from the file is in millisecond, - // while internally, the TimestampType is stored in microsecond - metadataRow.update(i, currentFile.modificationTime * 1000L) - } - } + updateMetadataInternalRow(metadataRow, metadataColumns.map(_.name), + new Path(currentFile.filePath), currentFile.fileSize, currentFile.modificationTime) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index 5b0d0606da093..9b56bcf35365a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{expressions, InternalRow} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.execution.datasources.FileFormat.createMetadataInternalRow import org.apache.spark.sql.types.StructType /** @@ -71,8 +72,37 @@ abstract class PartitioningAwareFileIndex( def isNonEmptyFile(f: FileStatus): Boolean = { isDataPath(f.getPath) && f.getLen > 0 } + + // retrieve the file metadata filters and reduce to a final filter expression + val fileMetadataFilterOpt = dataFilters.filter(_.references.forall { + case MetadataAttribute(_) => true + case _ => false + }).reduceOption(expressions.And) + + // - create a bound references for filters: put the metadata struct at 0 position for each file + // - retrieve the final metadata struct (could be pruned) from filters + val boundedFilterMetadataStructOpt = fileMetadataFilterOpt.map { fileMetadataFilter => + val metadataStruct = fileMetadataFilter.references.head.dataType + val boundedFilter = Predicate.createInterpreted(fileMetadataFilter.transform { + case _: AttributeReference => BoundReference(0, metadataStruct, nullable = true) + }) + (boundedFilter, metadataStruct) + } + + def matchFileMetadataPredicate(f: FileStatus): Boolean = { + // use option.forall, so if there is no filter no metadata struct, return true + boundedFilterMetadataStructOpt.forall { case (boundedFilter, metadataStruct) => + val row = InternalRow.fromSeq(Seq( + createMetadataInternalRow(metadataStruct.asInstanceOf[StructType].names, + f.getPath, f.getLen, f.getModificationTime) + )) + boundedFilter.eval(row) + } + } + val selectedPartitions = if (partitionSpec().partitionColumns.isEmpty) { - PartitionDirectory(InternalRow.empty, allFiles().filter(isNonEmptyFile)) :: Nil + PartitionDirectory(InternalRow.empty, allFiles() + .filter(f => isNonEmptyFile(f) && matchFileMetadataPredicate(f))) :: Nil } else { if (recursiveFileLookup) { throw new IllegalArgumentException( @@ -83,7 +113,8 @@ abstract class PartitioningAwareFileIndex( val files: Seq[FileStatus] = leafDirToChildrenFiles.get(path) match { case Some(existingDir) => // Directory has children files in it, return them - existingDir.filter(f => matchPathPattern(f) && isNonEmptyFile(f)) + existingDir.filter(f => 
matchPathPattern(f) && isNonEmptyFile(f) && + matchFileMetadataPredicate(f)) case None => // Directory does not exist, or has no children files diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala index 8bf5d6183925c..0d391e0dcd5ff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala @@ -279,9 +279,21 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession { } metadataColumnsTest("filter", schema) { (df, f0, _) => + val filteredDF = df.select("name", "age", METADATA_FILE_NAME) + .where(Column(METADATA_FILE_NAME) === f0(METADATA_FILE_NAME)) + + // check the filtered file + val partitions = filteredDF.queryExecution.sparkPlan.collectFirst { + case p: FileSourceScanExec => p.selectedPartitions + }.get + + assert(partitions.length == 1) // 1 partition + assert(partitions.head.files.length == 1) // 1 file in that partition + assert(partitions.head.files.head.getPath.toString == f0(METADATA_FILE_PATH)) // the file is f0 + + // check result checkAnswer( - df.select("name", "age", METADATA_FILE_NAME) - .where(Column(METADATA_FILE_NAME) === f0(METADATA_FILE_NAME)), + filteredDF, Seq( // _file_name == f0's name, so we will only have 1 row Row("jack", 24, f0(METADATA_FILE_NAME)) @@ -289,6 +301,36 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession { ) } + metadataColumnsTest("filter on metadata and user data", schema) { (df, _, f1) => + + val filteredDF = df.select("name", "age", "info", + METADATA_FILE_NAME, METADATA_FILE_PATH, + METADATA_FILE_SIZE, METADATA_FILE_MODIFICATION_TIME) + // mix metadata column + user column + .where(Column(METADATA_FILE_NAME) === f1(METADATA_FILE_NAME) and Column("name") === "lily") + // only metadata columns + .where(Column(METADATA_FILE_PATH) === f1(METADATA_FILE_PATH)) + // only user column + .where("age == 31") + + // check the filtered file + val partitions = filteredDF.queryExecution.sparkPlan.collectFirst { + case p: FileSourceScanExec => p.selectedPartitions + }.get + + assert(partitions.length == 1) // 1 partition + assert(partitions.head.files.length == 1) // 1 file in that partition + assert(partitions.head.files.head.getPath.toString == f1(METADATA_FILE_PATH)) // the file is f1 + + // check result + checkAnswer( + filteredDF, + Seq(Row("lily", 31, Row(54321L, "ucb"), + f1(METADATA_FILE_NAME), f1(METADATA_FILE_PATH), + f1(METADATA_FILE_SIZE), f1(METADATA_FILE_MODIFICATION_TIME))) + ) + } + Seq(true, false).foreach { caseSensitive => metadataColumnsTest(s"upper/lower case when case " + s"sensitive is $caseSensitive", schemaWithNameConflicts) { (df, f0, f1) => From 3a4598148c3c9e9995da8d6979b979a1c9f3ddbe Mon Sep 17 00:00:00 2001 From: yaohua Date: Wed, 19 Jan 2022 14:11:59 +0800 Subject: [PATCH 047/513] [SPARK-37896][SQL] Implement a ConstantColumnVector and improve performance of the hidden file metadata ### What changes were proposed in this pull request? Implement a new column vector named `ConstantColumnVector`, which avoids copying the same data for all rows but storing only one copy of the data. Also, improve performance of hidden file metadata FileScanRDD ### Why are the changes needed? Performance improvements. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? 
A new test suite. Closes #35068 from Yaohua628/spark-37770. Authored-by: yaohua Signed-off-by: Wenchen Fan --- .../vectorized/ConstantColumnVector.java | 292 ++++++++++++++++++ .../sql/execution/DataSourceScanExec.scala | 6 +- .../execution/datasources/FileScanRDD.scala | 45 +-- .../ConstantColumnVectorSuite.scala | 205 ++++++++++++ 4 files changed, 515 insertions(+), 33 deletions(-) create mode 100644 sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ConstantColumnVectorSuite.scala diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java new file mode 100644 index 0000000000000..134cb05c1265c --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java @@ -0,0 +1,292 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.vectorized; + +import java.math.BigDecimal; +import java.math.BigInteger; + +import org.apache.spark.sql.types.*; +import org.apache.spark.sql.vectorized.ColumnVector; +import org.apache.spark.sql.vectorized.ColumnarArray; +import org.apache.spark.sql.vectorized.ColumnarMap; +import org.apache.spark.unsafe.types.UTF8String; + +/** + * This class adds the constant support to ColumnVector. + * It supports all the types and contains `set` APIs, + * which will set the exact same value to all rows. + * + * Capacity: The vector stores only one copy of the data. + */ +public class ConstantColumnVector extends ColumnVector { + + // The data stored in this ConstantColumnVector, the vector stores only one copy of the data. + private byte nullData; + private byte byteData; + private short shortData; + private int intData; + private long longData; + private float floatData; + private double doubleData; + private UTF8String stringData; + private byte[] byteArrayData; + private ConstantColumnVector[] childData; + private ColumnarArray arrayData; + private ColumnarMap mapData; + + private final int numRows; + + /** + * @param numRows: The number of rows for this ConstantColumnVector + * @param type: The data type of this ConstantColumnVector + */ + public ConstantColumnVector(int numRows, DataType type) { + super(type); + this.numRows = numRows; + + if (type instanceof StructType) { + this.childData = new ConstantColumnVector[((StructType) type).fields().length]; + } else if (type instanceof CalendarIntervalType) { + // Three columns. Months as int. Days as Int. Microseconds as Long. 
+ this.childData = new ConstantColumnVector[3]; + } else { + this.childData = null; + } + } + + @Override + public void close() { + byteArrayData = null; + for (int i = 0; i < childData.length; i++) { + childData[i].close(); + childData[i] = null; + } + childData = null; + arrayData = null; + mapData = null; + } + + @Override + public boolean hasNull() { + return nullData == 1; + } + + @Override + public int numNulls() { + return hasNull() ? numRows : 0; + } + + @Override + public boolean isNullAt(int rowId) { + return nullData == 1; + } + + /** + * Sets all rows as `null` + */ + public void setNull() { + nullData = (byte) 1; + } + + /** + * Sets all rows as not `null` + */ + public void setNotNull() { + nullData = (byte) 0; + } + + @Override + public boolean getBoolean(int rowId) { + return byteData == 1; + } + + /** + * Sets the boolean `value` for all rows + */ + public void setBoolean(boolean value) { + byteData = (byte) ((value) ? 1 : 0); + } + + @Override + public byte getByte(int rowId) { + return byteData; + } + + /** + * Sets the byte `value` for all rows + */ + public void setByte(byte value) { + byteData = value; + } + + @Override + public short getShort(int rowId) { + return shortData; + } + + /** + * Sets the short `value` for all rows + */ + public void setShort(short value) { + shortData = value; + } + + @Override + public int getInt(int rowId) { + return intData; + } + + /** + * Sets the int `value` for all rows + */ + public void setInt(int value) { + intData = value; + } + + @Override + public long getLong(int rowId) { + return longData; + } + + /** + * Sets the long `value` for all rows + */ + public void setLong(long value) { + longData = value; + } + + @Override + public float getFloat(int rowId) { + return floatData; + } + + /** + * Sets the float `value` for all rows + */ + public void setFloat(float value) { + floatData = value; + } + + @Override + public double getDouble(int rowId) { + return doubleData; + } + + /** + * Sets the double `value` for all rows + */ + public void setDouble(double value) { + doubleData = value; + } + + @Override + public ColumnarArray getArray(int rowId) { + return arrayData; + } + + /** + * Sets the `ColumnarArray` `value` for all rows + */ + public void setArray(ColumnarArray value) { + arrayData = value; + } + + @Override + public ColumnarMap getMap(int ordinal) { + return mapData; + } + + /** + * Sets the `ColumnarMap` `value` for all rows + */ + public void setMap(ColumnarMap value) { + mapData = value; + } + + @Override + public Decimal getDecimal(int rowId, int precision, int scale) { + // copy and modify from WritableColumnVector + if (precision <= Decimal.MAX_INT_DIGITS()) { + return Decimal.createUnsafe(getInt(rowId), precision, scale); + } else if (precision <= Decimal.MAX_LONG_DIGITS()) { + return Decimal.createUnsafe(getLong(rowId), precision, scale); + } else { + byte[] bytes = getBinary(rowId); + BigInteger bigInteger = new BigInteger(bytes); + BigDecimal javaDecimal = new BigDecimal(bigInteger, scale); + return Decimal.apply(javaDecimal, precision, scale); + } + } + + /** + * Sets the `Decimal` `value` with the precision for all rows + */ + public void setDecimal(Decimal value, int precision) { + // copy and modify from WritableColumnVector + if (precision <= Decimal.MAX_INT_DIGITS()) { + setInt((int) value.toUnscaledLong()); + } else if (precision <= Decimal.MAX_LONG_DIGITS()) { + setLong(value.toUnscaledLong()); + } else { + BigInteger bigInteger = value.toJavaBigDecimal().unscaledValue(); + 
setByteArray(bigInteger.toByteArray()); + } + } + + @Override + public UTF8String getUTF8String(int rowId) { + return stringData; + } + + /** + * Sets the `UTF8String` `value` for all rows + */ + public void setUtf8String(UTF8String value) { + stringData = value; + } + + /** + * Sets the byte array `value` for all rows + */ + private void setByteArray(byte[] value) { + byteArrayData = value; + } + + @Override + public byte[] getBinary(int rowId) { + return byteArrayData; + } + + /** + * Sets the binary `value` for all rows + */ + public void setBinary(byte[] value) { + setByteArray(value); + } + + @Override + public ColumnVector getChild(int ordinal) { + return childData[ordinal]; + } + + /** + * Sets the child `ConstantColumnVector` `value` at the given ordinal for all rows + */ + public void setChild(int ordinal, ConstantColumnVector value) { + childData[ordinal] = value; + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 4bd6c239a3367..443553f6ade03 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat => ParquetSource} import org.apache.spark.sql.execution.datasources.v2.PushedDownOperators import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} -import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector +import org.apache.spark.sql.execution.vectorized.ConstantColumnVector import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.{BaseRelation, Filter} import org.apache.spark.sql.types.StructType @@ -221,8 +221,8 @@ case class FileSourceScanExec( requiredSchema = requiredSchema, partitionSchema = relation.partitionSchema, relation.sparkSession.sessionState.conf).map { vectorTypes => - // for column-based file format, append metadata struct column's vector type classes if any - vectorTypes ++ Seq.fill(metadataColumns.size)(classOf[OnHeapColumnVector].getName) + // for column-based file format, append metadata column's vector type classes if any + vectorTypes ++ Seq.fill(metadataColumns.size)(classOf[ConstantColumnVector].getName) } private lazy val driverMetrics: HashMap[String, Long] = HashMap.empty diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index b2c7931b661e6..8cd62320bba18 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GenericInternalRow, JoinedRow, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.datasources.FileFormat._ -import org.apache.spark.sql.execution.vectorized.{OnHeapColumnVector, WritableColumnVector} +import org.apache.spark.sql.execution.vectorized.ConstantColumnVector import org.apache.spark.sql.types.{LongType, StringType, StructType} import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.NextIterator @@ 
-133,49 +133,35 @@ class FileScanRDD( * For each partitioned file, metadata columns for each record in the file are exactly same. * Only update metadata row when `currentFile` is changed. */ - private def updateMetadataRow(): Unit = { + private def updateMetadataRow(): Unit = if (metadataColumns.nonEmpty && currentFile != null) { updateMetadataInternalRow(metadataRow, metadataColumns.map(_.name), new Path(currentFile.filePath), currentFile.fileSize, currentFile.modificationTime) } - } /** - * Create a writable column vector containing all required metadata columns + * Create an array of constant column vectors containing all required metadata columns */ - private def createMetadataColumnVector(c: ColumnarBatch): Array[WritableColumnVector] = { + private def createMetadataColumnVector(c: ColumnarBatch): Array[ConstantColumnVector] = { val path = new Path(currentFile.filePath) - val filePathBytes = path.toString.getBytes - val fileNameBytes = path.getName.getBytes - var rowId = 0 metadataColumns.map(_.name).map { case FILE_PATH => - val columnVector = new OnHeapColumnVector(c.numRows(), StringType) - rowId = 0 - // use a tight-loop for better performance - while (rowId < c.numRows()) { - columnVector.putByteArray(rowId, filePathBytes) - rowId += 1 - } + val columnVector = new ConstantColumnVector(c.numRows(), StringType) + columnVector.setUtf8String(UTF8String.fromString(path.toString)) columnVector case FILE_NAME => - val columnVector = new OnHeapColumnVector(c.numRows(), StringType) - rowId = 0 - // use a tight-loop for better performance - while (rowId < c.numRows()) { - columnVector.putByteArray(rowId, fileNameBytes) - rowId += 1 - } + val columnVector = new ConstantColumnVector(c.numRows(), StringType) + columnVector.setUtf8String(UTF8String.fromString(path.getName)) columnVector case FILE_SIZE => - val columnVector = new OnHeapColumnVector(c.numRows(), LongType) - columnVector.putLongs(0, c.numRows(), currentFile.fileSize) + val columnVector = new ConstantColumnVector(c.numRows(), LongType) + columnVector.setLong(currentFile.fileSize) columnVector case FILE_MODIFICATION_TIME => - val columnVector = new OnHeapColumnVector(c.numRows(), LongType) + val columnVector = new ConstantColumnVector(c.numRows(), LongType) // the modificationTime from the file is in millisecond, // while internally, the TimestampType is stored in microsecond - columnVector.putLongs(0, c.numRows(), currentFile.modificationTime * 1000L) + columnVector.setLong(currentFile.modificationTime * 1000L) columnVector }.toArray } @@ -187,10 +173,9 @@ class FileScanRDD( private def addMetadataColumnsIfNeeded(nextElement: Object): Object = { if (metadataColumns.nonEmpty) { nextElement match { - case c: ColumnarBatch => - new ColumnarBatch( - Array.tabulate(c.numCols())(c.column) ++ createMetadataColumnVector(c), - c.numRows()) + case c: ColumnarBatch => new ColumnarBatch( + Array.tabulate(c.numCols())(c.column) ++ createMetadataColumnVector(c), + c.numRows()) case u: UnsafeRow => projection.apply(new JoinedRow(u, metadataRow)) case i: InternalRow => new JoinedRow(i, metadataRow) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ConstantColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ConstantColumnVectorSuite.scala new file mode 100644 index 0000000000000..c8438f342d256 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ConstantColumnVectorSuite.scala @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.vectorized + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.types._ +import org.apache.spark.sql.vectorized.{ColumnarArray, ColumnarMap} +import org.apache.spark.unsafe.types.UTF8String + +class ConstantColumnVectorSuite extends SparkFunSuite { + + private def testVector(name: String, size: Int, dt: DataType) + (f: ConstantColumnVector => Unit): Unit = { + test(name) { + f(new ConstantColumnVector(size, dt)) + } + } + + testVector("null", 10, IntegerType) { vector => + vector.setNull() + assert(vector.hasNull) + assert(vector.numNulls() == 10) + (0 until 10).foreach { i => + assert(vector.isNullAt(i)) + } + + vector.setNotNull() + assert(!vector.hasNull) + assert(vector.numNulls() == 0) + (0 until 10).foreach { i => + assert(!vector.isNullAt(i)) + } + } + + testVector("boolean", 10, BooleanType) { vector => + vector.setBoolean(true) + (0 until 10).foreach { i => + assert(vector.getBoolean(i)) + } + } + + testVector("byte", 10, ByteType) { vector => + vector.setByte(3.toByte) + (0 until 10).foreach { i => + assert(vector.getByte(i) == 3.toByte) + } + } + + testVector("short", 10, ShortType) { vector => + vector.setShort(3.toShort) + (0 until 10).foreach { i => + assert(vector.getShort(i) == 3.toShort) + } + } + + testVector("int", 10, IntegerType) { vector => + vector.setInt(3) + (0 until 10).foreach { i => + assert(vector.getInt(i) == 3) + } + } + + testVector("long", 10, LongType) { vector => + vector.setLong(3L) + (0 until 10).foreach { i => + assert(vector.getLong(i) == 3L) + } + } + + testVector("float", 10, FloatType) { vector => + vector.setFloat(3.toFloat) + (0 until 10).foreach { i => + assert(vector.getFloat(i) == 3.toFloat) + } + } + + testVector("double", 10, DoubleType) { vector => + vector.setDouble(3.toDouble) + (0 until 10).foreach { i => + assert(vector.getDouble(i) == 3.toDouble) + } + } + + testVector("array", 10, ArrayType(IntegerType)) { vector => + // create an vector with constant array: [0, 1, 2, 3, 4] + val arrayVector = new OnHeapColumnVector(5, IntegerType) + (0 until 5).foreach { i => + arrayVector.putInt(i, i) + } + val columnarArray = new ColumnarArray(arrayVector, 0, 5) + + vector.setArray(columnarArray) + + (0 until 10).foreach { i => + assert(vector.getArray(i) == columnarArray) + assert(vector.getArray(i).toIntArray === Array(0, 1, 2, 3, 4)) + } + } + + testVector("map", 10, MapType(IntegerType, BooleanType)) { vector => + // create an vector with constant map: + // [(0, true), (1, false), (2, true), (3, false), (4, true)] + val keys = new OnHeapColumnVector(5, IntegerType) + val values = new OnHeapColumnVector(5, BooleanType) + + (0 until 5).foreach { i => + keys.putInt(i, i) + values.putBoolean(i, i % 2 == 0) + } + + val columnarMap = new 
ColumnarMap(keys, values, 0, 5)
+ vector.setMap(columnarMap)
+
+ (0 until 10).foreach { i =>
+ assert(vector.getMap(i) == columnarMap)
+ assert(vector.getMap(i).keyArray().toIntArray === Array(0, 1, 2, 3, 4))
+ assert(vector.getMap(i).valueArray().toBooleanArray ===
+ Array(true, false, true, false, true))
+ }
+ }
+
+ testVector("decimal", 10, DecimalType(10, 0)) { vector =>
+ val decimal = Decimal(100L)
+ vector.setDecimal(decimal, 10)
+ (0 until 10).foreach { i =>
+ assert(vector.getDecimal(i, 10, 0) == decimal)
+ }
+ }
+
+ testVector("utf8string", 10, StringType) { vector =>
+ vector.setUtf8String(UTF8String.fromString("hello"))
+ (0 until 10).foreach { i =>
+ assert(vector.getUTF8String(i) == UTF8String.fromString("hello"))
+ }
+ }
+
+ testVector("binary", 10, BinaryType) { vector =>
+ vector.setBinary("hello".getBytes("utf8"))
+ (0 until 10).foreach { i =>
+ assert(vector.getBinary(i) === "hello".getBytes("utf8"))
+ }
+ }
+
+ testVector("struct", 10,
+ new StructType()
+ .add(StructField("name", StringType))
+ .add(StructField("age", IntegerType))) { vector =>
+
+ val nameVector = new ConstantColumnVector(10, StringType)
+ nameVector.setUtf8String(UTF8String.fromString("jack"))
+ vector.setChild(0, nameVector)
+
+ val ageVector = new ConstantColumnVector(10, IntegerType)
+ ageVector.setInt(27)
+ vector.setChild(1, ageVector)
+
+ assert(vector.getChild(0) == nameVector)
+ assert(vector.getChild(1) == ageVector)
+ (0 until 10).foreach { i =>
+ assert(vector.getChild(0).getUTF8String(i) == UTF8String.fromString("jack"))
+ assert(vector.getChild(1).getInt(i) == 27)
+ }
+
+ // another API
+ (0 until 10).foreach { i =>
+ assert(vector.getStruct(i).get(0, StringType) == UTF8String.fromString("jack"))
+ assert(vector.getStruct(i).get(1, IntegerType) == 27)
+ }
+ }
+
+ testVector("calendar interval", 10, CalendarIntervalType) { vector =>
+ val monthsVector = new ConstantColumnVector(10, IntegerType)
+ monthsVector.setInt(3)
+ val daysVector = new ConstantColumnVector(10, IntegerType)
+ daysVector.setInt(25)
+ val microsecondsVector = new ConstantColumnVector(10, LongType)
+ microsecondsVector.setLong(12345L)
+
+ vector.setChild(0, monthsVector)
+ vector.setChild(1, daysVector)
+ vector.setChild(2, microsecondsVector)
+
+ (0 until 10).foreach { i =>
+ assert(vector.getChild(0).getInt(i) == 3)
+ assert(vector.getChild(1).getInt(i) == 25)
+ assert(vector.getChild(2).getLong(i) == 12345L)
+ }
+ }
+}

From f29dee606e8247ca7da792319385b1fc4364caa1 Mon Sep 17 00:00:00 2001
From: stczwd
Date: Wed, 19 Jan 2022 15:21:18 +0900
Subject: [PATCH 048/513] [SPARK-37933][SQL] Change the traversal method of V2ScanRelationPushDown push down rules

### What changes were proposed in this pull request?
This PR changes the traversal method of the V2ScanRelationPushDown push-down rules, which makes the code more readable and easier to extend with new rules.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Existing tests.

Closes #35242 from stczwd/SPARK-37933.
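The benefit described above ("easier to extend and add new rules") comes from keeping each push-down step as a plain `plan => plan` function in a `Seq` and folding the plan through them, instead of nesting the calls. The following is a self-contained editorial sketch of that shape, not code from this patch; the `Plan` case class and the rule names are illustrative placeholders rather than Spark classes.

```scala
// Self-contained sketch of the Seq-of-rules + foldLeft shape (illustrative only;
// Plan and the rule names below are placeholders, not Spark classes).
object RuleChainSketch {
  final case class Plan(steps: List[String])

  // Each "rule" is just a Plan => Plan function.
  val createScanBuilder: Plan => Plan = p => p.copy(steps = p.steps :+ "scanBuilder")
  val pushDownFilters: Plan => Plan = p => p.copy(steps = p.steps :+ "filters")
  val pushDownLimits: Plan => Plan = p => p.copy(steps = p.steps :+ "limits")

  // Adding a new rule is a one-line change to this Seq.
  val pushdownRules: Seq[Plan => Plan] =
    Seq(createScanBuilder, pushDownFilters, pushDownLimits)

  // Fold the plan through every rule, in order, instead of nesting the calls.
  def optimize(plan: Plan): Plan =
    pushdownRules.foldLeft(plan)((newPlan, rule) => rule(newPlan))

  def main(args: Array[String]): Unit = {
    // Prints: Plan(List(relation, scanBuilder, filters, limits))
    println(optimize(Plan(List("relation"))))
  }
}
```

The diff below applies the same shape to the real `LogicalPlan => LogicalPlan` rules, so a new push-down step becomes a one-line addition to the `Seq`.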
Authored-by: stczwd Signed-off-by: Hyukjin Kwon --- .../datasources/v2/V2ScanRelationPushDown.scala | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala index dec7189ac698d..3437dcba5e65f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -37,8 +37,17 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { import DataSourceV2Implicits._ def apply(plan: LogicalPlan): LogicalPlan = { - applyColumnPruning( - applyLimit(pushDownAggregates(pushDownFilters(pushDownSample(createScanBuilder(plan)))))) + val pushdownRules = Seq[LogicalPlan => LogicalPlan] ( + createScanBuilder, + pushDownSample, + pushDownFilters, + pushDownAggregates, + pushDownLimits, + pruneColumns) + + pushdownRules.foldLeft(plan) { (newPlan, pushDownRule) => + pushDownRule(newPlan) + } } private def createScanBuilder(plan: LogicalPlan) = plan.transform { @@ -222,7 +231,7 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { Cast(aggAttribute, aggDataType) } - def applyColumnPruning(plan: LogicalPlan): LogicalPlan = plan.transform { + def pruneColumns(plan: LogicalPlan): LogicalPlan = plan.transform { case ScanOperation(project, filters, sHolder: ScanBuilderHolder) => // column pruning val normalizedProjects = DataSourceStrategy @@ -308,7 +317,7 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { case other => other } - def applyLimit(plan: LogicalPlan): LogicalPlan = plan.transform { + def pushDownLimits(plan: LogicalPlan): LogicalPlan = plan.transform { case globalLimit @ Limit(IntegerLiteral(limitValue), child) => val newChild = pushDownLimit(child, limitValue) val newLocalLimit = globalLimit.child.asInstanceOf[LocalLimit].withNewChildren(Seq(newChild)) From 71af5b9c3e3ee7f4b011a11a520643f754d33744 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 19 Jan 2022 15:52:21 +0900 Subject: [PATCH 049/513] [SPARK-37769][SQL][FOLLOWUP] Add UTF8String import in FileScanRDD.scala ### What changes were proposed in this pull request? This PR fixes a missing import caused by a logical conflict between https://github.com/apache/spark/pull/35068 and https://github.com/apache/spark/pull/35055. ### Why are the changes needed? To fix the compilation. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI should test it out during compilation. Closes #35245 from HyukjinKwon/SPARK-37896. 
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .../org/apache/spark/sql/execution/datasources/FileScanRDD.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index 8cd62320bba18..ccef75c2ec46a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.execution.datasources.FileFormat._ import org.apache.spark.sql.execution.vectorized.ConstantColumnVector import org.apache.spark.sql.types.{LongType, StringType, StructType} import org.apache.spark.sql.vectorized.ColumnarBatch +import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.NextIterator /** From 6fba9e27f6b7743fd582a44f87c6d77b165af58f Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 19 Jan 2022 17:01:13 +0800 Subject: [PATCH 050/513] [SPARK-37951][MLLIB][K8S] Move test file from ../data/ to corresponding module's resource folder ### What changes were proposed in this pull request? Move test files from the `data/` dir to the corresponding module's resource folder: 1. move `../data/mllib/images/partitioned` to mllib's `resources/images/partitioned` 2. move `../data/mllib/iris_libsvm.txt` to mllib's `resources/iris_libsvm.txt` 3. copy `data/mllib/pagerank_data.txt` to the kubernetes integration tests' `resources/pagerank_data.txt` ### Why are the changes needed? Refactor the code to avoid test failures ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UTs Closes #35237 from AngersZhuuuu/SPARK-37951. 
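The change relies on resolving test files from the module's own test classpath (via the suite's `getTestResourcePath` helper, as the diff below shows) instead of a `../data/...` relative path. A rough sketch of what such a lookup amounts to; the helper name and body here are illustrative assumptions, not Spark's exact implementation:

```scala
// Sketch of a classpath-based test-resource lookup (assumption: approximates
// what a getTestResourcePath-style helper does, not Spark's actual code).
object TestResourceSketch {
  def testResourcePath(name: String): String = {
    // Look the file up on the test classpath and return a loadable URL string.
    val url = Thread.currentThread().getContextClassLoader.getResource(name)
    require(url != null, s"Test resource not found on classpath: $name")
    url.toString
  }

  def main(args: Array[String]): Unit = {
    // e.g. spark.read.format("libsvm").load(testResourcePath("iris_libsvm.txt"))
    println(testResourcePath("iris_libsvm.txt"))
  }
}
```

Because the lookup goes through the classpath, the tests no longer depend on being run from a working directory that contains the top-level `data/` folder.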
Authored-by: Angerszhuuuu Signed-off-by: Wenchen Fan --- .../spark/ml/source/image/ImageDataSource.scala | 4 ++-- .../date=2018-01/29.5.a_b_EGDP022204.jpg | Bin .../cls=kittens/date=2018-01/not-image.txt | 0 .../partitioned/cls=kittens/date=2018-02/54893.jpg | Bin .../cls=kittens/date=2018-02/DP153539.jpg | Bin .../cls=kittens/date=2018-02/DP802813.jpg | Bin .../cls=multichannel/date=2018-01/BGRA.png | Bin .../cls=multichannel/date=2018-01/BGRA_alpha_60.png | Bin .../cls=multichannel/date=2018-02/chr30.4.184.jpg | Bin .../cls=multichannel/date=2018-02/grayscale.jpg | Bin .../src/test/resources}/iris_libsvm.txt | 0 .../ml/evaluation/ClusteringEvaluatorSuite.scala | 2 +- .../ml/source/image/ImageFileFormatSuite.scala | 3 +-- .../src/test/resources/pagerank_data.txt | 6 ++++++ .../k8s/integrationtest/BasicTestsSuite.scala | 6 +++--- 15 files changed, 13 insertions(+), 8 deletions(-) rename {data/mllib => mllib/src/test/resources}/images/partitioned/cls=kittens/date=2018-01/29.5.a_b_EGDP022204.jpg (100%) rename {data/mllib => mllib/src/test/resources}/images/partitioned/cls=kittens/date=2018-01/not-image.txt (100%) rename {data/mllib => mllib/src/test/resources}/images/partitioned/cls=kittens/date=2018-02/54893.jpg (100%) rename {data/mllib => mllib/src/test/resources}/images/partitioned/cls=kittens/date=2018-02/DP153539.jpg (100%) rename {data/mllib => mllib/src/test/resources}/images/partitioned/cls=kittens/date=2018-02/DP802813.jpg (100%) rename {data/mllib => mllib/src/test/resources}/images/partitioned/cls=multichannel/date=2018-01/BGRA.png (100%) rename {data/mllib => mllib/src/test/resources}/images/partitioned/cls=multichannel/date=2018-01/BGRA_alpha_60.png (100%) rename {data/mllib => mllib/src/test/resources}/images/partitioned/cls=multichannel/date=2018-02/chr30.4.184.jpg (100%) rename {data/mllib => mllib/src/test/resources}/images/partitioned/cls=multichannel/date=2018-02/grayscale.jpg (100%) rename {data/mllib => mllib/src/test/resources}/iris_libsvm.txt (100%) create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/pagerank_data.txt diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/image/ImageDataSource.scala b/mllib/src/main/scala/org/apache/spark/ml/source/image/ImageDataSource.scala index d4d74082dc8c5..9413d99f56413 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/source/image/ImageDataSource.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/source/image/ImageDataSource.scala @@ -37,12 +37,12 @@ package org.apache.spark.ml.source.image * // Scala * val df = spark.read.format("image") * .option("dropInvalid", true) - * .load("data/mllib/images/partitioned") + * .load("/path/to/images") * * // Java * Dataset df = spark.read().format("image") * .option("dropInvalid", true) - * .load("data/mllib/images/partitioned"); + * .load("/path/to/images"); * }}} * * Image data source supports the following options: diff --git a/data/mllib/images/partitioned/cls=kittens/date=2018-01/29.5.a_b_EGDP022204.jpg b/mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-01/29.5.a_b_EGDP022204.jpg similarity index 100% rename from data/mllib/images/partitioned/cls=kittens/date=2018-01/29.5.a_b_EGDP022204.jpg rename to mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-01/29.5.a_b_EGDP022204.jpg diff --git a/data/mllib/images/partitioned/cls=kittens/date=2018-01/not-image.txt b/mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-01/not-image.txt similarity index 100% rename from 
data/mllib/images/partitioned/cls=kittens/date=2018-01/not-image.txt rename to mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-01/not-image.txt diff --git a/data/mllib/images/partitioned/cls=kittens/date=2018-02/54893.jpg b/mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-02/54893.jpg similarity index 100% rename from data/mllib/images/partitioned/cls=kittens/date=2018-02/54893.jpg rename to mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-02/54893.jpg diff --git a/data/mllib/images/partitioned/cls=kittens/date=2018-02/DP153539.jpg b/mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-02/DP153539.jpg similarity index 100% rename from data/mllib/images/partitioned/cls=kittens/date=2018-02/DP153539.jpg rename to mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-02/DP153539.jpg diff --git a/data/mllib/images/partitioned/cls=kittens/date=2018-02/DP802813.jpg b/mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-02/DP802813.jpg similarity index 100% rename from data/mllib/images/partitioned/cls=kittens/date=2018-02/DP802813.jpg rename to mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-02/DP802813.jpg diff --git a/data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA.png b/mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-01/BGRA.png similarity index 100% rename from data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA.png rename to mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-01/BGRA.png diff --git a/data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA_alpha_60.png b/mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-01/BGRA_alpha_60.png similarity index 100% rename from data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA_alpha_60.png rename to mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-01/BGRA_alpha_60.png diff --git a/data/mllib/images/partitioned/cls=multichannel/date=2018-02/chr30.4.184.jpg b/mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-02/chr30.4.184.jpg similarity index 100% rename from data/mllib/images/partitioned/cls=multichannel/date=2018-02/chr30.4.184.jpg rename to mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-02/chr30.4.184.jpg diff --git a/data/mllib/images/partitioned/cls=multichannel/date=2018-02/grayscale.jpg b/mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-02/grayscale.jpg similarity index 100% rename from data/mllib/images/partitioned/cls=multichannel/date=2018-02/grayscale.jpg rename to mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-02/grayscale.jpg diff --git a/data/mllib/iris_libsvm.txt b/mllib/src/test/resources/iris_libsvm.txt similarity index 100% rename from data/mllib/iris_libsvm.txt rename to mllib/src/test/resources/iris_libsvm.txt diff --git a/mllib/src/test/scala/org/apache/spark/ml/evaluation/ClusteringEvaluatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/evaluation/ClusteringEvaluatorSuite.scala index 06f2cb2b9788b..baeebfb59fe8d 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/evaluation/ClusteringEvaluatorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/evaluation/ClusteringEvaluatorSuite.scala @@ -40,7 +40,7 @@ class ClusteringEvaluatorSuite override def beforeAll(): Unit = { super.beforeAll() - irisDataset = spark.read.format("libsvm").load("../data/mllib/iris_libsvm.txt") + 
irisDataset = spark.read.format("libsvm").load(getTestResourcePath("iris_libsvm.txt")) val datasets = MLTestingUtils.generateArrayFeatureDataset(irisDataset) newIrisDataset = datasets._1 newIrisDatasetD = datasets._2 diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala index 0ec2747be6585..10b9bbb0bfe24 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala @@ -29,8 +29,7 @@ import org.apache.spark.sql.functions.{col, substring_index} class ImageFileFormatSuite extends SparkFunSuite with MLlibTestSparkContext { // Single column of images named "image" - private lazy val imagePath = "../data/mllib/images/partitioned" - private lazy val recursiveImagePath = "../data/mllib/images" + private lazy val imagePath = getTestResourcePath("images/partitioned") test("Smoke test: create basic ImageSchema dataframe") { val origin = "path" diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/pagerank_data.txt b/resource-managers/kubernetes/integration-tests/src/test/resources/pagerank_data.txt new file mode 100644 index 0000000000000..95755ab8f5af8 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/pagerank_data.txt @@ -0,0 +1,6 @@ +1 2 +1 3 +1 4 +2 1 +3 1 +4 1 diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala index 6db4beef6d221..d704ef753ed63 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala @@ -22,7 +22,7 @@ import io.fabric8.kubernetes.api.model.Pod import org.scalatest.concurrent.Eventually import org.scalatest.matchers.should.Matchers._ -import org.apache.spark.TestUtils +import org.apache.spark.{SparkFunSuite, TestUtils} import org.apache.spark.launcher.SparkLauncher private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite => @@ -126,11 +126,11 @@ private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite => } } -private[spark] object BasicTestsSuite { +private[spark] object BasicTestsSuite extends SparkFunSuite { val SPARK_PAGE_RANK_MAIN_CLASS: String = "org.apache.spark.examples.SparkPageRank" val CONTAINER_LOCAL_FILE_DOWNLOAD_PATH = "/var/spark-data/spark-files" val CONTAINER_LOCAL_DOWNLOADED_PAGE_RANK_DATA_FILE = s"$CONTAINER_LOCAL_FILE_DOWNLOAD_PATH/pagerank_data.txt" - val REMOTE_PAGE_RANK_DATA_FILE = "data/mllib/pagerank_data.txt" + val REMOTE_PAGE_RANK_DATA_FILE = getTestResourcePath("pagerank_data.txt") val REMOTE_PAGE_RANK_FILE_NAME = "pagerank_data.txt" } From a211a4359f865fc861c74b41ebb90221f1efd398 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 19 Jan 2022 19:09:30 +0800 Subject: [PATCH 051/513] [SPARK-35703][SQL][FOLLOWUP] ValidateRequirements should check the co-partitioning requirement ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/32875 . 
This PR updates `ValidateRequirements` to match the new change in the partitioning-distribution framework, and check the co-partitioning requirement for join nodes. ### Why are the changes needed? Fix bugs in `ValidateRequirements` ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? a new test suite Closes #35225 from cloud-fan/follow. Lead-authored-by: Wenchen Fan Co-authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../plans/physical/partitioning.scala | 2 +- .../exchange/ValidateRequirements.scala | 28 +-- .../exchange/ValidateRequirementsSuite.scala | 161 ++++++++++++++++++ 3 files changed, 179 insertions(+), 12 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/ValidateRequirementsSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index ed360bbf1ca4e..7a730c4b7318b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -441,7 +441,7 @@ case class HashShuffleSpec( distribution.clustering.zipWithIndex.foreach { case (distKey, distKeyPos) => distKeyToPos.getOrElseUpdate(distKey.canonicalized, mutable.BitSet.empty).add(distKeyPos) } - partitioning.expressions.map(k => distKeyToPos(k.canonicalized)) + partitioning.expressions.map(k => distKeyToPos.getOrElse(k.canonicalized, mutable.BitSet.empty)) } override def isCompatibleWith(other: ShuffleSpec): Boolean = other match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ValidateRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ValidateRequirements.scala index 9538199590477..1ac6b809fd250 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ValidateRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ValidateRequirements.scala @@ -45,17 +45,7 @@ object ValidateRequirements extends Logging { assert(requiredChildDistributions.length == children.length) assert(requiredChildOrderings.length == children.length) - // Verify partition number. For (hash) clustered distribution, the corresponding children must - // have the same number of partitions. - val numPartitions = requiredChildDistributions.zipWithIndex.collect { - case (_: ClusteredDistribution, i) => i - }.map(i => children(i).outputPartitioning.numPartitions) - if (numPartitions.length > 1 && !numPartitions.tail.forall(_ == numPartitions.head)) { - logDebug(s"ValidateRequirements failed: different partition num in\n$plan") - return false - } - - children.zip(requiredChildDistributions.zip(requiredChildOrderings)).forall { + val satisfied = children.zip(requiredChildDistributions.zip(requiredChildOrderings)).forall { case (child, (distribution, ordering)) if !child.outputPartitioning.satisfies(distribution) || !SortOrder.orderingSatisfies(child.outputOrdering, ordering) => @@ -63,5 +53,21 @@ object ValidateRequirements extends Logging { false case _ => true } + + if (satisfied && children.length > 1 && + requiredChildDistributions.forall(_.isInstanceOf[ClusteredDistribution])) { + // Check the co-partitioning requirement. 
+ val specs = children.map(_.outputPartitioning).zip(requiredChildDistributions).map { + case (p, d) => p.createShuffleSpec(d.asInstanceOf[ClusteredDistribution]) + } + if (specs.tail.forall(_.isCompatibleWith(specs.head))) { + true + } else { + logDebug(s"ValidateRequirements failed: children not co-partitioned in\n$plan") + false + } + } else { + satisfied + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/ValidateRequirementsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/ValidateRequirementsSuite.scala new file mode 100644 index 0000000000000..767a26876f902 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/ValidateRequirementsSuite.scala @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.exchange + +import org.apache.spark.sql.catalyst.expressions.{Ascending, SortOrder} +import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} +import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, SinglePartition} +import org.apache.spark.sql.execution.SortExec +import org.apache.spark.sql.execution.joins.SortMergeJoinExec +import org.apache.spark.sql.test.SharedSparkSession + +class ValidateRequirementsSuite extends PlanTest with SharedSparkSession { + + import testImplicits._ + + private def testValidate( + joinKeyIndices: Seq[Int], + leftPartitionKeyIndices: Seq[Int], + rightPartitionKeyIndices: Seq[Int], + leftPartitionNum: Int, + rightPartitionNum: Int, + success: Boolean): Unit = { + val table1 = + spark.range(10).select('id + 1 as 'a1, 'id + 2 as 'b1, 'id + 3 as 'c1) + .queryExecution.executedPlan + val table2 = + spark.range(10).select('id + 1 as 'a2, 'id + 2 as 'b2, 'id + 3 as 'c2) + .queryExecution.executedPlan + + val leftKeys = joinKeyIndices.map(table1.output) + val rightKeys = joinKeyIndices.map(table2.output) + val leftPartitioning = + HashPartitioning(leftPartitionKeyIndices.map(table1.output), leftPartitionNum) + val rightPartitioning = + HashPartitioning(rightPartitionKeyIndices.map(table2.output), rightPartitionNum) + val left = + SortExec(leftKeys.map(SortOrder(_, Ascending)), false, + ShuffleExchangeExec(leftPartitioning, table1)) + val right = + SortExec(rightKeys.map(SortOrder(_, Ascending)), false, + ShuffleExchangeExec(rightPartitioning, table2)) + + val plan = SortMergeJoinExec(leftKeys, rightKeys, Inner, None, left, right) + assert(ValidateRequirements.validate(plan) == success, plan) + } + + test("SMJ requirements satisfied with partial partition key") { + testValidate(Seq(0, 1, 2), Seq(1), Seq(1), 5, 5, true) + } + + test("SMJ requirements satisfied with different partition key order") { + testValidate(Seq(0, 1, 2), Seq(2, 0, 1), Seq(2, 0, 1), 5, 5, 
true) + } + + test("SMJ requirements not satisfied with unequal partition key order") { + testValidate(Seq(0, 1, 2), Seq(1, 0), Seq(0, 1), 5, 5, false) + } + + test("SMJ requirements not satisfied with unequal partition key length") { + testValidate(Seq(0, 1, 2), Seq(1), Seq(1, 2), 5, 5, false) + } + + test("SMJ requirements not satisfied with partition key missing from join key") { + testValidate(Seq(1, 2), Seq(1, 0), Seq(1, 0), 5, 5, false) + } + + test("SMJ requirements not satisfied with unequal partition number") { + testValidate(Seq(0, 1, 2), Seq(0, 1, 2), Seq(0, 1, 2), 12, 10, false) + } + + test("SMJ with HashPartitioning(1) and SinglePartition") { + val table1 = spark.range(10).queryExecution.executedPlan + val table2 = spark.range(10).queryExecution.executedPlan + val leftPartitioning = HashPartitioning(table1.output, 1) + val rightPartitioning = SinglePartition + val left = + SortExec(table1.output.map(SortOrder(_, Ascending)), false, + ShuffleExchangeExec(leftPartitioning, table1)) + val right = + SortExec(table2.output.map(SortOrder(_, Ascending)), false, + ShuffleExchangeExec(rightPartitioning, table2)) + + val plan = SortMergeJoinExec(table1.output, table2.output, Inner, None, left, right) + assert(ValidateRequirements.validate(plan), plan) + } + + private def testNestedJoin( + joinKeyIndices1: Seq[(Int, Int)], + joinKeyIndices2: Seq[(Int, Int)], + partNums: Seq[Int], + success: Boolean): Unit = { + val table1 = + spark.range(10).select('id + 1 as 'a1, 'id + 2 as 'b1, 'id + 3 as 'c1) + .queryExecution.executedPlan + val table2 = + spark.range(10).select('id + 1 as 'a2, 'id + 2 as 'b2, 'id + 3 as 'c2) + .queryExecution.executedPlan + val table3 = + spark.range(10).select('id + 1 as 'a3, 'id + 2 as 'b3, 'id + 3 as 'c3) + .queryExecution.executedPlan + + val key1 = joinKeyIndices1.map(_._1).map(table1.output) + val key2 = joinKeyIndices1.map(_._2).map(table2.output) + val key3 = joinKeyIndices2.map(_._1).map(table3.output) + val key4 = joinKeyIndices2.map(_._2).map(table1.output ++ table2.output) + val partitioning1 = HashPartitioning(key1, partNums(0)) + val partitioning2 = HashPartitioning(key2, partNums(1)) + val partitioning3 = HashPartitioning(key3, partNums(2)) + val joinRel1 = + SortExec(key1.map(SortOrder(_, Ascending)), false, ShuffleExchangeExec(partitioning1, table1)) + val joinRel2 = + SortExec(key2.map(SortOrder(_, Ascending)), false, ShuffleExchangeExec(partitioning2, table2)) + val joinRel3 = + SortExec(key3.map(SortOrder(_, Ascending)), false, ShuffleExchangeExec(partitioning3, table3)) + + val plan = SortMergeJoinExec(key3, key4, Inner, None, + joinRel3, SortMergeJoinExec(key1, key2, Inner, None, joinRel1, joinRel2)) + assert(ValidateRequirements.validate(plan) == success, plan) + } + + test("ValidateRequirements should work bottom up") { + Seq(true, false).foreach { success => + testNestedJoin(Seq((0, 0)), Seq((0, 0)), Seq(5, if (success) 5 else 10, 5), success) + } + } + + test("PartitioningCollection exact match") { + testNestedJoin(Seq((0, 0), (1, 1)), Seq((0, 0), (1, 1)), Seq(5, 5, 5), true) + testNestedJoin(Seq((0, 0), (1, 1)), Seq((0, 3), (1, 4)), Seq(5, 5, 5), true) + } + + test("PartitioningCollection mismatch with different order") { + testNestedJoin(Seq((0, 0), (1, 1)), Seq((1, 1), (0, 0)), Seq(5, 5, 5), false) + testNestedJoin(Seq((0, 0), (1, 1)), Seq((1, 4), (0, 3)), Seq(5, 5, 5), false) + } + + test("PartitioningCollection mismatch with different set") { + testNestedJoin(Seq((1, 1)), Seq((2, 2), (1, 1)), Seq(5, 5, 5), false) + 
testNestedJoin(Seq((1, 1)), Seq((2, 5), (1, 4)), Seq(5, 5, 5), false) + } + + test("PartitioningCollection mismatch with key missing from required") { + testNestedJoin(Seq((2, 2), (1, 1)), Seq((2, 2)), Seq(5, 5, 5), false) + testNestedJoin(Seq((2, 2), (1, 1)), Seq((2, 5)), Seq(5, 5, 5), false) + } +} From 789fce8c8b200eba5f94c2d83b4b83e3bfb9a2b1 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 19 Jan 2022 09:17:25 -0800 Subject: [PATCH 052/513] [SPARK-37959][ML] Fix the UT of checking norm in KMeans & BiKMeans ### What changes were proposed in this pull request? In `KMeansSuite` and `BisectingKMeansSuite`, there are some unused lines: ``` model1.clusterCenters.forall(Vectors.norm(_, 2) == 1.0 ``` For cosine distance, the norm of centering vector should be 1, so the norm checking is meaningful; For euclidean distance, the norm checking is meaningless; ### Why are the changes needed? to enable norm checking for cosine distance, and diable it for euclidean distance ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? updated testsuites Closes #35247 from zhengruifeng/fix_kmeans_ut. Authored-by: Ruifeng Zheng Signed-off-by: huaxingao --- .../spark/ml/clustering/BisectingKMeansSuite.scala | 10 +++------- .../apache/spark/ml/clustering/KMeansSuite.scala | 14 +++----------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala index 04b20d1e58dd3..fb6110d6f269c 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala @@ -186,7 +186,7 @@ class BisectingKMeansSuite extends MLTest with DefaultReadWriteTest { assert(predictionsMap(Vectors.dense(-1.0, 1.0)) == predictionsMap(Vectors.dense(-100.0, 90.0))) - model.clusterCenters.forall(Vectors.norm(_, 2) == 1.0) + assert(model.clusterCenters.forall(Vectors.norm(_, 2) ~== 1.0 absTol 1e-6)) } test("Comparing with and without weightCol with cosine distance") { @@ -217,7 +217,7 @@ class BisectingKMeansSuite extends MLTest with DefaultReadWriteTest { assert(predictionsMap1(Vectors.dense(-1.0, 1.0)) == predictionsMap1(Vectors.dense(-100.0, 90.0))) - model1.clusterCenters.forall(Vectors.norm(_, 2) == 1.0) + assert(model1.clusterCenters.forall(Vectors.norm(_, 2) ~== 1.0 absTol 1e-6)) val df2 = spark.createDataFrame(spark.sparkContext.parallelize(Seq( (Vectors.dense(1.0, 1.0), 2.0), (Vectors.dense(10.0, 10.0), 2.0), @@ -244,7 +244,7 @@ class BisectingKMeansSuite extends MLTest with DefaultReadWriteTest { assert(predictionsMap2(Vectors.dense(-1.0, 1.0)) == predictionsMap2(Vectors.dense(-100.0, 90.0))) - model2.clusterCenters.forall(Vectors.norm(_, 2) == 1.0) + assert(model2.clusterCenters.forall(Vectors.norm(_, 2) ~== 1.0 absTol 1e-6)) assert(model1.clusterCenters === model2.clusterCenters) } @@ -284,8 +284,6 @@ class BisectingKMeansSuite extends MLTest with DefaultReadWriteTest { assert(predictionsMap1(Vectors.dense(10.0, 10.0)) == predictionsMap1(Vectors.dense(10.0, 4.4))) - model1.clusterCenters.forall(Vectors.norm(_, 2) == 1.0) - val df2 = spark.createDataFrame(spark.sparkContext.parallelize(Seq( (Vectors.dense(1.0, 1.0), 1.0), (Vectors.dense(10.0, 10.0), 2.0), (Vectors.dense(1.0, 0.5), 2.0), (Vectors.dense(10.0, 4.4), 3.0), @@ -310,8 +308,6 @@ class BisectingKMeansSuite extends MLTest with DefaultReadWriteTest { 
assert(predictionsMap2(Vectors.dense(10.0, 10.0)) == predictionsMap2(Vectors.dense(10.0, 4.4))) - model2.clusterCenters.forall(Vectors.norm(_, 2) == 1.0) - assert(model1.clusterCenters(0) === model2.clusterCenters(0)) assert(model1.clusterCenters(1) === model2.clusterCenters(1)) assert(model1.clusterCenters(2) ~== model2.clusterCenters(2) absTol 1e-6) diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala index 61f4359d99ea9..7d2a0b8bd38c9 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala @@ -186,7 +186,7 @@ class KMeansSuite extends MLTest with DefaultReadWriteTest with PMMLReadWriteTes assert(predictionsMap(Vectors.dense(-1.0, 1.0)) == predictionsMap(Vectors.dense(-100.0, 90.0))) - model.clusterCenters.forall(Vectors.norm(_, 2) == 1.0) + assert(model.clusterCenters.forall(Vectors.norm(_, 2) ~== 1.0 absTol 1e-6)) } test("KMeans with cosine distance is not supported for 0-length vectors") { @@ -283,7 +283,7 @@ class KMeansSuite extends MLTest with DefaultReadWriteTest with PMMLReadWriteTes assert(predictionsMap1(Vectors.dense(-1.0, 1.0)) == predictionsMap1(Vectors.dense(-100.0, 90.0))) - model1.clusterCenters.forall(Vectors.norm(_, 2) == 1.0) + assert(model1.clusterCenters.forall(Vectors.norm(_, 2) ~== 1.0 absTol 1e-6)) val df2 = spark.createDataFrame(spark.sparkContext.parallelize(Seq( (Vectors.dense(1.0, 1.0), 1.0), @@ -313,7 +313,7 @@ class KMeansSuite extends MLTest with DefaultReadWriteTest with PMMLReadWriteTes assert(predictionsMap2(Vectors.dense(-1.0, 1.0)) == predictionsMap2(Vectors.dense(-100.0, 90.0))) - model2.clusterCenters.forall(Vectors.norm(_, 2) == 1.0) + assert(model2.clusterCenters.forall(Vectors.norm(_, 2) ~== 1.0 absTol 1e-6)) // compare if model1 and model2 have the same cluster centers assert(model1.clusterCenters.length === model2.clusterCenters.length) @@ -350,8 +350,6 @@ class KMeansSuite extends MLTest with DefaultReadWriteTest with PMMLReadWriteTes assert(predictionsMap1(Vectors.dense(9.0, 0.2)) == predictionsMap1(Vectors.dense(9.2, 0.0))) - model1.clusterCenters.forall(Vectors.norm(_, 2) == 1.0) - // center 1: // total weights in cluster 1: 2.0 + 2.0 + 2.0 = 6.0 // x: 9.0 * (2.0/6.0) + 9.0 * (2.0/6.0) + 9.2 * (2.0/6.0) = 9.066666666666666 @@ -394,8 +392,6 @@ class KMeansSuite extends MLTest with DefaultReadWriteTest with PMMLReadWriteTes assert(predictionsMap2(Vectors.dense(9.0, 0.2)) == predictionsMap2(Vectors.dense(9.2, 0.0))) - model2.clusterCenters.forall(Vectors.norm(_, 2) == 1.0) - // center 1: // total weights in cluster 1: 2.5 + 1.0 + 2.0 = 5.5 // x: 9.0 * (2.5/5.5) + 9.0 * (1.0/5.5) + 9.2 * (2.0/5.5) = 9.072727272727272 @@ -441,8 +437,6 @@ class KMeansSuite extends MLTest with DefaultReadWriteTest with PMMLReadWriteTes assert(predictionsMap1(Vectors.dense(-6.0, -6.0)) == predictionsMap1(Vectors.dense(-10.0, -10.0))) - model1.clusterCenters.forall(Vectors.norm(_, 2) == 1.0) - // use same weight, should have the same result as no weight val df2 = spark.createDataFrame(spark.sparkContext.parallelize(Seq( (Vectors.dense(0.1, 0.1), 2.0), @@ -474,8 +468,6 @@ class KMeansSuite extends MLTest with DefaultReadWriteTest with PMMLReadWriteTes assert(predictionsMap2(Vectors.dense(-6.0, -6.0)) == predictionsMap2(Vectors.dense(-10.0, -10.0))) - model2.clusterCenters.forall(Vectors.norm(_, 2) == 1.0) - assert(model1.clusterCenters === model2.clusterCenters) } } 
From c13d0194960c047c882277cf0dea744f371b0d75 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 19 Jan 2022 09:34:07 -0800 Subject: [PATCH 053/513] [SPARK-37928][SQL][TESTS] Add Parquet Data Page V2 test scenario to `DataSourceReadBenchmark` ### What changes were proposed in this pull request? This PR adds a corresponding `Parquet Data Page V2` test scenario for each `Parquet Data Page V1` test scenario to `DataSourceReadBenchmark`. ### Why are the changes needed? Add micro benchmark for `Parquet Data Page V2`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35226 from LuciferYang/SPARK-37928. Authored-by: yangjie01 Signed-off-by: Chao Sun --- .../DataSourceReadBenchmark-jdk11-results.txt | 411 +++++++++------- .../DataSourceReadBenchmark-jdk17-results.txt | 459 ++++++++++-------- .../DataSourceReadBenchmark-results.txt | 459 ++++++++++-------- .../benchmark/DataSourceReadBenchmark.scala | 309 +++++++----- 4 files changed, 923 insertions(+), 715 deletions(-) diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-jdk11-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-jdk11-results.txt index fb152e20c9449..25c43d8273df8 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-jdk11-results.txt @@ -2,269 +2,322 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11834 11929 134 1.3 752.4 1.0X -SQL Json 8574 8597 32 1.8 545.1 1.4X -SQL Parquet Vectorized 116 136 17 135.5 7.4 102.0X -SQL Parquet MR 1703 1715 17 9.2 108.2 7.0X -SQL ORC Vectorized 172 215 48 91.2 11.0 68.6X -SQL ORC MR 1819 1825 8 8.6 115.7 6.5X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +SQL CSV 9636 9771 191 1.6 612.6 1.0X +SQL Json 7960 8227 378 2.0 506.1 1.2X +SQL Parquet Vectorized: DataPageV1 113 129 12 139.7 7.2 85.6X +SQL Parquet Vectorized: DataPageV2 84 93 12 186.6 5.4 114.3X +SQL Parquet MR: DataPageV1 1466 1470 6 10.7 93.2 6.6X +SQL Parquet MR: DataPageV2 1334 1347 18 11.8 84.8 7.2X +SQL ORC Vectorized 163 197 27 96.3 10.4 59.0X +SQL ORC MR 1554 1558 6 10.1 98.8 6.2X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 117 126 17 134.9 7.4 1.0X -ParquetReader Vectorized -> Row 47 49 3 336.5 3.0 2.5X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 94 103 13 167.1 6.0 1.0X +ParquetReader Vectorized: DataPageV2 77 86 11 204.3 4.9 1.2X +ParquetReader Vectorized -> Row: 
DataPageV1 44 47 4 357.0 2.8 2.1X +ParquetReader Vectorized -> Row: DataPageV2 35 37 3 445.2 2.2 2.7X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13434 13590 220 1.2 854.1 1.0X -SQL Json 10056 10073 24 1.6 639.3 1.3X -SQL Parquet Vectorized 212 229 12 74.3 13.5 63.4X -SQL Parquet MR 1883 1916 47 8.4 119.7 7.1X -SQL ORC Vectorized 200 241 30 78.8 12.7 67.3X -SQL ORC MR 1529 1549 28 10.3 97.2 8.8X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +SQL CSV 11479 11919 622 1.4 729.8 1.0X +SQL Json 9894 9922 39 1.6 629.1 1.2X +SQL Parquet Vectorized: DataPageV1 123 156 30 128.3 7.8 93.6X +SQL Parquet Vectorized: DataPageV2 126 138 19 125.2 8.0 91.4X +SQL Parquet MR: DataPageV1 1986 2500 726 7.9 126.3 5.8X +SQL Parquet MR: DataPageV2 1810 1898 126 8.7 115.1 6.3X +SQL ORC Vectorized 174 210 30 90.5 11.0 66.1X +SQL ORC MR 1645 1652 9 9.6 104.6 7.0X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 229 254 13 68.6 14.6 1.0X -ParquetReader Vectorized -> Row 162 171 14 96.9 10.3 1.4X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 166 177 14 94.9 10.5 1.0X +ParquetReader Vectorized: DataPageV2 165 172 11 95.3 10.5 1.0X +ParquetReader Vectorized -> Row: DataPageV1 95 100 5 165.7 6.0 1.7X +ParquetReader Vectorized -> Row: DataPageV2 85 89 6 186.0 5.4 2.0X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 14320 14476 221 1.1 910.4 1.0X -SQL Json 9769 10067 423 1.6 621.1 1.5X -SQL Parquet Vectorized 187 228 28 84.3 11.9 76.8X -SQL Parquet MR 2230 2240 14 7.1 141.8 6.4X -SQL ORC Vectorized 221 265 36 71.1 14.1 64.8X -SQL ORC MR 1763 1779 23 8.9 112.1 8.1X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +SQL CSV 12176 12646 664 1.3 774.1 1.0X +SQL Json 9696 9729 46 1.6 616.5 1.3X +SQL Parquet Vectorized: DataPageV1 151 201 33 103.9 9.6 80.4X +SQL Parquet Vectorized: DataPageV2 216 235 15 72.7 13.8 56.3X +SQL Parquet MR: DataPageV1 1915 2017 145 8.2 121.8 6.4X +SQL Parquet MR: DataPageV2 1954 1978 33 8.0 124.3 6.2X +SQL ORC Vectorized 197 235 25 79.7 12.6 61.7X +SQL ORC MR 1769 1829 85 8.9 112.5 6.9X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
--------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 246 255 12 64.1 15.6 1.0X -ParquetReader Vectorized -> Row 249 294 21 63.1 15.8 1.0X +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 230 237 12 68.5 14.6 1.0X +ParquetReader Vectorized: DataPageV2 293 298 9 53.6 18.7 0.8X +ParquetReader Vectorized -> Row: DataPageV1 215 265 23 73.2 13.7 1.1X +ParquetReader Vectorized -> Row: DataPageV2 279 301 32 56.3 17.8 0.8X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 15460 15543 116 1.0 982.9 1.0X -SQL Json 10199 10393 274 1.5 648.4 1.5X -SQL Parquet Vectorized 163 203 30 96.5 10.4 94.8X -SQL Parquet MR 1914 2025 157 8.2 121.7 8.1X -SQL ORC Vectorized 324 355 23 48.5 20.6 47.7X -SQL ORC MR 1673 1701 39 9.4 106.4 9.2X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +SQL CSV 13069 13409 482 1.2 830.9 1.0X +SQL Json 10599 10621 32 1.5 673.9 1.2X +SQL Parquet Vectorized: DataPageV1 142 177 34 110.6 9.0 91.9X +SQL Parquet Vectorized: DataPageV2 313 359 28 50.2 19.9 41.7X +SQL Parquet MR: DataPageV1 1979 2044 92 7.9 125.8 6.6X +SQL Parquet MR: DataPageV2 1958 2030 101 8.0 124.5 6.7X +SQL ORC Vectorized 277 303 21 56.7 17.6 47.1X +SQL ORC MR 1692 1782 128 9.3 107.6 7.7X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 209 223 17 75.2 13.3 1.0X -ParquetReader Vectorized -> Row 303 307 6 51.9 19.3 0.7X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 253 269 18 62.1 16.1 1.0X +ParquetReader Vectorized: DataPageV2 1197 1199 3 13.1 76.1 0.2X +ParquetReader Vectorized -> Row: DataPageV1 273 361 110 57.7 17.3 0.9X +ParquetReader Vectorized -> Row: DataPageV2 379 438 37 41.5 24.1 0.7X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 19075 19147 101 0.8 1212.8 1.0X -SQL Json 12181 12369 265 1.3 774.5 1.6X -SQL Parquet Vectorized 230 268 25 68.5 14.6 83.1X -SQL Parquet MR 2160 2244 120 7.3 137.3 8.8X -SQL ORC Vectorized 396 444 41 39.7 25.2 48.2X -SQL ORC MR 1924 1939 21 8.2 122.3 9.9X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +SQL CSV 17143 17467 458 0.9 1089.9 1.0X +SQL Json 11507 12198 977 1.4 731.6 1.5X +SQL 
Parquet Vectorized: DataPageV1 238 253 19 66.0 15.2 71.9X +SQL Parquet Vectorized: DataPageV2 502 567 48 31.3 31.9 34.1X +SQL Parquet MR: DataPageV1 2333 2335 3 6.7 148.4 7.3X +SQL Parquet MR: DataPageV2 1948 1972 34 8.1 123.8 8.8X +SQL ORC Vectorized 389 408 20 40.5 24.7 44.1X +SQL ORC MR 1726 1817 128 9.1 109.7 9.9X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 273 311 43 57.5 17.4 1.0X -ParquetReader Vectorized -> Row 316 322 8 49.8 20.1 0.9X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 289 340 43 54.4 18.4 1.0X +ParquetReader Vectorized: DataPageV2 572 609 27 27.5 36.4 0.5X +ParquetReader Vectorized -> Row: DataPageV1 329 353 48 47.8 20.9 0.9X +ParquetReader Vectorized -> Row: DataPageV2 639 654 18 24.6 40.6 0.5X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 15439 15605 235 1.0 981.6 1.0X -SQL Json 11709 11852 201 1.3 744.5 1.3X -SQL Parquet Vectorized 157 199 33 99.9 10.0 98.0X -SQL Parquet MR 1996 2120 176 7.9 126.9 7.7X -SQL ORC Vectorized 439 466 28 35.8 27.9 35.1X -SQL ORC MR 1965 1991 36 8.0 124.9 7.9X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +SQL CSV 13721 13812 129 1.1 872.4 1.0X +SQL Json 12147 17632 2196 1.3 772.3 1.1X +SQL Parquet Vectorized: DataPageV1 138 164 25 113.9 8.8 99.4X +SQL Parquet Vectorized: DataPageV2 151 180 26 104.4 9.6 91.1X +SQL Parquet MR: DataPageV1 2006 2078 101 7.8 127.6 6.8X +SQL Parquet MR: DataPageV2 2038 2040 2 7.7 129.6 6.7X +SQL ORC Vectorized 465 475 10 33.8 29.6 29.5X +SQL ORC MR 1814 1860 64 8.7 115.4 7.6X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 206 212 8 76.4 13.1 1.0X -ParquetReader Vectorized -> Row 220 266 29 71.4 14.0 0.9X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 275 404 187 57.2 17.5 1.0X +ParquetReader Vectorized: DataPageV2 275 287 12 57.2 17.5 1.0X +ParquetReader Vectorized -> Row: DataPageV1 227 265 24 69.2 14.4 1.2X +ParquetReader Vectorized -> Row: DataPageV2 228 259 28 69.1 14.5 1.2X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) 
Platinum 8272CL CPU @ 2.60GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 20048 20816 1086 0.8 1274.6 1.0X -SQL Json 16265 16314 69 1.0 1034.1 1.2X -SQL Parquet Vectorized 238 296 29 66.1 15.1 84.3X -SQL Parquet MR 2414 2418 7 6.5 153.5 8.3X -SQL ORC Vectorized 555 604 38 28.4 35.3 36.2X -SQL ORC MR 2225 2242 24 7.1 141.5 9.0X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +SQL CSV 17269 17620 496 0.9 1097.9 1.0X +SQL Json 15636 15952 447 1.0 994.1 1.1X +SQL Parquet Vectorized: DataPageV1 238 267 18 66.0 15.1 72.5X +SQL Parquet Vectorized: DataPageV2 222 260 21 70.9 14.1 77.9X +SQL Parquet MR: DataPageV1 2418 2457 56 6.5 153.7 7.1X +SQL Parquet MR: DataPageV2 2194 2207 18 7.2 139.5 7.9X +SQL ORC Vectorized 519 528 14 30.3 33.0 33.3X +SQL ORC MR 1760 1770 14 8.9 111.9 9.8X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 317 352 35 49.6 20.2 1.0X -ParquetReader Vectorized -> Row 346 356 9 45.4 22.0 0.9X +Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 284 305 30 55.3 18.1 1.0X +ParquetReader Vectorized: DataPageV2 286 286 1 55.1 18.2 1.0X +ParquetReader Vectorized -> Row: DataPageV1 325 337 16 48.4 20.6 0.9X +ParquetReader Vectorized -> Row: DataPageV2 346 361 16 45.5 22.0 0.8X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13981 14223 342 0.7 1333.4 1.0X -SQL Json 11241 11293 74 0.9 1072.0 1.2X -SQL Parquet Vectorized 2060 2076 23 5.1 196.4 6.8X -SQL Parquet MR 3779 3931 216 2.8 360.4 3.7X -SQL ORC Vectorized 2085 2088 4 5.0 198.8 6.7X -SQL ORC MR 3739 3767 39 2.8 356.6 3.7X +SQL CSV 12428 12714 405 0.8 1185.2 1.0X +SQL Json 11088 11251 231 0.9 1057.4 1.1X +SQL Parquet Vectorized: DataPageV1 1990 1997 10 5.3 189.8 6.2X +SQL Parquet Vectorized: DataPageV2 2551 2618 95 4.1 243.3 4.9X +SQL Parquet MR: DataPageV1 3903 3913 15 2.7 372.2 3.2X +SQL Parquet MR: DataPageV2 3734 3920 263 2.8 356.1 3.3X +SQL ORC Vectorized 2153 2155 3 4.9 205.3 5.8X +SQL ORC MR 3485 3549 91 3.0 332.4 3.6X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 
11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 8544 8579 50 1.2 814.8 1.0X -SQL Json 6705 6952 348 1.6 639.5 1.3X -SQL Parquet Vectorized 603 615 9 17.4 57.5 14.2X -SQL Parquet MR 1722 1725 4 6.1 164.2 5.0X -SQL ORC Vectorized 515 547 24 20.4 49.1 16.6X -SQL ORC MR 1827 1845 25 5.7 174.2 4.7X +SQL CSV 7116 7167 72 1.5 678.7 1.0X +SQL Json 6700 6741 58 1.6 639.0 1.1X +SQL Parquet Vectorized: DataPageV1 526 556 36 19.9 50.1 13.5X +SQL Parquet Vectorized: DataPageV2 518 533 15 20.2 49.4 13.7X +SQL Parquet MR: DataPageV1 1504 1656 216 7.0 143.4 4.7X +SQL Parquet MR: DataPageV2 1676 1676 1 6.3 159.8 4.2X +SQL ORC Vectorized 497 518 20 21.1 47.4 14.3X +SQL ORC MR 1657 1787 183 6.3 158.1 4.3X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 18854 19521 943 0.8 1198.7 1.0X -Data column - Json 12579 12688 154 1.3 799.8 1.5X -Data column - Parquet Vectorized 246 298 28 63.9 15.7 76.5X -Data column - Parquet MR 2693 2699 9 5.8 171.2 7.0X -Data column - ORC Vectorized 434 463 25 36.2 27.6 43.4X -Data column - ORC MR 2249 2303 77 7.0 143.0 8.4X -Partition column - CSV 6045 6199 217 2.6 384.3 3.1X -Partition column - Json 9463 9679 305 1.7 601.7 2.0X -Partition column - Parquet Vectorized 64 92 36 244.3 4.1 292.9X -Partition column - Parquet MR 1238 1252 20 12.7 78.7 15.2X -Partition column - ORC Vectorized 60 85 25 263.7 3.8 316.1X -Partition column - ORC MR 1440 1458 26 10.9 91.5 13.1X -Both columns - CSV 19647 20381 1038 0.8 1249.1 1.0X -Both columns - Json 12615 12654 55 1.2 802.0 1.5X -Both columns - Parquet Vectorized 337 345 9 46.7 21.4 56.0X -Both columns - Parquet MR 2461 2573 158 6.4 156.5 7.7X -Both columns - ORC Vectorized 432 470 54 36.4 27.5 43.6X -Both columns - ORC MR 2507 2536 40 6.3 159.4 7.5X +Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------------- +Data column - CSV 18247 18411 232 0.9 1160.1 1.0X +Data column - Json 10860 11264 571 1.4 690.5 1.7X +Data column - Parquet Vectorized: DataPageV1 223 274 26 70.6 14.2 81.9X +Data column - Parquet Vectorized: DataPageV2 537 559 23 29.3 34.1 34.0X +Data column - Parquet MR: DataPageV1 2411 2517 150 6.5 153.3 7.6X +Data column - Parquet MR: DataPageV2 2299 2356 81 6.8 146.2 7.9X +Data column - ORC Vectorized 417 433 11 37.7 26.5 43.8X +Data column - ORC MR 2107 2178 101 7.5 134.0 8.7X +Partition column - CSV 6090 6186 136 2.6 387.2 3.0X +Partition column - Json 9479 9603 176 1.7 602.7 1.9X +Partition column - Parquet Vectorized: DataPageV1 49 69 28 322.0 3.1 373.6X +Partition column - Parquet Vectorized: DataPageV2 49 63 23 322.1 3.1 373.7X +Partition column - 
Parquet MR: DataPageV1 1200 1225 36 13.1 76.3 15.2X +Partition column - Parquet MR: DataPageV2 1199 1240 57 13.1 76.3 15.2X +Partition column - ORC Vectorized 53 77 26 295.0 3.4 342.2X +Partition column - ORC MR 1287 1346 83 12.2 81.8 14.2X +Both columns - CSV 17671 18140 663 0.9 1123.5 1.0X +Both columns - Json 11675 12167 696 1.3 742.3 1.6X +Both columns - Parquet Vectorized: DataPageV1 298 303 9 52.9 18.9 61.3X +Both columns - Parquet Vectorized: DataPageV2 541 580 36 29.1 34.4 33.7X +Both columns - Parquet MR: DataPageV1 2448 2491 60 6.4 155.6 7.5X +Both columns - Parquet MR: DataPageV2 2303 2352 69 6.8 146.4 7.9X +Both columns - ORC Vectorized 385 406 25 40.9 24.5 47.4X +Both columns - ORC MR 2118 2202 120 7.4 134.6 8.6X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 10199 10226 38 1.0 972.6 1.0X -SQL Json 10744 10925 256 1.0 1024.6 0.9X -SQL Parquet Vectorized 1251 1261 15 8.4 119.3 8.2X -SQL Parquet MR 3306 3315 13 3.2 315.3 3.1X -ParquetReader Vectorized 849 904 48 12.4 80.9 12.0X -SQL ORC Vectorized 1184 1204 28 8.9 112.9 8.6X -SQL ORC MR 2895 2945 71 3.6 276.1 3.5X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +SQL CSV 7966 12723 2892 1.3 759.7 1.0X +SQL Json 9897 10008 157 1.1 943.9 0.8X +SQL Parquet Vectorized: DataPageV1 1176 1264 125 8.9 112.1 6.8X +SQL Parquet Vectorized: DataPageV2 2224 2326 144 4.7 212.1 3.6X +SQL Parquet MR: DataPageV1 3431 3483 73 3.1 327.2 2.3X +SQL Parquet MR: DataPageV2 3845 4043 280 2.7 366.7 2.1X +ParquetReader Vectorized: DataPageV1 1055 1056 2 9.9 100.6 7.6X +ParquetReader Vectorized: DataPageV2 2093 2119 37 5.0 199.6 3.8X +SQL ORC Vectorized 1129 1217 125 9.3 107.7 7.1X +SQL ORC MR 2931 2982 72 3.6 279.5 2.7X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7949 8052 145 1.3 758.1 1.0X -SQL Json 7750 7868 167 1.4 739.1 1.0X -SQL Parquet Vectorized 949 976 24 11.0 90.5 8.4X -SQL Parquet MR 2700 2722 31 3.9 257.5 2.9X -ParquetReader Vectorized 916 940 31 11.4 87.3 8.7X -SQL ORC Vectorized 1240 1249 13 8.5 118.2 6.4X -SQL ORC MR 2856 2929 103 3.7 272.4 2.8X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +SQL CSV 6338 6508 240 1.7 604.4 1.0X +SQL Json 7149 7247 138 1.5 681.8 0.9X +SQL Parquet Vectorized: DataPageV1 937 984 45 11.2 89.3 6.8X +SQL Parquet Vectorized: DataPageV2 1582 1608 37 6.6 150.9 4.0X +SQL Parquet MR: DataPageV1 2525 2721 277 4.2 240.8 2.5X +SQL Parquet MR: DataPageV2 2969 2974 7 3.5 283.1 2.1X +ParquetReader Vectorized: DataPageV1 933 940 12 11.2 88.9 6.8X +ParquetReader Vectorized: DataPageV2 1535 1549 20 6.8 146.4 4.1X +SQL ORC Vectorized 1144 1204 86 9.2 109.1 5.5X +SQL ORC MR 2816 2822 8 3.7 268.6 2.3X + +OpenJDK 64-Bit 
Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5416 5542 179 1.9 516.5 1.0X -SQL Json 4760 4980 311 2.2 454.0 1.1X -SQL Parquet Vectorized 222 236 8 47.2 21.2 24.4X -SQL Parquet MR 1669 1685 22 6.3 159.2 3.2X -ParquetReader Vectorized 248 252 3 42.3 23.6 21.9X -SQL ORC Vectorized 409 472 81 25.6 39.0 13.2X -SQL ORC MR 1686 1687 0 6.2 160.8 3.2X +SQL CSV 4443 4504 86 2.4 423.7 1.0X +SQL Json 4528 4563 49 2.3 431.8 1.0X +SQL Parquet Vectorized: DataPageV1 213 233 15 49.2 20.3 20.8X +SQL Parquet Vectorized: DataPageV2 267 294 22 39.3 25.4 16.7X +SQL Parquet MR: DataPageV1 1691 1700 13 6.2 161.2 2.6X +SQL Parquet MR: DataPageV2 1515 1565 70 6.9 144.5 2.9X +ParquetReader Vectorized: DataPageV1 228 231 2 46.0 21.7 19.5X +ParquetReader Vectorized: DataPageV2 285 296 9 36.8 27.1 15.6X +SQL ORC Vectorized 369 425 82 28.4 35.2 12.1X +SQL ORC MR 1457 1463 9 7.2 138.9 3.0X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 2244 2282 53 0.5 2140.4 1.0X -SQL Json 3015 3099 119 0.3 2875.6 0.7X -SQL Parquet Vectorized 50 77 29 20.9 47.9 44.7X -SQL Parquet MR 190 209 27 5.5 180.8 11.8X -SQL ORC Vectorized 57 76 20 18.5 54.0 39.6X -SQL ORC MR 158 195 40 6.6 151.0 14.2X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +SQL CSV 2374 2377 5 0.4 2264.2 1.0X +SQL Json 2693 2726 46 0.4 2568.5 0.9X +SQL Parquet Vectorized: DataPageV1 44 62 16 23.8 42.0 54.0X +SQL Parquet Vectorized: DataPageV2 63 81 21 16.5 60.5 37.5X +SQL Parquet MR: DataPageV1 173 198 27 6.1 164.6 13.8X +SQL Parquet MR: DataPageV2 161 193 30 6.5 153.5 14.8X +SQL ORC Vectorized 53 71 18 19.9 50.2 45.1X +SQL ORC MR 149 182 34 7.0 142.3 15.9X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5114 5296 257 0.2 4876.7 1.0X -SQL Json 11564 11828 373 0.1 11028.4 0.4X -SQL Parquet Vectorized 60 93 26 17.3 57.6 84.6X -SQL Parquet MR 198 232 31 5.3 188.9 25.8X -SQL ORC Vectorized 69 103 35 15.2 65.9 74.0X -SQL ORC MR 175 212 36 6.0 166.9 29.2X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +SQL CSV 5149 5193 62 0.2 4910.9 1.0X +SQL Json 10556 10891 475 0.1 10066.5 0.5X +SQL Parquet Vectorized: DataPageV1 64 96 28 16.3 61.3 80.1X +SQL Parquet Vectorized: DataPageV2 83 106 22 12.6 79.1 62.0X +SQL Parquet MR: DataPageV1 196 232 25 5.3 187.4 26.2X +SQL Parquet MR: DataPageV2 184 221 28 5.7 175.1 28.0X +SQL ORC Vectorized 74 98 31 14.1 70.8 69.3X +SQL ORC MR 182 214 
38 5.8 173.9 28.2X + +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9072 9324 357 0.1 8651.4 1.0X -SQL Json 23444 23735 411 0.0 22358.1 0.4X -SQL Parquet Vectorized 91 129 28 11.5 86.7 99.8X -SQL Parquet MR 220 270 56 4.8 209.6 41.3X -SQL ORC Vectorized 96 110 20 10.9 91.8 94.2X -SQL ORC MR 216 240 33 4.8 206.2 41.9X +SQL CSV 9077 9107 43 0.1 8656.2 1.0X +SQL Json 20131 20886 1067 0.1 19198.5 0.5X +SQL Parquet Vectorized: DataPageV1 93 124 26 11.3 88.8 97.5X +SQL Parquet Vectorized: DataPageV2 103 128 29 10.2 98.5 87.9X +SQL Parquet MR: DataPageV1 218 257 35 4.8 207.6 41.7X +SQL Parquet MR: DataPageV2 213 255 29 4.9 202.7 42.7X +SQL ORC Vectorized 80 95 20 13.0 76.6 112.9X +SQL ORC MR 187 207 20 5.6 178.0 48.6X diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-jdk17-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-jdk17-results.txt index 85d506ec3454e..ecba57c0c3cc3 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-jdk17-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-jdk17-results.txt @@ -2,269 +2,322 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11737 11812 106 1.3 746.2 1.0X -SQL Json 7827 7904 109 2.0 497.6 1.5X -SQL Parquet Vectorized 98 116 12 160.6 6.2 119.8X -SQL Parquet MR 1529 1541 18 10.3 97.2 7.7X -SQL ORC Vectorized 165 185 14 95.5 10.5 71.2X -SQL ORC MR 1433 1440 9 11.0 91.1 8.2X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 38 40 3 416.2 2.4 1.0X -ParquetReader Vectorized -> Row 38 39 3 419.1 2.4 1.0X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 15972 16369 561 1.0 1015.5 1.0X +SQL Json 9543 9580 54 1.6 606.7 1.7X +SQL Parquet Vectorized: DataPageV1 115 144 19 136.3 7.3 138.4X +SQL Parquet Vectorized: DataPageV2 95 109 15 165.1 6.1 167.6X +SQL Parquet MR: DataPageV1 2098 2119 30 7.5 133.4 7.6X +SQL Parquet MR: DataPageV2 2007 2012 6 7.8 127.6 8.0X +SQL ORC Vectorized 211 225 16 74.5 13.4 75.7X +SQL ORC MR 2077 2103 36 7.6 132.1 7.7X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: 
DataPageV1 43 47 2 369.4 2.7 1.0X +ParquetReader Vectorized: DataPageV2 30 34 2 518.5 1.9 1.4X +ParquetReader Vectorized -> Row: DataPageV1 47 50 2 333.6 3.0 0.9X +ParquetReader Vectorized -> Row: DataPageV2 31 35 2 504.8 2.0 1.4X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13156 13192 51 1.2 836.4 1.0X -SQL Json 8690 8784 133 1.8 552.5 1.5X -SQL Parquet Vectorized 196 207 8 80.4 12.4 67.2X -SQL Parquet MR 1831 1834 4 8.6 116.4 7.2X -SQL ORC Vectorized 157 167 7 100.2 10.0 83.8X -SQL ORC MR 1381 1387 8 11.4 87.8 9.5X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 147 153 6 107.0 9.3 1.0X -ParquetReader Vectorized -> Row 149 162 24 105.7 9.5 1.0X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 17468 17543 105 0.9 1110.6 1.0X +SQL Json 11059 11065 8 1.4 703.1 1.6X +SQL Parquet Vectorized: DataPageV1 128 142 15 123.1 8.1 136.7X +SQL Parquet Vectorized: DataPageV2 126 141 8 125.2 8.0 139.1X +SQL Parquet MR: DataPageV1 2305 2331 36 6.8 146.5 7.6X +SQL Parquet MR: DataPageV2 2075 2095 28 7.6 131.9 8.4X +SQL ORC Vectorized 172 191 16 91.5 10.9 101.6X +SQL ORC MR 1777 1796 26 8.8 113.0 9.8X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 72 77 5 219.4 4.6 1.0X +ParquetReader Vectorized: DataPageV2 72 77 3 217.9 4.6 1.0X +ParquetReader Vectorized -> Row: DataPageV1 76 83 6 206.6 4.8 0.9X +ParquetReader Vectorized -> Row: DataPageV2 75 80 3 210.3 4.8 1.0X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 14024 14291 378 1.1 891.6 1.0X -SQL Json 9777 9849 102 1.6 621.6 1.4X -SQL Parquet Vectorized 153 175 18 102.9 9.7 91.8X -SQL Parquet MR 1971 1979 11 8.0 125.3 7.1X -SQL ORC Vectorized 193 211 15 81.4 12.3 72.5X -SQL ORC MR 1665 1693 39 9.4 105.9 8.4X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 18330 18332 3 0.9 1165.4 1.0X +SQL Json 11383 11429 66 1.4 723.7 1.6X +SQL Parquet Vectorized: DataPageV1 179 197 13 88.0 11.4 102.5X +SQL Parquet Vectorized: DataPageV2 239 263 18 65.7 15.2 76.6X +SQL Parquet MR: DataPageV1 2552 2567 21 6.2 162.3 7.2X +SQL Parquet MR: DataPageV2 2389 2436 67 6.6 151.9 7.7X +SQL ORC Vectorized 246 263 14 64.0 15.6 74.6X +SQL ORC MR 1965 2002 52 8.0 124.9 9.3X + +OpenJDK 64-Bit 
Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 217 227 7 72.6 13.8 1.0X -ParquetReader Vectorized -> Row 214 216 2 73.5 13.6 1.0X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 253 263 11 62.2 16.1 1.0X +ParquetReader Vectorized: DataPageV2 306 317 7 51.4 19.4 0.8X +ParquetReader Vectorized -> Row: DataPageV1 246 250 4 64.0 15.6 1.0X +ParquetReader Vectorized -> Row: DataPageV2 316 321 4 49.8 20.1 0.8X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 15107 15205 139 1.0 960.5 1.0X -SQL Json 9699 9773 104 1.6 616.7 1.6X -SQL Parquet Vectorized 144 160 24 109.6 9.1 105.2X -SQL Parquet MR 1903 1906 4 8.3 121.0 7.9X -SQL ORC Vectorized 227 234 6 69.4 14.4 66.6X -SQL ORC MR 1566 1578 17 10.0 99.5 9.6X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 209 214 4 75.2 13.3 1.0X -ParquetReader Vectorized -> Row 192 194 2 81.9 12.2 1.1X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 19573 19822 352 0.8 1244.4 1.0X +SQL Json 12141 12217 107 1.3 771.9 1.6X +SQL Parquet Vectorized: DataPageV1 192 222 28 81.8 12.2 101.8X +SQL Parquet Vectorized: DataPageV2 345 373 24 45.6 21.9 56.7X +SQL Parquet MR: DataPageV1 2736 2741 7 5.7 173.9 7.2X +SQL Parquet MR: DataPageV2 2467 2536 97 6.4 156.9 7.9X +SQL ORC Vectorized 332 356 20 47.4 21.1 59.0X +SQL ORC MR 2188 2193 7 7.2 139.1 8.9X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 291 295 4 54.1 18.5 1.0X +ParquetReader Vectorized: DataPageV2 493 518 39 31.9 31.3 0.6X +ParquetReader Vectorized -> Row: DataPageV1 300 306 8 52.5 19.1 1.0X +ParquetReader Vectorized -> Row: DataPageV2 471 483 11 33.4 30.0 0.6X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 19711 19743 44 0.8 1253.2 1.0X -SQL Json 11459 11500 59 1.4 728.5 1.7X -SQL Parquet 
Vectorized 202 210 7 77.9 12.8 97.6X -SQL Parquet MR 2093 2120 37 7.5 133.1 9.4X -SQL ORC Vectorized 356 384 22 44.2 22.6 55.4X -SQL ORC MR 1832 1844 17 8.6 116.4 10.8X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 290 290 0 54.3 18.4 1.0X -ParquetReader Vectorized -> Row 308 314 8 51.1 19.6 0.9X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 24692 24718 37 0.6 1569.9 1.0X +SQL Json 14839 14875 50 1.1 943.5 1.7X +SQL Parquet Vectorized: DataPageV1 295 316 29 53.3 18.7 83.7X +SQL Parquet Vectorized: DataPageV2 477 505 24 32.9 30.4 51.7X +SQL Parquet MR: DataPageV1 2841 2981 197 5.5 180.6 8.7X +SQL Parquet MR: DataPageV2 2616 2632 23 6.0 166.3 9.4X +SQL ORC Vectorized 388 403 11 40.5 24.7 63.6X +SQL ORC MR 2274 2372 138 6.9 144.6 10.9X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 376 387 9 41.9 23.9 1.0X +ParquetReader Vectorized: DataPageV2 585 591 6 26.9 37.2 0.6X +ParquetReader Vectorized -> Row: DataPageV1 377 387 9 41.8 23.9 1.0X +ParquetReader Vectorized -> Row: DataPageV2 576 586 10 27.3 36.6 0.7X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 16396 16602 292 1.0 1042.4 1.0X -SQL Json 11284 11591 433 1.4 717.4 1.5X -SQL Parquet Vectorized 137 168 14 114.7 8.7 119.6X -SQL Parquet MR 1901 1907 8 8.3 120.9 8.6X -SQL ORC Vectorized 429 447 12 36.6 27.3 38.2X -SQL ORC MR 1769 1841 102 8.9 112.4 9.3X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 234 253 10 67.2 14.9 1.0X -ParquetReader Vectorized -> Row 214 238 15 73.5 13.6 1.1X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 20566 20651 119 0.8 1307.6 1.0X +SQL Json 14337 14409 101 1.1 911.5 1.4X +SQL Parquet Vectorized: DataPageV1 154 167 8 101.9 9.8 133.2X +SQL Parquet Vectorized: DataPageV2 157 178 14 99.9 10.0 130.6X +SQL Parquet MR: DataPageV1 2730 2730 1 5.8 173.5 7.5X +SQL Parquet MR: DataPageV2 2459 2491 45 6.4 156.3 8.4X +SQL ORC Vectorized 479 501 15 32.9 30.4 43.0X +SQL ORC MR 2293 2343 71 6.9 145.8 9.0X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per 
Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 272 283 9 57.9 17.3 1.0X +ParquetReader Vectorized: DataPageV2 250 288 27 62.9 15.9 1.1X +ParquetReader Vectorized -> Row: DataPageV1 291 301 6 54.1 18.5 0.9X +ParquetReader Vectorized -> Row: DataPageV2 293 305 14 53.6 18.6 0.9X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 20303 20621 449 0.8 1290.9 1.0X -SQL Json 14630 14734 147 1.1 930.1 1.4X -SQL Parquet Vectorized 212 246 23 74.0 13.5 95.6X -SQL Parquet MR 2073 2212 198 7.6 131.8 9.8X -SQL ORC Vectorized 445 455 9 35.4 28.3 45.6X -SQL ORC MR 1835 1902 95 8.6 116.7 11.1X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 279 297 12 56.3 17.8 1.0X -ParquetReader Vectorized -> Row 280 292 12 56.1 17.8 1.0X +SQL CSV 25753 25874 171 0.6 1637.3 1.0X +SQL Json 19097 19391 416 0.8 1214.2 1.3X +SQL Parquet Vectorized: DataPageV1 273 288 11 57.6 17.4 94.3X +SQL Parquet Vectorized: DataPageV2 240 277 25 65.5 15.3 107.3X +SQL Parquet MR: DataPageV1 2969 3042 103 5.3 188.8 8.7X +SQL Parquet MR: DataPageV2 2692 2747 78 5.8 171.1 9.6X +SQL ORC Vectorized 601 626 20 26.2 38.2 42.8X +SQL ORC MR 2458 2467 13 6.4 156.3 10.5X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 354 363 7 44.4 22.5 1.0X +ParquetReader Vectorized: DataPageV2 345 359 12 45.5 22.0 1.0X +ParquetReader Vectorized -> Row: DataPageV1 337 345 8 46.7 21.4 1.1X +ParquetReader Vectorized -> Row: DataPageV2 335 364 21 46.9 21.3 1.1X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 14027 14143 164 0.7 1337.7 1.0X -SQL Json 10476 10606 183 1.0 999.1 1.3X -SQL Parquet Vectorized 1969 2040 100 5.3 187.8 7.1X -SQL Parquet MR 3743 3834 128 2.8 357.0 3.7X -SQL ORC Vectorized 1926 1936 14 5.4 183.6 7.3X -SQL ORC MR 3383 3403 28 3.1 322.6 4.1X +SQL CSV 18074 18101 37 0.6 1723.7 1.0X +SQL Json 13211 13214 5 0.8 1259.9 1.4X +SQL Parquet Vectorized: DataPageV1 2249 2286 53 4.7 214.5 
8.0X +SQL Parquet Vectorized: DataPageV2 2804 2818 20 3.7 267.4 6.4X +SQL Parquet MR: DataPageV1 4708 4779 100 2.2 449.0 3.8X +SQL Parquet MR: DataPageV2 4868 5046 251 2.2 464.3 3.7X +SQL ORC Vectorized 2145 2160 20 4.9 204.6 8.4X +SQL ORC MR 4180 4308 182 2.5 398.6 4.3X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 8672 8905 330 1.2 827.0 1.0X -SQL Json 6369 6374 7 1.6 607.4 1.4X -SQL Parquet Vectorized 556 579 25 18.9 53.0 15.6X -SQL Parquet MR 1574 1585 14 6.7 150.2 5.5X -SQL ORC Vectorized 420 427 4 25.0 40.1 20.6X -SQL ORC MR 1711 1733 31 6.1 163.2 5.1X +SQL CSV 11320 11376 78 0.9 1079.6 1.0X +SQL Json 7593 7664 101 1.4 724.1 1.5X +SQL Parquet Vectorized: DataPageV1 633 639 9 16.6 60.3 17.9X +SQL Parquet Vectorized: DataPageV2 621 644 20 16.9 59.2 18.2X +SQL Parquet MR: DataPageV1 2111 2157 65 5.0 201.3 5.4X +SQL Parquet MR: DataPageV2 2018 2064 65 5.2 192.4 5.6X +SQL ORC Vectorized 505 540 36 20.8 48.2 22.4X +SQL ORC MR 2302 2360 82 4.6 219.5 4.9X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 21008 21367 508 0.7 1335.7 1.0X -Data column - Json 12091 12412 455 1.3 768.7 1.7X -Data column - Parquet Vectorized 210 217 6 75.0 13.3 100.1X -Data column - Parquet MR 2434 2450 22 6.5 154.8 8.6X -Data column - ORC Vectorized 323 347 26 48.7 20.5 65.1X -Data column - ORC MR 2223 2231 11 7.1 141.3 9.5X -Partition column - CSV 5889 5992 146 2.7 374.4 3.6X -Partition column - Json 9706 9870 233 1.6 617.1 2.2X -Partition column - Parquet Vectorized 51 58 8 306.3 3.3 409.2X -Partition column - Parquet MR 1237 1241 5 12.7 78.7 17.0X -Partition column - ORC Vectorized 53 61 8 294.1 3.4 392.9X -Partition column - ORC MR 1322 1336 20 11.9 84.1 15.9X -Both columns - CSV 20362 20389 39 0.8 1294.6 1.0X -Both columns - Json 12267 12512 346 1.3 779.9 1.7X -Both columns - Parquet Vectorized 254 262 9 61.9 16.2 82.6X -Both columns - Parquet MR 2649 2745 136 5.9 168.4 7.9X -Both columns - ORC Vectorized 348 379 32 45.2 22.1 60.4X -Both columns - ORC MR 2339 2343 6 6.7 148.7 9.0X +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------------- +Data column - CSV 24867 25261 556 0.6 1581.0 1.0X +Data column - Json 13937 13987 70 1.1 886.1 1.8X +Data 
column - Parquet Vectorized: DataPageV1 252 264 8 62.3 16.0 98.5X +Data column - Parquet Vectorized: DataPageV2 547 560 13 28.8 34.7 45.5X +Data column - Parquet MR: DataPageV1 3492 3509 25 4.5 222.0 7.1X +Data column - Parquet MR: DataPageV2 3148 3208 84 5.0 200.2 7.9X +Data column - ORC Vectorized 493 512 21 31.9 31.3 50.5X +Data column - ORC MR 2925 2943 26 5.4 185.9 8.5X +Partition column - CSV 7847 7851 5 2.0 498.9 3.2X +Partition column - Json 11759 11908 210 1.3 747.6 2.1X +Partition column - Parquet Vectorized: DataPageV1 60 67 7 262.3 3.8 414.7X +Partition column - Parquet Vectorized: DataPageV2 57 65 9 274.2 3.6 433.5X +Partition column - Parquet MR: DataPageV1 1762 1768 8 8.9 112.1 14.1X +Partition column - Parquet MR: DataPageV2 1742 1783 59 9.0 110.7 14.3X +Partition column - ORC Vectorized 59 71 7 265.6 3.8 419.9X +Partition column - ORC MR 1743 1764 29 9.0 110.8 14.3X +Both columns - CSV 25859 25924 92 0.6 1644.1 1.0X +Both columns - Json 14693 14764 101 1.1 934.2 1.7X +Both columns - Parquet Vectorized: DataPageV1 341 395 66 46.2 21.7 73.0X +Both columns - Parquet Vectorized: DataPageV2 624 643 13 25.2 39.7 39.9X +Both columns - Parquet MR: DataPageV1 3541 3611 99 4.4 225.2 7.0X +Both columns - Parquet MR: DataPageV2 3279 3301 32 4.8 208.4 7.6X +Both columns - ORC Vectorized 434 483 40 36.2 27.6 57.3X +Both columns - ORC MR 2946 2964 26 5.3 187.3 8.4X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9872 9917 64 1.1 941.4 1.0X -SQL Json 8698 8793 134 1.2 829.5 1.1X -SQL Parquet Vectorized 1277 1281 6 8.2 121.8 7.7X -SQL Parquet MR 3649 3679 42 2.9 348.0 2.7X -ParquetReader Vectorized 969 1015 66 10.8 92.4 10.2X -SQL ORC Vectorized 1022 1038 23 10.3 97.4 9.7X -SQL ORC MR 3103 3122 27 3.4 295.9 3.2X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 13698 13783 121 0.8 1306.3 1.0X +SQL Json 11030 11144 161 1.0 1051.9 1.2X +SQL Parquet Vectorized: DataPageV1 1695 1699 7 6.2 161.6 8.1X +SQL Parquet Vectorized: DataPageV2 2740 2744 5 3.8 261.3 5.0X +SQL Parquet MR: DataPageV1 4547 4594 66 2.3 433.7 3.0X +SQL Parquet MR: DataPageV2 5382 5455 103 1.9 513.3 2.5X +ParquetReader Vectorized: DataPageV1 1238 1238 0 8.5 118.0 11.1X +ParquetReader Vectorized: DataPageV2 2312 2325 19 4.5 220.5 5.9X +SQL ORC Vectorized 1134 1147 18 9.2 108.2 12.1X +SQL ORC MR 3966 4015 69 2.6 378.2 3.5X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7321 7550 324 1.4 698.2 1.0X -SQL Json 6939 6962 32 1.5 661.8 1.1X -SQL Parquet Vectorized 906 917 17 11.6 86.4 8.1X -SQL Parquet MR 2617 2655 54 4.0 249.6 2.8X -ParquetReader Vectorized 
832 837 5 12.6 79.4 8.8X -SQL ORC Vectorized 1101 1109 11 9.5 105.0 6.6X -SQL ORC MR 2777 2778 2 3.8 264.8 2.6X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 10613 10658 64 1.0 1012.1 1.0X +SQL Json 8973 8996 33 1.2 855.7 1.2X +SQL Parquet Vectorized: DataPageV1 1208 1221 18 8.7 115.2 8.8X +SQL Parquet Vectorized: DataPageV2 1949 1950 1 5.4 185.9 5.4X +SQL Parquet MR: DataPageV1 3701 3716 21 2.8 353.0 2.9X +SQL Parquet MR: DataPageV2 4150 4192 60 2.5 395.8 2.6X +ParquetReader Vectorized: DataPageV1 1191 1192 1 8.8 113.6 8.9X +ParquetReader Vectorized: DataPageV2 1874 1917 61 5.6 178.7 5.7X +SQL ORC Vectorized 1338 1365 38 7.8 127.6 7.9X +SQL ORC MR 3659 3674 21 2.9 349.0 2.9X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5670 5691 30 1.8 540.7 1.0X -SQL Json 4309 4327 27 2.4 410.9 1.3X -SQL Parquet Vectorized 212 217 5 49.5 20.2 26.8X -SQL Parquet MR 1634 1672 53 6.4 155.9 3.5X -ParquetReader Vectorized 212 214 3 49.5 20.2 26.8X -SQL ORC Vectorized 356 359 4 29.5 33.9 15.9X -SQL ORC MR 1519 1561 59 6.9 144.9 3.7X +SQL CSV 8714 8809 134 1.2 831.0 1.0X +SQL Json 5801 5819 25 1.8 553.2 1.5X +SQL Parquet Vectorized: DataPageV1 297 316 11 35.3 28.3 29.3X +SQL Parquet Vectorized: DataPageV2 363 382 12 28.9 34.6 24.0X +SQL Parquet MR: DataPageV1 2350 2366 22 4.5 224.1 3.7X +SQL Parquet MR: DataPageV2 2132 2183 73 4.9 203.3 4.1X +ParquetReader Vectorized: DataPageV1 296 310 13 35.4 28.2 29.4X +ParquetReader Vectorized: DataPageV2 368 372 3 28.5 35.1 23.7X +SQL ORC Vectorized 474 487 10 22.1 45.2 18.4X +SQL ORC MR 2025 2031 9 5.2 193.1 4.3X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 2172 2213 58 0.5 2071.4 1.0X -SQL Json 2916 2934 26 0.4 2780.7 0.7X -SQL Parquet Vectorized 43 48 6 24.5 40.7 50.8X -SQL Parquet MR 175 182 9 6.0 167.1 12.4X -SQL ORC Vectorized 51 56 6 20.5 48.9 42.4X -SQL ORC MR 152 157 5 6.9 144.9 14.3X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 2677 2687 14 0.4 2553.2 1.0X +SQL Json 3581 3588 10 0.3 3414.8 0.7X +SQL Parquet Vectorized: DataPageV1 52 59 7 20.2 49.6 51.5X +SQL Parquet Vectorized: DataPageV2 68 75 7 15.4 65.0 39.3X +SQL Parquet MR: DataPageV1 245 257 9 4.3 233.6 10.9X +SQL Parquet MR: DataPageV2 224 237 8 4.7 213.7 11.9X +SQL ORC Vectorized 64 70 5 16.3 61.3 41.7X +SQL ORC MR 208 216 8 5.0 198.2 12.9X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4658 4737 112 0.2 4442.6 1.0X -SQL Json 12114 12242 181 0.1 11552.8 0.4X -SQL Parquet Vectorized 59 66 9 17.8 56.3 78.9X -SQL Parquet MR 196 206 10 5.3 187.3 23.7X -SQL ORC Vectorized 68 77 6 15.3 65.2 68.1X -SQL ORC MR 171 183 9 6.1 163.4 27.2X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 5753 5771 25 0.2 5486.7 1.0X +SQL Json 13801 13851 71 0.1 13161.9 0.4X +SQL Parquet Vectorized: DataPageV1 75 83 9 14.1 71.1 77.2X +SQL Parquet Vectorized: DataPageV2 84 93 7 12.4 80.6 68.1X +SQL Parquet MR: DataPageV1 269 280 7 3.9 256.5 21.4X +SQL Parquet MR: DataPageV2 251 258 8 4.2 238.9 23.0X +SQL ORC Vectorized 82 88 6 12.8 78.3 70.1X +SQL ORC MR 223 239 8 4.7 213.0 25.8X + +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 8008 8070 88 0.1 7636.6 1.0X -SQL Json 22795 23224 607 0.0 21739.5 0.4X -SQL Parquet Vectorized 81 88 7 13.0 77.2 99.0X -SQL Parquet MR 225 244 16 4.7 214.9 35.5X -SQL ORC Vectorized 77 82 5 13.6 73.3 104.2X -SQL ORC MR 185 190 6 5.7 176.2 43.3X +SQL CSV 9487 9503 24 0.1 9047.1 1.0X +SQL Json 26109 26240 186 0.0 24899.2 0.4X +SQL Parquet Vectorized: DataPageV1 100 110 10 10.4 95.8 94.5X +SQL Parquet Vectorized: DataPageV2 113 119 6 9.3 107.3 84.3X +SQL Parquet MR: DataPageV1 280 296 11 3.7 267.2 33.9X +SQL Parquet MR: DataPageV2 281 321 68 3.7 268.0 33.8X +SQL ORC Vectorized 92 101 8 11.4 87.5 103.4X +SQL ORC MR 228 245 10 4.6 217.7 41.6X diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt index 1dd99011ba273..6a2b6bfb4a0a8 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt @@ -2,269 +2,322 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13046 13274 322 1.2 829.5 1.0X -SQL Json 10585 10610 37 1.5 672.9 1.2X -SQL Parquet Vectorized 147 168 27 106.7 9.4 88.5X -SQL Parquet MR 1891 1897 7 8.3 120.3 6.9X -SQL ORC Vectorized 200 213 15 78.8 12.7 65.4X -SQL ORC MR 1939 1944 7 8.1 123.3 6.7X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz -Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 164 165 3 96.2 10.4 1.0X -ParquetReader Vectorized -> Row 71 72 2 220.6 4.5 2.3X - -OpenJDK 64-Bit Server VM 
1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL CSV 11570 12144 812 1.4 735.6 1.0X +SQL Json 7542 7568 37 2.1 479.5 1.5X +SQL Parquet Vectorized: DataPageV1 129 144 16 121.9 8.2 89.7X +SQL Parquet Vectorized: DataPageV2 92 106 20 170.3 5.9 125.2X +SQL Parquet MR: DataPageV1 1416 1419 3 11.1 90.0 8.2X +SQL Parquet MR: DataPageV2 1281 1359 110 12.3 81.4 9.0X +SQL ORC Vectorized 161 176 10 97.4 10.3 71.6X +SQL ORC MR 1525 1545 29 10.3 96.9 7.6X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 111 118 6 142.3 7.0 1.0X +ParquetReader Vectorized: DataPageV2 116 117 2 135.7 7.4 1.0X +ParquetReader Vectorized -> Row: DataPageV1 48 49 1 324.9 3.1 2.3X +ParquetReader Vectorized -> Row: DataPageV2 39 39 1 405.8 2.5 2.9X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 16466 16494 40 1.0 1046.9 1.0X -SQL Json 12509 12528 28 1.3 795.3 1.3X -SQL Parquet Vectorized 170 179 11 92.7 10.8 97.1X -SQL Parquet MR 2154 2167 19 7.3 136.9 7.6X -SQL ORC Vectorized 203 213 9 77.4 12.9 81.1X -SQL ORC MR 1977 1980 4 8.0 125.7 8.3X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz -Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 216 218 3 72.8 13.7 1.0X -ParquetReader Vectorized -> Row 123 124 2 127.6 7.8 1.8X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL CSV 13807 14535 1030 1.1 877.8 1.0X +SQL Json 8079 8094 21 1.9 513.6 1.7X +SQL Parquet Vectorized: DataPageV1 139 152 12 113.0 8.9 99.2X +SQL Parquet Vectorized: DataPageV2 140 147 5 112.5 8.9 98.7X +SQL Parquet MR: DataPageV1 1637 1741 148 9.6 104.1 8.4X +SQL Parquet MR: DataPageV2 1522 1636 161 10.3 96.8 9.1X +SQL ORC Vectorized 147 160 10 106.9 9.4 93.8X +SQL ORC MR 1542 1545 4 10.2 98.1 9.0X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 166 171 8 94.7 10.6 1.0X +ParquetReader Vectorized: DataPageV2 166 169 4 94.7 10.6 1.0X +ParquetReader Vectorized -> Row: DataPageV1 156 157 2 100.7 9.9 1.1X +ParquetReader Vectorized -> Row: DataPageV2 156 157 2 100.7 9.9 1.1X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -SQL CSV 17321 17358 53 0.9 1101.2 1.0X -SQL Json 12964 13001 52 1.2 824.2 1.3X -SQL Parquet Vectorized 243 251 7 64.8 15.4 71.3X -SQL Parquet MR 2491 2499 12 6.3 158.4 7.0X -SQL ORC Vectorized 214 217 3 73.4 13.6 80.9X -SQL ORC MR 1960 1963 3 8.0 124.6 8.8X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL CSV 15327 15421 133 1.0 974.5 1.0X +SQL Json 8564 8799 332 1.8 544.5 1.8X +SQL Parquet Vectorized: DataPageV1 202 219 11 77.8 12.8 75.8X +SQL Parquet Vectorized: DataPageV2 203 210 8 77.7 12.9 75.7X +SQL Parquet MR: DataPageV1 1874 2004 183 8.4 119.2 8.2X +SQL Parquet MR: DataPageV2 1606 1709 146 9.8 102.1 9.5X +SQL ORC Vectorized 167 179 10 94.1 10.6 91.7X +SQL ORC MR 1404 1408 6 11.2 89.3 10.9X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 361 365 6 43.6 22.9 1.0X -ParquetReader Vectorized -> Row 323 329 10 48.7 20.5 1.1X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 222 236 13 70.7 14.1 1.0X +ParquetReader Vectorized: DataPageV2 259 268 14 60.8 16.5 0.9X +ParquetReader Vectorized -> Row: DataPageV1 228 248 11 68.9 14.5 1.0X +ParquetReader Vectorized -> Row: DataPageV2 264 293 13 59.5 16.8 0.8X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 19098 19123 36 0.8 1214.2 1.0X -SQL Json 13719 13736 23 1.1 872.3 1.4X -SQL Parquet Vectorized 188 192 5 83.5 12.0 101.4X -SQL Parquet MR 2515 2536 30 6.3 159.9 7.6X -SQL ORC Vectorized 287 295 5 54.8 18.3 66.5X -SQL ORC MR 2034 2036 2 7.7 129.3 9.4X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz -Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 309 311 3 50.9 19.7 1.0X -ParquetReader Vectorized -> Row 270 272 5 58.4 17.1 1.1X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL CSV 17479 17651 243 0.9 1111.3 1.0X +SQL Json 9565 9582 25 1.6 608.1 1.8X +SQL Parquet Vectorized: DataPageV1 152 159 8 103.2 9.7 114.7X +SQL Parquet Vectorized: DataPageV2 290 308 18 54.2 18.4 60.3X +SQL Parquet MR: DataPageV1 1861 1980 169 8.5 118.3 9.4X +SQL Parquet MR: DataPageV2 1647 1748 142 9.5 104.7 10.6X +SQL ORC Vectorized 230 251 12 68.3 14.6 75.9X +SQL ORC MR 1645 1648 3 9.6 104.6 10.6X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU 
@ 2.60GHz +Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 208 213 9 75.7 13.2 1.0X +ParquetReader Vectorized: DataPageV2 355 382 14 44.3 22.6 0.6X +ParquetReader Vectorized -> Row: DataPageV1 212 233 8 74.1 13.5 1.0X +ParquetReader Vectorized -> Row: DataPageV2 350 353 7 45.0 22.2 0.6X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 25565 25574 13 0.6 1625.4 1.0X -SQL Json 17510 17518 11 0.9 1113.3 1.5X -SQL Parquet Vectorized 259 266 9 60.7 16.5 98.6X -SQL Parquet MR 2628 2647 28 6.0 167.1 9.7X -SQL ORC Vectorized 357 365 6 44.1 22.7 71.6X -SQL ORC MR 2144 2151 10 7.3 136.3 11.9X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz -Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 385 390 8 40.8 24.5 1.0X -ParquetReader Vectorized -> Row 345 350 6 45.6 21.9 1.1X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL CSV 21825 21944 169 0.7 1387.6 1.0X +SQL Json 11877 11927 71 1.3 755.1 1.8X +SQL Parquet Vectorized: DataPageV1 229 242 18 68.8 14.5 95.5X +SQL Parquet Vectorized: DataPageV2 435 452 23 36.1 27.7 50.1X +SQL Parquet MR: DataPageV1 2050 2184 190 7.7 130.3 10.6X +SQL Parquet MR: DataPageV2 1829 1927 138 8.6 116.3 11.9X +SQL ORC Vectorized 287 308 14 54.8 18.3 76.0X +SQL ORC MR 1579 1603 34 10.0 100.4 13.8X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 299 341 86 52.6 19.0 1.0X +ParquetReader Vectorized: DataPageV2 551 607 110 28.5 35.1 0.5X +ParquetReader Vectorized -> Row: DataPageV1 341 344 4 46.2 21.7 0.9X +ParquetReader Vectorized -> Row: DataPageV2 508 557 33 31.0 32.3 0.6X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 19931 19941 13 0.8 1267.2 1.0X -SQL Json 17274 17302 40 0.9 1098.2 1.2X -SQL Parquet Vectorized 175 182 10 90.0 11.1 114.1X -SQL Parquet MR 2496 2502 9 6.3 158.7 8.0X -SQL ORC Vectorized 432 436 4 36.4 27.5 46.1X -SQL ORC MR 2184 2187 5 7.2 138.8 9.1X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz -Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 287 289 5 54.9 18.2 1.0X -ParquetReader Vectorized -> Row 281 283 3 55.9 17.9 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL CSV 17585 17926 482 0.9 1118.0 1.0X +SQL Json 11927 12180 357 1.3 758.3 1.5X +SQL Parquet Vectorized: DataPageV1 150 161 11 104.6 9.6 116.9X +SQL Parquet Vectorized: DataPageV2 150 160 8 104.7 9.5 117.1X +SQL Parquet MR: DataPageV1 1830 1867 52 8.6 116.4 9.6X +SQL Parquet MR: DataPageV2 1715 1828 160 9.2 109.1 10.3X +SQL ORC Vectorized 328 358 15 48.0 20.8 53.6X +SQL ORC MR 1584 1687 145 9.9 100.7 11.1X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 207 211 8 76.0 13.2 1.0X +ParquetReader Vectorized: DataPageV2 207 220 11 75.8 13.2 1.0X +ParquetReader Vectorized -> Row: DataPageV1 208 214 9 75.7 13.2 1.0X +ParquetReader Vectorized -> Row: DataPageV2 208 213 9 75.6 13.2 1.0X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 26664 26695 44 0.6 1695.3 1.0X -SQL Json 22655 22657 3 0.7 1440.4 1.2X -SQL Parquet Vectorized 249 254 8 63.2 15.8 107.1X -SQL Parquet MR 2689 2750 86 5.8 171.0 9.9X -SQL ORC Vectorized 517 523 7 30.4 32.9 51.6X -SQL ORC MR 2269 2270 1 6.9 144.3 11.8X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz -Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized 359 404 100 43.8 22.8 1.0X -ParquetReader Vectorized -> Row 325 329 5 48.4 20.7 1.1X +SQL CSV 22569 22614 63 0.7 1434.9 1.0X +SQL Json 15590 15600 15 1.0 991.2 1.4X +SQL Parquet Vectorized: DataPageV1 225 241 17 69.9 14.3 100.3X +SQL Parquet Vectorized: DataPageV2 219 236 13 72.0 13.9 103.3X +SQL Parquet MR: DataPageV1 2013 2109 136 7.8 128.0 11.2X +SQL Parquet MR: DataPageV2 1850 1967 165 8.5 117.6 12.2X +SQL ORC Vectorized 396 416 25 39.7 25.2 56.9X +SQL ORC MR 1707 1763 79 9.2 108.5 13.2X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------- +ParquetReader Vectorized: DataPageV1 280 298 13 56.2 17.8 1.0X +ParquetReader Vectorized: DataPageV2 278 300 21 56.6 17.7 1.0X +ParquetReader Vectorized -> Row: DataPageV1 280 299 13 56.2 17.8 1.0X +ParquetReader Vectorized -> Row: DataPageV2 304 307 4 51.8 19.3 0.9X 
================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 18336 18703 519 0.6 1748.7 1.0X -SQL Json 15924 16092 238 0.7 1518.6 1.2X -SQL Parquet Vectorized 2534 2540 9 4.1 241.6 7.2X -SQL Parquet MR 4768 4772 5 2.2 454.7 3.8X -SQL ORC Vectorized 2477 2513 51 4.2 236.3 7.4X -SQL ORC MR 4451 4470 27 2.4 424.5 4.1X +SQL CSV 15548 16002 641 0.7 1482.8 1.0X +SQL Json 10801 11108 434 1.0 1030.1 1.4X +SQL Parquet Vectorized: DataPageV1 1858 1966 152 5.6 177.2 8.4X +SQL Parquet Vectorized: DataPageV2 2342 2466 175 4.5 223.4 6.6X +SQL Parquet MR: DataPageV1 3873 3908 49 2.7 369.4 4.0X +SQL Parquet MR: DataPageV2 3764 3869 148 2.8 358.9 4.1X +SQL ORC Vectorized 2018 2020 3 5.2 192.5 7.7X +SQL ORC MR 3247 3450 287 3.2 309.7 4.8X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9701 9753 74 1.1 925.1 1.0X -SQL Json 9562 9566 6 1.1 911.9 1.0X -SQL Parquet Vectorized 907 916 8 11.6 86.5 10.7X -SQL Parquet MR 2020 2021 2 5.2 192.6 4.8X -SQL ORC Vectorized 536 539 3 19.6 51.1 18.1X -SQL ORC MR 2211 2218 9 4.7 210.9 4.4X +SQL CSV 8028 8337 436 1.3 765.6 1.0X +SQL Json 6362 6488 178 1.6 606.7 1.3X +SQL Parquet Vectorized: DataPageV1 642 673 51 16.3 61.3 12.5X +SQL Parquet Vectorized: DataPageV2 646 678 40 16.2 61.6 12.4X +SQL Parquet MR: DataPageV1 1504 1604 141 7.0 143.5 5.3X +SQL Parquet MR: DataPageV2 1645 1646 1 6.4 156.9 4.9X +SQL ORC Vectorized 386 415 25 27.2 36.8 20.8X +SQL ORC MR 1704 1730 37 6.2 162.5 4.7X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz -Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 25664 25733 97 0.6 1631.7 1.0X -Data column - Json 17014 17023 13 0.9 1081.7 1.5X -Data column - Parquet Vectorized 261 268 8 60.2 16.6 98.2X -Data column - Parquet MR 3173 3182 14 5.0 201.7 8.1X -Data column - ORC Vectorized 363 365 1 43.3 23.1 70.7X -Data column - ORC MR 2672 2675 4 5.9 169.9 9.6X -Partition column - CSV 8197 8202 7 1.9 521.2 3.1X 
-Partition column - Json 12495 12501 9 1.3 794.4 2.1X -Partition column - Parquet Vectorized 67 69 2 236.1 4.2 385.3X -Partition column - Parquet MR 1465 1466 1 10.7 93.2 17.5X -Partition column - ORC Vectorized 68 71 4 232.7 4.3 379.7X -Partition column - ORC MR 1625 1625 0 9.7 103.3 15.8X -Both columns - CSV 26284 26309 36 0.6 1671.1 1.0X -Both columns - Json 19343 19369 37 0.8 1229.8 1.3X -Both columns - Parquet Vectorized 311 321 10 50.5 19.8 82.5X -Both columns - Parquet MR 3355 3356 2 4.7 213.3 7.6X -Both columns - ORC Vectorized 415 418 5 37.9 26.4 61.9X -Both columns - ORC MR 2739 2743 6 5.7 174.1 9.4X +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------------- +Data column - CSV 21472 21514 59 0.7 1365.2 1.0X +Data column - Json 11537 11606 97 1.4 733.5 1.9X +Data column - Parquet Vectorized: DataPageV1 238 256 11 66.1 15.1 90.2X +Data column - Parquet Vectorized: DataPageV2 482 507 17 32.6 30.6 44.6X +Data column - Parquet MR: DataPageV1 2213 2355 200 7.1 140.7 9.7X +Data column - Parquet MR: DataPageV2 2036 2163 179 7.7 129.4 10.5X +Data column - ORC Vectorized 289 310 20 54.4 18.4 74.3X +Data column - ORC MR 1898 1936 54 8.3 120.7 11.3X +Partition column - CSV 6307 6364 80 2.5 401.0 3.4X +Partition column - Json 9167 9253 121 1.7 582.8 2.3X +Partition column - Parquet Vectorized: DataPageV1 62 66 3 253.5 3.9 346.1X +Partition column - Parquet Vectorized: DataPageV2 61 65 2 259.2 3.9 353.8X +Partition column - Parquet MR: DataPageV1 1086 1088 3 14.5 69.0 19.8X +Partition column - Parquet MR: DataPageV2 1091 1146 78 14.4 69.4 19.7X +Partition column - ORC Vectorized 63 67 2 251.1 4.0 342.9X +Partition column - ORC MR 1173 1175 3 13.4 74.6 18.3X +Both columns - CSV 21458 22038 820 0.7 1364.3 1.0X +Both columns - Json 12697 12712 22 1.2 807.2 1.7X +Both columns - Parquet Vectorized: DataPageV1 275 288 10 57.2 17.5 78.0X +Both columns - Parquet Vectorized: DataPageV2 505 525 24 31.2 32.1 42.5X +Both columns - Parquet MR: DataPageV1 2541 2547 9 6.2 161.5 8.5X +Both columns - Parquet MR: DataPageV2 2059 2060 2 7.6 130.9 10.4X +Both columns - ORC Vectorized 326 349 16 48.3 20.7 66.0X +Both columns - ORC MR 2116 2151 50 7.4 134.5 10.1X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 12006 12014 11 0.9 1145.0 1.0X -SQL Json 19062 19074 16 0.6 1817.9 0.6X -SQL Parquet Vectorized 1608 1612 6 6.5 153.3 7.5X -SQL Parquet MR 3986 4005 27 2.6 380.1 3.0X -ParquetReader Vectorized 1199 1203 7 8.7 114.3 10.0X -SQL ORC Vectorized 1114 1114 0 9.4 106.2 10.8X -SQL ORC MR 3806 3806 1 2.8 362.9 3.2X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL 
CSV 10074 10372 422 1.0 960.7 1.0X +SQL Json 10037 10147 156 1.0 957.2 1.0X +SQL Parquet Vectorized: DataPageV1 1192 1226 47 8.8 113.7 8.4X +SQL Parquet Vectorized: DataPageV2 2349 2423 105 4.5 224.0 4.3X +SQL Parquet MR: DataPageV1 2995 3114 168 3.5 285.6 3.4X +SQL Parquet MR: DataPageV2 3847 3900 75 2.7 366.9 2.6X +ParquetReader Vectorized: DataPageV1 888 918 51 11.8 84.7 11.3X +ParquetReader Vectorized: DataPageV2 2128 2159 43 4.9 203.0 4.7X +SQL ORC Vectorized 837 908 61 12.5 79.8 12.0X +SQL ORC MR 2792 2882 127 3.8 266.3 3.6X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 8707 8791 118 1.2 830.4 1.0X -SQL Json 14505 14532 39 0.7 1383.3 0.6X -SQL Parquet Vectorized 1245 1265 27 8.4 118.8 7.0X -SQL Parquet MR 3019 3028 12 3.5 287.9 2.9X -ParquetReader Vectorized 1143 1156 20 9.2 109.0 7.6X -SQL ORC Vectorized 1543 1549 8 6.8 147.1 5.6X -SQL ORC MR 3672 3685 18 2.9 350.2 2.4X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL CSV 7808 7810 3 1.3 744.6 1.0X +SQL Json 7434 7491 82 1.4 708.9 1.1X +SQL Parquet Vectorized: DataPageV1 1037 1044 10 10.1 98.9 7.5X +SQL Parquet Vectorized: DataPageV2 1528 1529 3 6.9 145.7 5.1X +SQL Parquet MR: DataPageV1 2300 2411 156 4.6 219.4 3.4X +SQL Parquet MR: DataPageV2 2637 2639 4 4.0 251.5 3.0X +ParquetReader Vectorized: DataPageV1 843 907 56 12.4 80.4 9.3X +ParquetReader Vectorized: DataPageV2 1424 1446 30 7.4 135.8 5.5X +SQL ORC Vectorized 1131 1132 1 9.3 107.8 6.9X +SQL ORC MR 2781 2856 106 3.8 265.3 2.8X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5845 5848 4 1.8 557.4 1.0X -SQL Json 8854 8858 5 1.2 844.4 0.7X -SQL Parquet Vectorized 272 278 8 38.6 25.9 21.5X -SQL Parquet MR 1916 1936 27 5.5 182.7 3.1X -ParquetReader Vectorized 283 285 3 37.0 27.0 20.6X -SQL ORC Vectorized 548 551 3 19.1 52.3 10.7X -SQL ORC MR 1942 1944 2 5.4 185.2 3.0X +SQL CSV 5357 5538 255 2.0 510.9 1.0X +SQL Json 4354 4387 47 2.4 415.2 1.2X +SQL Parquet Vectorized: DataPageV1 212 226 15 49.5 20.2 25.3X +SQL Parquet Vectorized: DataPageV2 265 276 16 39.6 25.2 20.2X +SQL Parquet MR: DataPageV1 1575 1578 4 6.7 150.2 3.4X +SQL Parquet MR: DataPageV2 1624 1638 21 6.5 154.8 3.3X +ParquetReader Vectorized: DataPageV1 219 234 14 47.8 20.9 24.4X +ParquetReader Vectorized: DataPageV2 274 294 17 38.2 26.2 19.5X +SQL ORC Vectorized 370 393 12 28.4 35.3 14.5X +SQL ORC MR 1540 1545 7 6.8 146.9 3.5X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) 
Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 3388 3395 10 0.3 3231.0 1.0X -SQL Json 4079 4087 11 0.3 3889.6 0.8X -SQL Parquet Vectorized 55 59 7 19.2 52.1 62.0X -SQL Parquet MR 226 229 2 4.6 215.2 15.0X -SQL ORC Vectorized 62 67 13 17.0 58.7 55.0X -SQL ORC MR 194 198 5 5.4 185.0 17.5X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL CSV 2159 2212 74 0.5 2059.3 1.0X +SQL Json 2836 2896 84 0.4 2704.5 0.8X +SQL Parquet Vectorized: DataPageV1 54 59 9 19.5 51.4 40.1X +SQL Parquet Vectorized: DataPageV2 66 72 8 15.9 63.1 32.7X +SQL Parquet MR: DataPageV1 173 186 10 6.1 164.5 12.5X +SQL Parquet MR: DataPageV2 159 172 8 6.6 151.8 13.6X +SQL ORC Vectorized 54 60 10 19.2 52.0 39.6X +SQL ORC MR 150 161 7 7.0 143.3 14.4X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 8141 8142 1 0.1 7764.3 1.0X -SQL Json 15614 15694 113 0.1 14890.4 0.5X -SQL Parquet Vectorized 70 78 12 14.9 67.0 115.8X -SQL Parquet MR 245 250 4 4.3 234.0 33.2X -SQL ORC Vectorized 77 83 9 13.5 73.8 105.2X -SQL ORC MR 212 215 2 4.9 202.1 38.4X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL CSV 5877 5883 8 0.2 5605.0 1.0X +SQL Json 11474 11587 159 0.1 10942.9 0.5X +SQL Parquet Vectorized: DataPageV1 66 72 7 15.9 63.1 88.9X +SQL Parquet Vectorized: DataPageV2 83 90 8 12.6 79.4 70.6X +SQL Parquet MR: DataPageV1 191 201 9 5.5 182.6 30.7X +SQL Parquet MR: DataPageV2 179 187 9 5.9 170.3 32.9X +SQL ORC Vectorized 70 76 12 14.9 67.1 83.5X +SQL ORC MR 167 175 7 6.3 159.2 35.2X + +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 14087 14102 20 0.1 13434.7 1.0X -SQL Json 30069 30223 218 0.0 28676.2 0.5X -SQL Parquet Vectorized 107 113 8 9.8 101.9 131.9X -SQL Parquet MR 289 295 4 3.6 275.9 48.7X -SQL ORC Vectorized 99 105 14 10.6 94.4 142.3X -SQL ORC MR 236 239 3 4.4 225.5 59.6X +SQL CSV 9695 9965 382 0.1 9245.8 1.0X +SQL Json 22119 23566 2045 0.0 21094.6 0.4X +SQL Parquet Vectorized: DataPageV1 96 104 7 10.9 91.6 100.9X +SQL Parquet Vectorized: DataPageV2 113 121 8 9.3 107.8 85.8X +SQL Parquet MR: DataPageV1 227 243 9 4.6 216.2 42.8X +SQL Parquet MR: DataPageV2 210 225 12 5.0 200.2 46.2X +SQL ORC Vectorized 90 96 10 11.7 85.7 107.9X +SQL ORC MR 188 199 9 5.6 178.9 51.7X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala index 5094cdf2296e0..7c9fa58d77f42 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala @@ -24,8 +24,7 @@ import scala.util.Random import org.apache.parquet.column.ParquetProperties 
import org.apache.parquet.hadoop.ParquetOutputFormat -import org.apache.spark.SparkConf -import org.apache.spark.TestUtils +import org.apache.spark.{SparkConf, TestUtils} import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.{DataFrame, DataFrameWriter, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow @@ -79,7 +78,7 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { saveAsCsvTable(testDf, dir.getCanonicalPath + "/csv") saveAsJsonTable(testDf, dir.getCanonicalPath + "/json") - saveAsParquetTable(testDf, dir.getCanonicalPath + "/parquet") + saveAsParquetV1Table(testDf, dir.getCanonicalPath + "/parquetV1") saveAsParquetV2Table(testDf, dir.getCanonicalPath + "/parquetV2") saveAsOrcTable(testDf, dir.getCanonicalPath + "/orc") } @@ -94,9 +93,9 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { spark.read.json(dir).createOrReplaceTempView("jsonTable") } - private def saveAsParquetTable(df: DataFrameWriter[Row], dir: String): Unit = { + private def saveAsParquetV1Table(df: DataFrameWriter[Row], dir: String): Unit = { df.mode("overwrite").option("compression", "snappy").parquet(dir) - spark.read.parquet(dir).createOrReplaceTempView("parquetTable") + spark.read.parquet(dir).createOrReplaceTempView("parquetV1Table") } private def saveAsParquetV2Table(df: DataFrameWriter[Row], dir: String): Unit = { @@ -112,6 +111,8 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { spark.read.orc(dir).createOrReplaceTempView("orcTable") } + private def withParquetVersions(f: String => Unit): Unit = Seq("V1", "V2").foreach(f) + def numericScanBenchmark(values: Int, dataType: DataType): Unit = { // Benchmarks running through spark sql. val sqlBenchmark = new Benchmark( @@ -126,7 +127,7 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { output = output) withTempPath { dir => - withTempTable("t1", "csvTable", "jsonTable", "parquetTable", "orcTable") { + withTempTable("t1", "csvTable", "jsonTable", "parquetV1Table", "parquetV2Table", "orcTable") { import spark.implicits._ spark.range(values).map(_ => Random.nextLong).createOrReplaceTempView("t1") @@ -145,13 +146,17 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { spark.sql(s"select $query from jsonTable").noop() } - sqlBenchmark.addCase("SQL Parquet Vectorized") { _ => - spark.sql(s"select $query from parquetTable").noop() + withParquetVersions { version => + sqlBenchmark.addCase(s"SQL Parquet Vectorized: DataPage$version") { _ => + spark.sql(s"select $query from parquet${version}Table").noop() + } } - sqlBenchmark.addCase("SQL Parquet MR") { _ => - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql(s"select $query from parquetTable").noop() + withParquetVersions { version => + sqlBenchmark.addCase(s"SQL Parquet MR: DataPage$version") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql(s"select $query from parquet${version}Table").noop() + } } } @@ -167,79 +172,93 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { sqlBenchmark.run() - // Driving the parquet reader in batch mode directly. 
- val files = TestUtils.listDirectory(new File(dir, "parquet")) val enableOffHeapColumnVector = spark.sessionState.conf.offHeapColumnVectorEnabled val vectorizedReaderBatchSize = spark.sessionState.conf.parquetVectorizedReaderBatchSize - parquetReaderBenchmark.addCase("ParquetReader Vectorized") { _ => - var longSum = 0L - var doubleSum = 0.0 - val aggregateValue: (ColumnVector, Int) => Unit = dataType match { - case BooleanType => (col: ColumnVector, i: Int) => if (col.getBoolean(i)) longSum += 1L - case ByteType => (col: ColumnVector, i: Int) => longSum += col.getByte(i) - case ShortType => (col: ColumnVector, i: Int) => longSum += col.getShort(i) - case IntegerType => (col: ColumnVector, i: Int) => longSum += col.getInt(i) - case LongType => (col: ColumnVector, i: Int) => longSum += col.getLong(i) - case FloatType => (col: ColumnVector, i: Int) => doubleSum += col.getFloat(i) - case DoubleType => (col: ColumnVector, i: Int) => doubleSum += col.getDouble(i) - } + withParquetVersions { version => + // Driving the parquet reader in batch mode directly. + val files = TestUtils.listDirectory(new File(dir, s"parquet$version")) + parquetReaderBenchmark.addCase(s"ParquetReader Vectorized: DataPage$version") { _ => + var longSum = 0L + var doubleSum = 0.0 + val aggregateValue: (ColumnVector, Int) => Unit = dataType match { + case BooleanType => + (col: ColumnVector, i: Int) => if (col.getBoolean(i)) longSum += 1L + case ByteType => + (col: ColumnVector, i: Int) => longSum += col.getByte(i) + case ShortType => + (col: ColumnVector, i: Int) => longSum += col.getShort(i) + case IntegerType => + (col: ColumnVector, i: Int) => longSum += col.getInt(i) + case LongType => + (col: ColumnVector, i: Int) => longSum += col.getLong(i) + case FloatType => + (col: ColumnVector, i: Int) => doubleSum += col.getFloat(i) + case DoubleType => + (col: ColumnVector, i: Int) => doubleSum += col.getDouble(i) + } - files.foreach { p => - val reader = new VectorizedParquetRecordReader( - enableOffHeapColumnVector, vectorizedReaderBatchSize) - try { - reader.initialize(p, ("id" :: Nil).asJava) - val batch = reader.resultBatch() - val col = batch.column(0) - while (reader.nextBatch()) { - val numRows = batch.numRows() - var i = 0 - while (i < numRows) { - if (!col.isNullAt(i)) aggregateValue(col, i) - i += 1 + files.foreach { p => + val reader = new VectorizedParquetRecordReader( + enableOffHeapColumnVector, vectorizedReaderBatchSize) + try { + reader.initialize(p, ("id" :: Nil).asJava) + val batch = reader.resultBatch() + val col = batch.column(0) + while (reader.nextBatch()) { + val numRows = batch.numRows() + var i = 0 + while (i < numRows) { + if (!col.isNullAt(i)) aggregateValue(col, i) + i += 1 + } } + } finally { + reader.close() } - } finally { - reader.close() } } } - // Decoding in vectorized but having the reader return rows. 
- parquetReaderBenchmark.addCase("ParquetReader Vectorized -> Row") { num => - var longSum = 0L - var doubleSum = 0.0 - val aggregateValue: (InternalRow) => Unit = dataType match { - case BooleanType => (col: InternalRow) => if (col.getBoolean(0)) longSum += 1L - case ByteType => (col: InternalRow) => longSum += col.getByte(0) - case ShortType => (col: InternalRow) => longSum += col.getShort(0) - case IntegerType => (col: InternalRow) => longSum += col.getInt(0) - case LongType => (col: InternalRow) => longSum += col.getLong(0) - case FloatType => (col: InternalRow) => doubleSum += col.getFloat(0) - case DoubleType => (col: InternalRow) => doubleSum += col.getDouble(0) - } + withParquetVersions { version => + // Driving the parquet reader in batch mode directly. + val files = TestUtils.listDirectory(new File(dir, s"parquet$version")) + // Decoding in vectorized but having the reader return rows. + parquetReaderBenchmark + .addCase(s"ParquetReader Vectorized -> Row: DataPage$version") { _ => + var longSum = 0L + var doubleSum = 0.0 + val aggregateValue: (InternalRow) => Unit = dataType match { + case BooleanType => (col: InternalRow) => if (col.getBoolean(0)) longSum += 1L + case ByteType => (col: InternalRow) => longSum += col.getByte(0) + case ShortType => (col: InternalRow) => longSum += col.getShort(0) + case IntegerType => (col: InternalRow) => longSum += col.getInt(0) + case LongType => (col: InternalRow) => longSum += col.getLong(0) + case FloatType => (col: InternalRow) => doubleSum += col.getFloat(0) + case DoubleType => (col: InternalRow) => doubleSum += col.getDouble(0) + } - files.map(_.asInstanceOf[String]).foreach { p => - val reader = new VectorizedParquetRecordReader( - enableOffHeapColumnVector, vectorizedReaderBatchSize) - try { - reader.initialize(p, ("id" :: Nil).asJava) - val batch = reader.resultBatch() - while (reader.nextBatch()) { - val it = batch.rowIterator() - while (it.hasNext) { - val record = it.next() - if (!record.isNullAt(0)) aggregateValue(record) + files.foreach { p => + val reader = new VectorizedParquetRecordReader( + enableOffHeapColumnVector, vectorizedReaderBatchSize) + try { + reader.initialize(p, ("id" :: Nil).asJava) + val batch = reader.resultBatch() + while (reader.nextBatch()) { + val it = batch.rowIterator() + while (it.hasNext) { + val record = it.next() + if (!record.isNullAt(0)) aggregateValue(record) + } + } + } finally { + reader.close() } } - } finally { - reader.close() } - } } - - parquetReaderBenchmark.run() } + + parquetReaderBenchmark.run() } } @@ -247,7 +266,7 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { val benchmark = new Benchmark("Int and String Scan", values, output = output) withTempPath { dir => - withTempTable("t1", "csvTable", "jsonTable", "parquetTable", "orcTable") { + withTempTable("t1", "csvTable", "jsonTable", "parquetV1Table", "parquetV2Table", "orcTable") { import spark.implicits._ spark.range(values).map(_ => Random.nextLong).createOrReplaceTempView("t1") @@ -263,13 +282,17 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { spark.sql("select sum(c1), sum(length(c2)) from jsonTable").noop() } - benchmark.addCase("SQL Parquet Vectorized") { _ => - spark.sql("select sum(c1), sum(length(c2)) from parquetTable").noop() + withParquetVersions { version => + benchmark.addCase(s"SQL Parquet Vectorized: DataPage$version") { _ => + spark.sql(s"select sum(c1), sum(length(c2)) from parquet${version}Table").noop() + } } - benchmark.addCase("SQL Parquet MR") { _ => - 
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("select sum(c1), sum(length(c2)) from parquetTable").noop() + withParquetVersions { version => + benchmark.addCase(s"SQL Parquet MR: DataPage$version") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql(s"select sum(c1), sum(length(c2)) from parquet${version}Table").noop() + } } } @@ -292,7 +315,7 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { val benchmark = new Benchmark("Repeated String", values, output = output) withTempPath { dir => - withTempTable("t1", "csvTable", "jsonTable", "parquetTable", "orcTable") { + withTempTable("t1", "csvTable", "jsonTable", "parquetV1Table", "parquetV2Table", "orcTable") { import spark.implicits._ spark.range(values).map(_ => Random.nextLong).createOrReplaceTempView("t1") @@ -308,13 +331,17 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { spark.sql("select sum(length(c1)) from jsonTable").noop() } - benchmark.addCase("SQL Parquet Vectorized") { _ => - spark.sql("select sum(length(c1)) from parquetTable").noop() + withParquetVersions { version => + benchmark.addCase(s"SQL Parquet Vectorized: DataPage$version") { _ => + spark.sql(s"select sum(length(c1)) from parquet${version}Table").noop() + } } - benchmark.addCase("SQL Parquet MR") { _ => - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("select sum(length(c1)) from parquetTable").noop() + withParquetVersions { version => + benchmark.addCase(s"SQL Parquet MR: DataPage$version") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql(s"select sum(length(c1)) from parquet${version}Table").noop() + } } } @@ -337,7 +364,7 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { val benchmark = new Benchmark("Partitioned Table", values, output = output) withTempPath { dir => - withTempTable("t1", "csvTable", "jsonTable", "parquetTable", "orcTable") { + withTempTable("t1", "csvTable", "jsonTable", "parquetV1Table", "parquetV2Table", "orcTable") { import spark.implicits._ spark.range(values).map(_ => Random.nextLong).createOrReplaceTempView("t1") @@ -351,13 +378,17 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { spark.sql("select sum(id) from jsonTable").noop() } - benchmark.addCase("Data column - Parquet Vectorized") { _ => - spark.sql("select sum(id) from parquetTable").noop() + withParquetVersions { version => + benchmark.addCase(s"Data column - Parquet Vectorized: DataPage$version") { _ => + spark.sql(s"select sum(id) from parquet${version}Table").noop() + } } - benchmark.addCase("Data column - Parquet MR") { _ => - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("select sum(id) from parquetTable").noop() + withParquetVersions { version => + benchmark.addCase(s"Data column - Parquet MR: DataPage$version") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql(s"select sum(id) from parquet${version}Table").noop() + } } } @@ -379,13 +410,17 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { spark.sql("select sum(p) from jsonTable").noop() } - benchmark.addCase("Partition column - Parquet Vectorized") { _ => - spark.sql("select sum(p) from parquetTable").noop() + withParquetVersions { version => + benchmark.addCase(s"Partition column - Parquet Vectorized: DataPage$version") { _ => + spark.sql(s"select sum(p) from parquet${version}Table").noop() + } } - benchmark.addCase("Partition 
column - Parquet MR") { _ => - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("select sum(p) from parquetTable").noop() + withParquetVersions { version => + benchmark.addCase(s"Partition column - Parquet MR: DataPage$version") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql(s"select sum(p) from parquet${version}Table").noop() + } } } @@ -407,13 +442,17 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { spark.sql("select sum(p), sum(id) from jsonTable").noop() } - benchmark.addCase("Both columns - Parquet Vectorized") { _ => - spark.sql("select sum(p), sum(id) from parquetTable").noop() + withParquetVersions { version => + benchmark.addCase(s"Both columns - Parquet Vectorized: DataPage$version") { _ => + spark.sql(s"select sum(p), sum(id) from parquet${version}Table").noop() + } } - benchmark.addCase("Both columns - Parquet MR") { _ => - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("select sum(p), sum(id) from parquetTable").noop() + withParquetVersions { version => + benchmark.addCase(s"Both columns - Parquet MR: DataPage$version") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql(s"select sum(p), sum(id) from parquet${version}Table").noop() + } } } @@ -438,7 +477,7 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { new Benchmark(s"String with Nulls Scan ($percentageOfNulls%)", values, output = output) withTempPath { dir => - withTempTable("t1", "csvTable", "jsonTable", "parquetTable", "orcTable") { + withTempTable("t1", "csvTable", "jsonTable", "parquetV1Table", "parquetV2Table", "orcTable") { spark.range(values).createOrReplaceTempView("t1") prepareTable( @@ -457,39 +496,45 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { "not NULL and c2 is not NULL").noop() } - benchmark.addCase("SQL Parquet Vectorized") { _ => - spark.sql("select sum(length(c2)) from parquetTable where c1 is " + - "not NULL and c2 is not NULL").noop() + withParquetVersions { version => + benchmark.addCase(s"SQL Parquet Vectorized: DataPage$version") { _ => + spark.sql(s"select sum(length(c2)) from parquet${version}Table where c1 is " + + "not NULL and c2 is not NULL").noop() + } } - benchmark.addCase("SQL Parquet MR") { _ => - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("select sum(length(c2)) from parquetTable where c1 is " + - "not NULL and c2 is not NULL").noop() + withParquetVersions { version => + benchmark.addCase(s"SQL Parquet MR: DataPage$version") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql(s"select sum(length(c2)) from parquet${version}Table where c1 is " + + "not NULL and c2 is not NULL").noop() + } } } - val files = TestUtils.listDirectory(new File(dir, "parquet")) - val enableOffHeapColumnVector = spark.sessionState.conf.offHeapColumnVectorEnabled - val vectorizedReaderBatchSize = spark.sessionState.conf.parquetVectorizedReaderBatchSize - benchmark.addCase("ParquetReader Vectorized") { num => - var sum = 0 - files.foreach { p => - val reader = new VectorizedParquetRecordReader( - enableOffHeapColumnVector, vectorizedReaderBatchSize) - try { - reader.initialize(p, ("c1" :: "c2" :: Nil).asJava) - val batch = reader.resultBatch() - while (reader.nextBatch()) { - val rowIterator = batch.rowIterator() - while (rowIterator.hasNext) { - val row = rowIterator.next() - val value = row.getUTF8String(0) - if (!row.isNullAt(0) && 
!row.isNullAt(1)) sum += value.numBytes() + withParquetVersions { version => + val files = TestUtils.listDirectory(new File(dir, s"parquet$version")) + val enableOffHeapColumnVector = spark.sessionState.conf.offHeapColumnVectorEnabled + val vectorizedReaderBatchSize = spark.sessionState.conf.parquetVectorizedReaderBatchSize + benchmark.addCase(s"ParquetReader Vectorized: DataPage$version") { _ => + var sum = 0 + files.foreach { p => + val reader = new VectorizedParquetRecordReader( + enableOffHeapColumnVector, vectorizedReaderBatchSize) + try { + reader.initialize(p, ("c1" :: "c2" :: Nil).asJava) + val batch = reader.resultBatch() + while (reader.nextBatch()) { + val rowIterator = batch.rowIterator() + while (rowIterator.hasNext) { + val row = rowIterator.next() + val value = row.getUTF8String(0) + if (!row.isNullAt(0) && !row.isNullAt(1)) sum += value.numBytes() + } } + } finally { + reader.close() } - } finally { - reader.close() } } } @@ -518,7 +563,7 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { output = output) withTempPath { dir => - withTempTable("t1", "csvTable", "jsonTable", "parquetTable", "orcTable") { + withTempTable("t1", "csvTable", "jsonTable", "parquetV1Table", "parquetV2Table", "orcTable") { import spark.implicits._ val middle = width / 2 val selectExpr = (1 to width).map(i => s"value as c$i") @@ -535,13 +580,17 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { spark.sql(s"SELECT sum(c$middle) FROM jsonTable").noop() } - benchmark.addCase("SQL Parquet Vectorized") { _ => - spark.sql(s"SELECT sum(c$middle) FROM parquetTable").noop() + withParquetVersions { version => + benchmark.addCase(s"SQL Parquet Vectorized: DataPage$version") { _ => + spark.sql(s"SELECT sum(c$middle) FROM parquet${version}Table").noop() + } } - benchmark.addCase("SQL Parquet MR") { _ => - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql(s"SELECT sum(c$middle) FROM parquetTable").noop() + withParquetVersions { version => + benchmark.addCase(s"SQL Parquet MR: DataPage$version") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql(s"SELECT sum(c$middle) FROM parquet${version}Table").noop() + } } } From c288b3466158498e8e73279b3a7828ff608e35a7 Mon Sep 17 00:00:00 2001 From: Parth Chandra Date: Wed, 19 Jan 2022 09:51:33 -0800 Subject: [PATCH 054/513] [SPARK-36879][SQL][FOLLOWUP] Address comments and fix code style ### What changes were proposed in this pull request? Addresses some formatting changes that were requested in a previous PR (after it was merged). ### Why are the changes needed? Review comments addressed ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? Not needed. Existing unit tests pass. Closes #35212 from parthchandra/SPARK-36879-PR2. 
Authored-by: Parth Chandra Signed-off-by: Chao Sun --- .../VectorizedDeltaBinaryPackedReader.java | 8 +-- .../parquet/ParquetRebaseDatetimeSuite.scala | 67 ++++++++++--------- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaBinaryPackedReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaBinaryPackedReader.java index 62fb5f8c96bbf..7b2aac3118e5f 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaBinaryPackedReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaBinaryPackedReader.java @@ -73,10 +73,10 @@ public class VectorizedDeltaBinaryPackedReader extends VectorizedReaderBase { private ByteBufferInputStream in; // temporary buffers used by readByte, readShort, readInteger, and readLong - byte byteVal; - short shortVal; - int intVal; - long longVal; + private byte byteVal; + private short shortVal; + private int intVal; + private long longVal; @Override public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRebaseDatetimeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRebaseDatetimeSuite.scala index 49251af54193f..dbf7f54f6ff90 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRebaseDatetimeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRebaseDatetimeSuite.scala @@ -143,12 +143,12 @@ abstract class ParquetRebaseDatetimeSuite val df = Seq.tabulate(N)(rowFunc).toDF("dict", "plain") .select($"dict".cast(catalystType), $"plain".cast(catalystType)) withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> tsOutputType) { - checkDefaultLegacyRead(oldPath) + checkDefaultLegacyRead(oldPath) withSQLConf(inWriteConf -> CORRECTED.toString) { - df.write.mode("overwrite").parquet(path3_x) + df.write.mode("overwrite").parquet(path3_x) } withSQLConf(inWriteConf -> LEGACY.toString) { - df.write.parquet(path3_x_rebase) + df.write.parquet(path3_x_rebase) } } // For Parquet files written by Spark 3.0, we know the writer info and don't need the @@ -243,40 +243,41 @@ abstract class ParquetRebaseDatetimeSuite SQLConf.PARQUET_INT96_REBASE_MODE_IN_READ.key ) ).foreach { case (outType, tsStr, nonRebased, inWriteConf, inReadConf) => - // Ignore the default JVM time zone and use the session time zone instead of it in rebasing. - DateTimeTestUtils.withDefaultTimeZone(DateTimeTestUtils.JST) { - withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> DateTimeTestUtils.LA.getId) { - withClue(s"output type $outType") { - withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> outType) { - withTempPath { dir => - val path = dir.getAbsolutePath - withSQLConf(inWriteConf -> LEGACY.toString) { - Seq.tabulate(N)(_ => tsStr).toDF("tsS") - .select($"tsS".cast("timestamp").as("ts")) - .repartition(1) - .write - .option("parquet.enable.dictionary", dictionaryEncoding) - .parquet(path) - } + // Ignore the default JVM time zone and use the session time zone instead of + // it in rebasing. 
+ DateTimeTestUtils.withDefaultTimeZone(DateTimeTestUtils.JST) { + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> DateTimeTestUtils.LA.getId) { + withClue(s"output type $outType") { + withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> outType) { + withTempPath { dir => + val path = dir.getAbsolutePath + withSQLConf(inWriteConf -> LEGACY.toString) { + Seq.tabulate(N)(_ => tsStr).toDF("tsS") + .select($"tsS".cast("timestamp").as("ts")) + .repartition(1) + .write + .option("parquet.enable.dictionary", dictionaryEncoding) + .parquet(path) + } - withAllParquetReaders { - // The file metadata indicates if it needs rebase or not, so we can always get - // the correct result regardless of the "rebase mode" config. - runInMode(inReadConf, Seq(LEGACY, CORRECTED, EXCEPTION)) { options => - checkAnswer( - spark.read.options(options).parquet(path).select($"ts".cast("string")), - Seq.tabulate(N)(_ => Row(tsStr))) - } + withAllParquetReaders { + // The file metadata indicates if it needs rebase or not, so we can always get + // the correct result regardless of the "rebase mode" config. + runInMode(inReadConf, Seq(LEGACY, CORRECTED, EXCEPTION)) { options => + checkAnswer( + spark.read.options(options).parquet(path).select($"ts".cast("string")), + Seq.tabulate(N)(_ => Row(tsStr))) + } - // Force to not rebase to prove the written datetime values are rebased - // and we will get wrong result if we don't rebase while reading. - withSQLConf("spark.test.forceNoRebase" -> "true") { - checkAnswer( - spark.read.parquet(path).select($"ts".cast("string")), - Seq.tabulate(N)(_ => Row(nonRebased))) + // Force to not rebase to prove the written datetime values are rebased + // and we will get wrong result if we don't rebase while reading. + withSQLConf("spark.test.forceNoRebase" -> "true") { + checkAnswer( + spark.read.parquet(path).select($"ts".cast("string")), + Seq.tabulate(N)(_ => Row(nonRebased))) + } } } - } } } } From 2e95c6f28d012c88c691ccd28cb04674461ff782 Mon Sep 17 00:00:00 2001 From: Sajith Ariyarathna Date: Wed, 19 Jan 2022 12:19:57 -0600 Subject: [PATCH 055/513] [SPARK-37934][BUILD] Upgrade Jetty version to 9.4.44 ### What changes were proposed in this pull request? This PR upgrades Jetty version to `9.4.44.v20210927`. ### Why are the changes needed? We would like to have the fix for https://github.com/eclipse/jetty.project/issues/6973 in latest Spark. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI Closes #35230 from this/upgrade-jetty-9.4.44. 
Authored-by: Sajith Ariyarathna Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 4 ++-- pom.xml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 0227c7653a93b..09b01a3524e22 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -145,7 +145,7 @@ jersey-hk2/2.34//jersey-hk2-2.34.jar jersey-server/2.34//jersey-server-2.34.jar jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar jetty-util/6.1.26//jetty-util-6.1.26.jar -jetty-util/9.4.43.v20210629//jetty-util-9.4.43.v20210629.jar +jetty-util/9.4.44.v20210927//jetty-util-9.4.44.v20210927.jar jetty/6.1.26//jetty-6.1.26.jar jline/2.14.6//jline-2.14.6.jar joda-time/2.10.12//joda-time-2.10.12.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index afa4ba5e1f28b..b7cc91f9e0dfd 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -133,8 +133,8 @@ jersey-container-servlet/2.34//jersey-container-servlet-2.34.jar jersey-hk2/2.34//jersey-hk2-2.34.jar jersey-server/2.34//jersey-server-2.34.jar jettison/1.1//jettison-1.1.jar -jetty-util-ajax/9.4.43.v20210629//jetty-util-ajax-9.4.43.v20210629.jar -jetty-util/9.4.43.v20210629//jetty-util-9.4.43.v20210629.jar +jetty-util-ajax/9.4.44.v20210927//jetty-util-ajax-9.4.44.v20210927.jar +jetty-util/9.4.44.v20210927//jetty-util-9.4.44.v20210927.jar jline/2.14.6//jline-2.14.6.jar joda-time/2.10.12//joda-time-2.10.12.jar jodd-core/3.5.2//jodd-core-3.5.2.jar diff --git a/pom.xml b/pom.xml index 50871131c1da2..4f53da0f94a4a 100644 --- a/pom.xml +++ b/pom.xml @@ -138,7 +138,7 @@ 10.14.2.0 1.12.2 1.7.2 - 9.4.43.v20210629 + 9.4.44.v20210927 4.0.3 0.10.0 2.5.0 From 9ffca0c33f4f0e585daa6a649b67937952bcfbd9 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Wed, 19 Jan 2022 14:07:30 -0800 Subject: [PATCH 056/513] [SPARK-37957][SQL] Correctly pass deterministic flag for V2 scalar functions ### What changes were proposed in this pull request? Pass `isDeterministic` flag to `ApplyFunctionExpression`, `Invoke` and `StaticInvoke` when processing V2 scalar functions. ### Why are the changes needed? A V2 scalar function can be declared as non-deterministic. However, currently Spark doesn't pass the flag when converting the V2 function to a catalyst expression, which could lead to incorrect results if being applied with certain optimizations. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added a unit test. Closes #35243 from sunchao/SPARK-37957. 
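For illustration only (not part of this patch), a non-deterministic V2 scalar function written in Scala would look roughly like the `JavaRandomAdd` test function added below; the class name and the random-seed handling are made up, and the `UnboundFunction` registration plumbing used by the test is omitted:

```
import scala.util.Random

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.connector.catalog.functions.ScalarFunction
import org.apache.spark.sql.types.{DataType, DataTypes}

// Returns a different value on every invocation, so it reports
// isDeterministic = false. Once that flag is forwarded to Invoke,
// StaticInvoke and ApplyFunctionExpression, constant folding can no
// longer collapse a call like rand_add(42) into a precomputed literal.
class RandAdd extends ScalarFunction[Integer] {
  private val rand = new Random()

  override def inputTypes(): Array[DataType] = Array(DataTypes.IntegerType)
  override def resultType(): DataType = DataTypes.IntegerType
  override def name(): String = "rand_add"
  override def isDeterministic(): Boolean = false
  override def produceResult(input: InternalRow): Integer =
    Integer.valueOf(input.getInt(0) + rand.nextInt())
}
```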
Authored-by: Chao Sun Signed-off-by: Chao Sun --- .../sql/catalyst/analysis/Analyzer.scala | 6 +- .../expressions/ApplyFunctionExpression.scala | 2 + .../expressions/objects/objects.scala | 12 +- .../catalog/functions/JavaLongAdd.java | 2 +- .../catalog/functions/JavaRandomAdd.java | 110 ++++++++++++++++++ .../catalog/functions/JavaStrLen.java | 2 +- .../connector/DataSourceV2FunctionSuite.scala | 33 +++++- 7 files changed, 158 insertions(+), 9 deletions(-) create mode 100644 sql/core/src/test/java/test/org/apache/spark/sql/connector/catalog/functions/JavaRandomAdd.java diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 182e5997ec34b..a6c6036520d53 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2274,12 +2274,14 @@ class Analyzer(override val catalogManager: CatalogManager) case Some(m) if Modifier.isStatic(m.getModifiers) => StaticInvoke(scalarFunc.getClass, scalarFunc.resultType(), MAGIC_METHOD_NAME, arguments, inputTypes = declaredInputTypes, - propagateNull = false, returnNullable = scalarFunc.isResultNullable) + propagateNull = false, returnNullable = scalarFunc.isResultNullable, + isDeterministic = scalarFunc.isDeterministic) case Some(_) => val caller = Literal.create(scalarFunc, ObjectType(scalarFunc.getClass)) Invoke(caller, MAGIC_METHOD_NAME, scalarFunc.resultType(), arguments, methodInputTypes = declaredInputTypes, propagateNull = false, - returnNullable = scalarFunc.isResultNullable) + returnNullable = scalarFunc.isResultNullable, + isDeterministic = scalarFunc.isDeterministic) case _ => // TODO: handle functions defined in Scala too - in Scala, even if a // subclass do not override the default method in parent interface diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ApplyFunctionExpression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ApplyFunctionExpression.scala index b33b9ed57f112..da4000f53e3e8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ApplyFunctionExpression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ApplyFunctionExpression.scala @@ -31,6 +31,8 @@ case class ApplyFunctionExpression( override def name: String = function.name() override def dataType: DataType = function.resultType() override def inputTypes: Seq[AbstractDataType] = function.inputTypes().toSeq + override lazy val deterministic: Boolean = function.isDeterministic && + children.forall(_.deterministic) private lazy val reusedRow = new SpecificInternalRow(function.inputTypes()) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 50e214011b616..6d251b6d1007d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -240,6 +240,8 @@ object SerializerSupport { * without invoking the function. * @param returnNullable When false, indicating the invoked method will always return * non-null value. + * @param isDeterministic Whether the method invocation is deterministic or not. 
If false, Spark + * will not apply certain optimizations such as constant folding. */ case class StaticInvoke( staticObject: Class[_], @@ -248,7 +250,8 @@ case class StaticInvoke( arguments: Seq[Expression] = Nil, inputTypes: Seq[AbstractDataType] = Nil, propagateNull: Boolean = true, - returnNullable: Boolean = true) extends InvokeLike { + returnNullable: Boolean = true, + isDeterministic: Boolean = true) extends InvokeLike { val objectName = staticObject.getName.stripSuffix("$") val cls = if (staticObject.getName == objectName) { @@ -259,6 +262,7 @@ case class StaticInvoke( override def nullable: Boolean = needNullCheck || returnNullable override def children: Seq[Expression] = arguments + override lazy val deterministic: Boolean = isDeterministic && arguments.forall(_.deterministic) lazy val argClasses = ScalaReflection.expressionJavaClasses(arguments) @transient lazy val method = findMethod(cls, functionName, argClasses) @@ -340,6 +344,8 @@ case class StaticInvoke( * without invoking the function. * @param returnNullable When false, indicating the invoked method will always return * non-null value. + * @param isDeterministic Whether the method invocation is deterministic or not. If false, Spark + * will not apply certain optimizations such as constant folding. */ case class Invoke( targetObject: Expression, @@ -348,12 +354,14 @@ case class Invoke( arguments: Seq[Expression] = Nil, methodInputTypes: Seq[AbstractDataType] = Nil, propagateNull: Boolean = true, - returnNullable : Boolean = true) extends InvokeLike { + returnNullable : Boolean = true, + isDeterministic: Boolean = true) extends InvokeLike { lazy val argClasses = ScalaReflection.expressionJavaClasses(arguments) override def nullable: Boolean = targetObject.nullable || needNullCheck || returnNullable override def children: Seq[Expression] = targetObject +: arguments + override lazy val deterministic: Boolean = isDeterministic && arguments.forall(_.deterministic) override def inputTypes: Seq[AbstractDataType] = if (methodInputTypes.nonEmpty) { Seq(targetObject.dataType) ++ methodInputTypes diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/connector/catalog/functions/JavaLongAdd.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/catalog/functions/JavaLongAdd.java index e5b9c7f5bafaa..75ef5275684d6 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/connector/catalog/functions/JavaLongAdd.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/catalog/functions/JavaLongAdd.java @@ -66,7 +66,7 @@ public String description() { return "long_add"; } - private abstract static class JavaLongAddBase implements ScalarFunction { + public abstract static class JavaLongAddBase implements ScalarFunction { private final boolean isResultNullable; JavaLongAddBase(boolean isResultNullable) { diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/connector/catalog/functions/JavaRandomAdd.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/catalog/functions/JavaRandomAdd.java new file mode 100644 index 0000000000000..b315fafd8ece8 --- /dev/null +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/catalog/functions/JavaRandomAdd.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package test.org.apache.spark.sql.connector.catalog.functions; + +import java.util.Random; + +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.connector.catalog.functions.BoundFunction; +import org.apache.spark.sql.connector.catalog.functions.ScalarFunction; +import org.apache.spark.sql.connector.catalog.functions.UnboundFunction; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.IntegerType; +import org.apache.spark.sql.types.StructType; + +/** + * Test V2 function which add a random number to the input integer. + */ +public class JavaRandomAdd implements UnboundFunction { + private final BoundFunction fn; + + public JavaRandomAdd(BoundFunction fn) { + this.fn = fn; + } + + @Override + public String name() { + return "rand"; + } + + @Override + public BoundFunction bind(StructType inputType) { + if (inputType.fields().length != 1) { + throw new UnsupportedOperationException("Expect exactly one argument"); + } + if (inputType.fields()[0].dataType() instanceof IntegerType) { + return fn; + } + throw new UnsupportedOperationException("Expect IntegerType"); + } + + @Override + public String description() { + return "rand_add: add a random integer to the input\n" + + "rand_add(int) -> int"; + } + + public abstract static class JavaRandomAddBase implements ScalarFunction { + @Override + public DataType[] inputTypes() { + return new DataType[] { DataTypes.IntegerType }; + } + + @Override + public DataType resultType() { + return DataTypes.IntegerType; + } + + @Override + public String name() { + return "rand_add"; + } + + @Override + public boolean isDeterministic() { + return false; + } + } + + public static class JavaRandomAddDefault extends JavaRandomAddBase { + private final Random rand = new Random(); + + @Override + public Integer produceResult(InternalRow input) { + return input.getInt(0) + rand.nextInt(); + } + } + + public static class JavaRandomAddMagic extends JavaRandomAddBase { + private final Random rand = new Random(); + + public int invoke(int input) { + return input + rand.nextInt(); + } + } + + public static class JavaRandomAddStaticMagic extends JavaRandomAddBase { + private static final Random rand = new Random(); + + public static int invoke(int input) { + return input + rand.nextInt(); + } + } +} + diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/connector/catalog/functions/JavaStrLen.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/catalog/functions/JavaStrLen.java index 1b1689668e1f6..dade2a113ef45 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/connector/catalog/functions/JavaStrLen.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/catalog/functions/JavaStrLen.java @@ -49,7 +49,7 @@ public BoundFunction bind(StructType inputType) { return fn; } - throw new UnsupportedOperationException("Except StringType"); + throw new 
UnsupportedOperationException("Expect StringType"); } @Override diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2FunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2FunctionSuite.scala index 3277cd69a0e93..a1463523d38ff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2FunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2FunctionSuite.scala @@ -20,16 +20,18 @@ package org.apache.spark.sql.connector import java.util import java.util.Collections -import test.org.apache.spark.sql.connector.catalog.functions.{JavaAverage, JavaLongAdd, JavaStrLen} -import test.org.apache.spark.sql.connector.catalog.functions.JavaLongAdd.{JavaLongAddDefault, JavaLongAddMagic, JavaLongAddMismatchMagic, JavaLongAddStaticMagic} +import test.org.apache.spark.sql.connector.catalog.functions._ +import test.org.apache.spark.sql.connector.catalog.functions.JavaLongAdd._ +import test.org.apache.spark.sql.connector.catalog.functions.JavaRandomAdd._ import test.org.apache.spark.sql.connector.catalog.functions.JavaStrLen._ import org.apache.spark.SparkException -import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.{AnalysisException, DataFrame, Row} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode.{FALLBACK, NO_CODEGEN} import org.apache.spark.sql.connector.catalog.{BasicInMemoryTableCatalog, Identifier, InMemoryCatalog, SupportsNamespaces} import org.apache.spark.sql.connector.catalog.functions.{AggregateFunction, _} +import org.apache.spark.sql.execution.ProjectExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -428,6 +430,31 @@ class DataSourceV2FunctionSuite extends DatasourceV2SQLBase { } } + test("SPARK-37957: pass deterministic flag when creating V2 function expression") { + def checkDeterministic(df: DataFrame): Unit = { + val result = df.queryExecution.executedPlan.find(_.isInstanceOf[ProjectExec]) + assert(result.isDefined, s"Expect to find ProjectExec") + assert(!result.get.asInstanceOf[ProjectExec].projectList.exists(_.deterministic), + "Expect expressions in projectList to be non-deterministic") + } + + catalog("testcat").asInstanceOf[SupportsNamespaces].createNamespace(Array("ns"), emptyProps) + Seq(new JavaRandomAddDefault, new JavaRandomAddMagic, + new JavaRandomAddStaticMagic).foreach { fn => + addFunction(Identifier.of(Array("ns"), "rand_add"), new JavaRandomAdd(fn)) + checkDeterministic(sql("SELECT testcat.ns.rand_add(42)")) + } + + // A function call is non-deterministic if one of its arguments is non-deterministic + Seq(new JavaLongAddDefault(true), new JavaLongAddMagic(true), + new JavaLongAddStaticMagic(true)).foreach { fn => + addFunction(Identifier.of(Array("ns"), "add"), new JavaLongAdd(fn)) + addFunction(Identifier.of(Array("ns"), "rand_add"), + new JavaRandomAdd(new JavaRandomAddDefault)) + checkDeterministic(sql("SELECT testcat.ns.add(10, testcat.ns.rand_add(42))")) + } + } + private case class StrLen(impl: BoundFunction) extends UnboundFunction { override def description(): String = """strlen: returns the length of the input string From fcc5c34c546a45a32ababdd41932c620de9bc969 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Thu, 20 Jan 2022 12:13:00 +0800 Subject: [PATCH 057/513] [SPARK-37839][SQL] DS V2 supports partial aggregate push-down `AVG` MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? `max`,`min`,`count`,`sum`,`avg` are the most commonly used aggregation functions. Currently, DS V2 supports complete aggregate push-down of `avg`. But, supports partial aggregate push-down of `avg` is very useful. The aggregate push-down algorithm is: 1. Spark translates group expressions of `Aggregate` to DS V2 `Aggregation`. 2. Spark calls `supportCompletePushDown` to check if it can completely push down aggregate. 3. If `supportCompletePushDown` returns true, we preserves the aggregate expressions as final aggregate expressions. Otherwise, we split `AVG` into 2 functions: `SUM` and `COUNT`. 4. Spark translates final aggregate expressions and group expressions of `Aggregate` to DS V2 `Aggregation` again, and pushes the `Aggregation` to JDBC source. 5. Spark constructs the final aggregate. ### Why are the changes needed? DS V2 supports partial aggregate push-down `AVG` ### Does this PR introduce _any_ user-facing change? 'Yes'. DS V2 could partial aggregate push-down `AVG` ### How was this patch tested? New tests. Closes #35130 from beliefer/SPARK-37839. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../connector/expressions/aggregate/Avg.java | 49 ++++++++ .../aggregate/GeneralAggregateFunc.java | 1 - .../expressions/aggregate/Average.scala | 2 +- .../datasources/DataSourceStrategy.scala | 29 ++++- .../datasources/v2/PushDownUtils.scala | 36 +----- .../v2/V2ScanRelationPushDown.scala | 106 ++++++++++++++---- .../apache/spark/sql/jdbc/JdbcDialects.scala | 11 +- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 82 +++++++++++++- 8 files changed, 249 insertions(+), 67 deletions(-) create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Avg.java diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Avg.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Avg.java new file mode 100644 index 0000000000000..5e10ec9ee1644 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Avg.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.expressions.aggregate; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.connector.expressions.NamedReference; + +/** + * An aggregate function that returns the mean of all the values in a group. 
+ * + * @since 3.3.0 + */ +@Evolving +public final class Avg implements AggregateFunc { + private final NamedReference column; + private final boolean isDistinct; + + public Avg(NamedReference column, boolean isDistinct) { + this.column = column; + this.isDistinct = isDistinct; + } + + public NamedReference column() { return column; } + public boolean isDistinct() { return isDistinct; } + + @Override + public String toString() { + if (isDistinct) { + return "AVG(DISTINCT " + column.describe() + ")"; + } else { + return "AVG(" + column.describe() + ")"; + } + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/GeneralAggregateFunc.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/GeneralAggregateFunc.java index 32615e201643b..0ff26c8875b7a 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/GeneralAggregateFunc.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/GeneralAggregateFunc.java @@ -31,7 +31,6 @@ *

 * <p>
 * The currently supported SQL aggregate functions:
 * <ol>
- *  <li><pre>AVG(input1)</pre> Since 3.3.0</li>
 *  <li><pre>VAR_POP(input1)</pre> Since 3.3.0</li>
 *  <li><pre>VAR_SAMP(input1)</pre> Since 3.3.0</li>
 *  <li><pre>STDDEV_POP(input1)</pre> Since 3.3.0</li>
  8. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala index 9714a096a69a2..05f7edaeb5d48 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala @@ -69,7 +69,7 @@ case class Average( case _ => DoubleType } - private lazy val sumDataType = child.dataType match { + lazy val sumDataType = child.dataType match { case _ @ DecimalType.Fixed(p, s) => DecimalType.bounded(p + 10, s) case _: YearMonthIntervalType => YearMonthIntervalType() case _: DayTimeIntervalType => DayTimeIntervalType() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index e734de32d232f..ecde8a0bc8fb7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -41,7 +41,7 @@ import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.connector.catalog.SupportsRead import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.connector.expressions.{FieldReference, NullOrdering, SortDirection, SortOrder => SortOrderV2, SortValue} -import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Count, CountStar, GeneralAggregateFunc, Max, Min, Sum} +import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Aggregation, Avg, Count, CountStar, GeneralAggregateFunc, Max, Min, Sum} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.{InSubqueryExec, RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.command._ @@ -720,7 +720,7 @@ object DataSourceStrategy case aggregate.Sum(PushableColumnWithoutNestedColumn(name), _) => Some(new Sum(FieldReference.column(name), agg.isDistinct)) case aggregate.Average(PushableColumnWithoutNestedColumn(name), _) => - Some(new GeneralAggregateFunc("AVG", agg.isDistinct, Array(FieldReference.column(name)))) + Some(new Avg(FieldReference.column(name), agg.isDistinct)) case aggregate.VariancePop(PushableColumnWithoutNestedColumn(name), _) => Some(new GeneralAggregateFunc( "VAR_POP", agg.isDistinct, Array(FieldReference.column(name)))) @@ -752,6 +752,31 @@ object DataSourceStrategy } } + /** + * Translate aggregate expressions and group by expressions. + * + * @return translated aggregation. 
+ */ + protected[sql] def translateAggregation( + aggregates: Seq[AggregateExpression], groupBy: Seq[Expression]): Option[Aggregation] = { + + def columnAsString(e: Expression): Option[FieldReference] = e match { + case PushableColumnWithoutNestedColumn(name) => + Some(FieldReference.column(name).asInstanceOf[FieldReference]) + case _ => None + } + + val translatedAggregates = aggregates.flatMap(translateAggregate) + val translatedGroupBys = groupBy.flatMap(columnAsString) + + if (translatedAggregates.length != aggregates.length || + translatedGroupBys.length != groupBy.length) { + return None + } + + Some(new Aggregation(translatedAggregates.toArray, translatedGroupBys.toArray)) + } + protected[sql] def translateSortOrders(sortOrders: Seq[SortOrder]): Seq[SortOrderV2] = { def translateOortOrder(sortOrder: SortOrder): Option[SortOrderV2] = sortOrder match { case SortOrder(PushableColumnWithoutNestedColumn(name), directionV1, nullOrderingV1, _) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala index 29d86b67b28ec..9953658b65488 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala @@ -20,13 +20,11 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, Expression, NamedExpression, PredicateHelper, SchemaPruning} -import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.util.CharVarcharUtils -import org.apache.spark.sql.connector.expressions.{FieldReference, SortOrder} -import org.apache.spark.sql.connector.expressions.aggregate.Aggregation +import org.apache.spark.sql.connector.expressions.SortOrder import org.apache.spark.sql.connector.expressions.filter.{Filter => V2Filter} -import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownAggregates, SupportsPushDownFilters, SupportsPushDownLimit, SupportsPushDownRequiredColumns, SupportsPushDownTableSample, SupportsPushDownTopN, SupportsPushDownV2Filters} -import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, PushableColumnWithoutNestedColumn} +import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownLimit, SupportsPushDownRequiredColumns, SupportsPushDownTableSample, SupportsPushDownTopN, SupportsPushDownV2Filters} +import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources import org.apache.spark.sql.types.StructType @@ -106,34 +104,6 @@ object PushDownUtils extends PredicateHelper { } } - /** - * Pushes down aggregates to the data source reader - * - * @return pushed aggregation. 
- */ - def pushAggregates( - scanBuilder: SupportsPushDownAggregates, - aggregates: Seq[AggregateExpression], - groupBy: Seq[Expression]): Option[Aggregation] = { - - def columnAsString(e: Expression): Option[FieldReference] = e match { - case PushableColumnWithoutNestedColumn(name) => - Some(FieldReference.column(name).asInstanceOf[FieldReference]) - case _ => None - } - - val translatedAggregates = aggregates.flatMap(DataSourceStrategy.translateAggregate) - val translatedGroupBys = groupBy.flatMap(columnAsString) - - if (translatedAggregates.length != aggregates.length || - translatedGroupBys.length != groupBy.length) { - return None - } - - val agg = new Aggregation(translatedAggregates.toArray, translatedGroupBys.toArray) - Some(agg).filter(scanBuilder.pushAggregation) - } - /** * Pushes down TableSample to the data source Scan */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala index 3437dcba5e65f..3ff917664b486 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -19,18 +19,18 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable -import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, AttributeReference, Cast, Expression, IntegerLiteral, NamedExpression, PredicateHelper, ProjectionOverSchema, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, AttributeReference, Cast, Divide, DivideDTInterval, DivideYMInterval, EqualTo, Expression, If, IntegerLiteral, Literal, NamedExpression, PredicateHelper, ProjectionOverSchema, SubqueryExpression} import org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.planning.ScanOperation import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LeafNode, Limit, LocalLimit, LogicalPlan, Project, Sample, Sort} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.expressions.SortOrder -import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, GeneralAggregateFunc} +import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, Avg, GeneralAggregateFunc} import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownAggregates, SupportsPushDownFilters, V1Scan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.sources -import org.apache.spark.sql.types.{DataType, LongType, StructType} +import org.apache.spark.sql.types.{DataType, DayTimeIntervalType, LongType, StructType, YearMonthIntervalType} import org.apache.spark.sql.util.SchemaUtils._ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { @@ -97,25 +97,66 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { sHolder.builder match { case r: SupportsPushDownAggregates => val aggExprToOutputOrdinal = mutable.HashMap.empty[Expression, Int] - var ordinal = 0 - val aggregates = resultExpressions.flatMap { expr => - expr.collect { - // Do not push down duplicated aggregate expressions. 
For example, - // `SELECT max(a) + 1, max(a) + 2 FROM ...`, we should only push down one - // `max(a)` to the data source. - case agg: AggregateExpression - if !aggExprToOutputOrdinal.contains(agg.canonicalized) => - aggExprToOutputOrdinal(agg.canonicalized) = ordinal - ordinal += 1 - agg - } - } + val aggregates = collectAggregates(resultExpressions, aggExprToOutputOrdinal) val normalizedAggregates = DataSourceStrategy.normalizeExprs( aggregates, sHolder.relation.output).asInstanceOf[Seq[AggregateExpression]] val normalizedGroupingExpressions = DataSourceStrategy.normalizeExprs( groupingExpressions, sHolder.relation.output) - val pushedAggregates = PushDownUtils.pushAggregates( - r, normalizedAggregates, normalizedGroupingExpressions) + val translatedAggregates = DataSourceStrategy.translateAggregation( + normalizedAggregates, normalizedGroupingExpressions) + val (finalResultExpressions, finalAggregates, finalTranslatedAggregates) = { + if (translatedAggregates.isEmpty || + r.supportCompletePushDown(translatedAggregates.get) || + translatedAggregates.get.aggregateExpressions().forall(!_.isInstanceOf[Avg])) { + (resultExpressions, aggregates, translatedAggregates) + } else { + // scalastyle:off + // The data source doesn't support the complete push-down of this aggregation. + // Here we translate `AVG` to `SUM / COUNT`, so that it's more likely to be + // pushed, completely or partially. + // e.g. TABLE t (c1 INT, c2 INT, c3 INT) + // SELECT avg(c1) FROM t GROUP BY c2; + // The original logical plan is + // Aggregate [c2#10],[avg(c1#9) AS avg(c1)#19] + // +- ScanOperation[...] + // + // After convert avg(c1#9) to sum(c1#9)/count(c1#9) + // we have the following + // Aggregate [c2#10],[sum(c1#9)/count(c1#9) AS avg(c1)#19] + // +- ScanOperation[...] + // scalastyle:on + val newResultExpressions = resultExpressions.map { expr => + expr.transform { + case AggregateExpression(avg: aggregate.Average, _, isDistinct, _, _) => + val sum = aggregate.Sum(avg.child).toAggregateExpression(isDistinct) + val count = aggregate.Count(avg.child).toAggregateExpression(isDistinct) + // Closely follow `Average.evaluateExpression` + avg.dataType match { + case _: YearMonthIntervalType => + If(EqualTo(count, Literal(0L)), + Literal(null, YearMonthIntervalType()), DivideYMInterval(sum, count)) + case _: DayTimeIntervalType => + If(EqualTo(count, Literal(0L)), + Literal(null, DayTimeIntervalType()), DivideDTInterval(sum, count)) + case _ => + // TODO deal with the overflow issue + Divide(addCastIfNeeded(sum, avg.dataType), + addCastIfNeeded(count, avg.dataType), false) + } + } + }.asInstanceOf[Seq[NamedExpression]] + // Because aggregate expressions changed, translate them again. 
+ aggExprToOutputOrdinal.clear() + val newAggregates = + collectAggregates(newResultExpressions, aggExprToOutputOrdinal) + val newNormalizedAggregates = DataSourceStrategy.normalizeExprs( + newAggregates, sHolder.relation.output).asInstanceOf[Seq[AggregateExpression]] + (newResultExpressions, newAggregates, DataSourceStrategy.translateAggregation( + newNormalizedAggregates, normalizedGroupingExpressions)) + } + } + + val pushedAggregates = finalTranslatedAggregates.filter(r.pushAggregation) if (pushedAggregates.isEmpty) { aggNode // return original plan node } else if (!supportPartialAggPushDown(pushedAggregates.get) && @@ -138,7 +179,7 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { // +- RelationV2[c2#10, min(c1)#21, max(c1)#22] // scalastyle:on val newOutput = scan.readSchema().toAttributes - assert(newOutput.length == groupingExpressions.length + aggregates.length) + assert(newOutput.length == groupingExpressions.length + finalAggregates.length) val groupAttrs = normalizedGroupingExpressions.zip(newOutput).map { case (a: Attribute, b: Attribute) => b.withExprId(a.exprId) case (_, b) => b @@ -173,7 +214,7 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { Project(projectExpressions, scanRelation) } else { val plan = Aggregate( - output.take(groupingExpressions.length), resultExpressions, scanRelation) + output.take(groupingExpressions.length), finalResultExpressions, scanRelation) // scalastyle:off // Change the optimized logical plan to reflect the pushed down aggregate @@ -219,16 +260,33 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { } } + private def collectAggregates(resultExpressions: Seq[NamedExpression], + aggExprToOutputOrdinal: mutable.HashMap[Expression, Int]): Seq[AggregateExpression] = { + var ordinal = 0 + resultExpressions.flatMap { expr => + expr.collect { + // Do not push down duplicated aggregate expressions. For example, + // `SELECT max(a) + 1, max(a) + 2 FROM ...`, we should only push down one + // `max(a)` to the data source. + case agg: AggregateExpression + if !aggExprToOutputOrdinal.contains(agg.canonicalized) => + aggExprToOutputOrdinal(agg.canonicalized) = ordinal + ordinal += 1 + agg + } + } + } + private def supportPartialAggPushDown(agg: Aggregation): Boolean = { // We don't know the agg buffer of `GeneralAggregateFunc`, so can't do partial agg push down. 
agg.aggregateExpressions().forall(!_.isInstanceOf[GeneralAggregateFunc]) } - private def addCastIfNeeded(aggAttribute: AttributeReference, aggDataType: DataType) = - if (aggAttribute.dataType == aggDataType) { - aggAttribute + private def addCastIfNeeded(expression: Expression, expectedDataType: DataType) = + if (expression.dataType == expectedDataType) { + expression } else { - Cast(aggAttribute, aggDataType) + Cast(expression, expectedDataType) } def pruneColumns(plan: LogicalPlan): LogicalPlan = plan.transform { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 344842d30b232..7b8b362e64c6d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.TableChange._ import org.apache.spark.sql.connector.catalog.index.TableIndex import org.apache.spark.sql.connector.expressions.NamedReference -import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Count, CountStar, GeneralAggregateFunc, Max, Min, Sum} +import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Avg, Count, CountStar, Max, Min, Sum} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo @@ -220,10 +220,11 @@ abstract class JdbcDialect extends Serializable with Logging{ Some(s"SUM($distinct$column)") case _: CountStar => Some("COUNT(*)") - case f: GeneralAggregateFunc if f.name() == "AVG" => - assert(f.inputs().length == 1) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"AVG($distinct${f.inputs().head})") + case avg: Avg => + if (avg.column.fieldNames.length != 1) return None + val distinct = if (avg.isDistinct) "DISTINCT " else "" + val column = quoteIdentifier(avg.column.fieldNames.head) + Some(s"AVG($distinct$column)") case _ => None } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index c5e1a6ace7029..eadc2fb9e882d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Sort} import org.apache.spark.sql.connector.expressions.{FieldReference, NullOrdering, SortDirection, SortValue} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanRelation, V1ScanWrapper} import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -import org.apache.spark.sql.functions.{lit, sum, udf} +import org.apache.spark.sql.functions.{avg, count, lit, sum, udf} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.util.Utils @@ -874,4 +874,84 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(df, Seq(Row(2))) // scalastyle:on } + + test("scan with aggregate push-down: complete push-down SUM, AVG, COUNT") { + val df = spark.read + .option("partitionColumn", "dept") + .option("lowerBound", "0") + .option("upperBound", "2") + .option("numPartitions", "1") + .table("h2.test.employee") + .agg(sum($"SALARY").as("sum"), avg($"SALARY").as("avg"), 
count($"SALARY").as("count")) + checkAggregateRemoved(df) + df.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedAggregates: [SUM(SALARY), AVG(SALARY), COUNT(SALARY)]" + checkKeywordsExistsInExplain(df, expected_plan_fragment) + } + checkAnswer(df, Seq(Row(53000.00, 10600.000000, 5))) + + val df2 = spark.read + .option("partitionColumn", "dept") + .option("lowerBound", "0") + .option("upperBound", "2") + .option("numPartitions", "1") + .table("h2.test.employee") + .groupBy($"name") + .agg(sum($"SALARY").as("sum"), avg($"SALARY").as("avg"), count($"SALARY").as("count")) + checkAggregateRemoved(df) + df.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedAggregates: [SUM(SALARY), AVG(SALARY), COUNT(SALARY)]" + checkKeywordsExistsInExplain(df, expected_plan_fragment) + } + checkAnswer(df2, Seq( + Row("alex", 12000.00, 12000.000000, 1), + Row("amy", 10000.00, 10000.000000, 1), + Row("cathy", 9000.00, 9000.000000, 1), + Row("david", 10000.00, 10000.000000, 1), + Row("jen", 12000.00, 12000.000000, 1))) + } + + test("scan with aggregate push-down: partial push-down SUM, AVG, COUNT") { + val df = spark.read + .option("partitionColumn", "dept") + .option("lowerBound", "0") + .option("upperBound", "2") + .option("numPartitions", "2") + .table("h2.test.employee") + .agg(sum($"SALARY").as("sum"), avg($"SALARY").as("avg"), count($"SALARY").as("count")) + checkAggregateRemoved(df, false) + df.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedAggregates: [SUM(SALARY), COUNT(SALARY)]" + checkKeywordsExistsInExplain(df, expected_plan_fragment) + } + checkAnswer(df, Seq(Row(53000.00, 10600.000000, 5))) + + val df2 = spark.read + .option("partitionColumn", "dept") + .option("lowerBound", "0") + .option("upperBound", "2") + .option("numPartitions", "2") + .table("h2.test.employee") + .groupBy($"name") + .agg(sum($"SALARY").as("sum"), avg($"SALARY").as("avg"), count($"SALARY").as("count")) + checkAggregateRemoved(df, false) + df.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedAggregates: [SUM(SALARY), COUNT(SALARY)]" + checkKeywordsExistsInExplain(df, expected_plan_fragment) + } + checkAnswer(df2, Seq( + Row("alex", 12000.00, 12000.000000, 1), + Row("amy", 10000.00, 10000.000000, 1), + Row("cathy", 9000.00, 9000.000000, 1), + Row("david", 10000.00, 10000.000000, 1), + Row("jen", 12000.00, 12000.000000, 1))) + } } From 15464e37a19ee99147550bab96d2674fb05d06df Mon Sep 17 00:00:00 2001 From: PengLei Date: Thu, 20 Jan 2022 16:24:58 +0800 Subject: [PATCH 058/513] [SPARK-37931][SQL] Quote the column name if neededQuote the column name if needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Quote the column name just needed instead of anyway. ### Why are the changes needed? [#comments](https://github.com/apache/spark/pull/35204#discussion_r785725545) ### Does this PR introduce _any_ user-facing change? Yes,It will change the result that users get the schema eg: ``` "STRUCT<`_c0`: STRING, `_c1`: INT>" => "STRUCT<_c0: STRING, _c1: INT>" ``` At now. for end-user. I learn about the 3 way to get schema directly 1. the function: eg ``` schema_of_json schema_of_csv ``` 2. table schema df.schema or show create table 3. call `toDDL` for StructType or StructField. 
### How was this patch tested? existed testcase. Closes #35227 from Peng-Lei/Quote-Column. Authored-by: PengLei Signed-off-by: Wenchen Fan --- R/pkg/tests/fulltests/test_sparkSQL.R | 8 +-- python/pyspark/sql/functions.py | 8 +-- .../catalyst/expressions/csvExpressions.scala | 2 +- .../expressions/jsonExpressions.scala | 4 +- .../apache/spark/sql/types/StructField.scala | 6 +- .../expressions/CsvExpressionsSuite.scala | 4 +- .../expressions/JsonExpressionsSuite.scala | 8 +-- .../spark/sql/types/StructTypeSuite.scala | 16 +++--- .../sql-tests/results/charvarchar.sql.out | 12 ++-- .../sql-tests/results/csv-functions.sql.out | 2 +- .../sql-tests/results/json-functions.sql.out | 6 +- .../results/show-create-table.sql.out | 56 +++++++++---------- .../apache/spark/sql/CsvFunctionsSuite.scala | 8 +-- .../sql/DataFrameSetOperationsSuite.scala | 50 ++++++++--------- .../apache/spark/sql/JsonFunctionsSuite.scala | 10 ++-- .../command/ShowCreateTableSuiteBase.scala | 22 ++++---- .../command/v1/ShowCreateTableSuite.scala | 18 +++--- .../command/v2/ShowCreateTableSuite.scala | 20 +++---- .../command/ShowCreateTableSuite.scala | 16 +++--- 19 files changed, 138 insertions(+), 138 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 0e46324ed5c47..73b9dcc0a5728 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1690,9 +1690,9 @@ test_that("column functions", { df <- as.DataFrame(list(list("col" = "1"))) c <- collect(select(df, schema_of_csv("Amsterdam,2018"))) - expect_equal(c[[1]], "STRUCT<`_c0`: STRING, `_c1`: INT>") + expect_equal(c[[1]], "STRUCT<_c0: STRING, _c1: INT>") c <- collect(select(df, schema_of_csv(lit("Amsterdam,2018")))) - expect_equal(c[[1]], "STRUCT<`_c0`: STRING, `_c1`: INT>") + expect_equal(c[[1]], "STRUCT<_c0: STRING, _c1: INT>") # Test to_json(), from_json(), schema_of_json() df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people") @@ -1725,9 +1725,9 @@ test_that("column functions", { df <- as.DataFrame(list(list("col" = "1"))) c <- collect(select(df, schema_of_json('{"name":"Bob"}'))) - expect_equal(c[[1]], "STRUCT<`name`: STRING>") + expect_equal(c[[1]], "STRUCT") c <- collect(select(df, schema_of_json(lit('{"name":"Bob"}')))) - expect_equal(c[[1]], "STRUCT<`name`: STRING>") + expect_equal(c[[1]], "STRUCT") # Test to_json() supports arrays of primitive types and arrays df <- sql("SELECT array(19, 42, 70) as age") diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index f2bca0b5d0505..e69c37d320a34 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -4091,10 +4091,10 @@ def schema_of_json(json: "ColumnOrName", options: Optional[Dict[str, str]] = Non -------- >>> df = spark.range(1) >>> df.select(schema_of_json(lit('{"a": 0}')).alias("json")).collect() - [Row(json='STRUCT<`a`: BIGINT>')] + [Row(json='STRUCT')] >>> schema = schema_of_json('{a: 1}', {'allowUnquotedFieldNames':'true'}) >>> df.select(schema.alias("json")).collect() - [Row(json='STRUCT<`a`: BIGINT>')] + [Row(json='STRUCT')] """ if isinstance(json, str): col = _create_column_from_literal(json) @@ -4127,9 +4127,9 @@ def schema_of_csv(csv: "ColumnOrName", options: Optional[Dict[str, str]] = None) -------- >>> df = spark.range(1) >>> df.select(schema_of_csv(lit('1|a'), {'sep':'|'}).alias("csv")).collect() - [Row(csv='STRUCT<`_c0`: INT, `_c1`: STRING>')] + [Row(csv='STRUCT<_c0: INT, _c1: STRING>')] >>> 
df.select(schema_of_csv('1|a', {'sep':'|'}).alias("csv")).collect() - [Row(csv='STRUCT<`_c0`: INT, `_c1`: STRING>')] + [Row(csv='STRUCT<_c0: INT, _c1: STRING>')] """ if isinstance(csv, str): col = _create_column_from_literal(csv) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala index 79bbc103c92d3..30d992a2eea6d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala @@ -153,7 +153,7 @@ case class CsvToStructs( examples = """ Examples: > SELECT _FUNC_('1,abc'); - STRUCT<`_c0`: INT, `_c1`: STRING> + STRUCT<_c0: INT, _c1: STRING> """, since = "3.0.0", group = "csv_funcs") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 5b058626e2227..9f00b7c8b7409 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -766,9 +766,9 @@ case class StructsToJson( examples = """ Examples: > SELECT _FUNC_('[{"col":0}]'); - ARRAY> + ARRAY> > SELECT _FUNC_('[{"col":01}]', map('allowNumericLeadingZeros', 'true')); - ARRAY> + ARRAY> """, group = "json_funcs", since = "2.4.0") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala index 93d57a7fe6f3b..f490f8318ef84 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala @@ -21,7 +21,7 @@ import org.json4s.JsonAST.JValue import org.json4s.JsonDSL._ import org.apache.spark.annotation.Stable -import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier} +import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIfNeeded} import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat import org.apache.spark.sql.util.SchemaUtils @@ -93,7 +93,7 @@ case class StructField( * Returns a string containing a schema in SQL format. For example the following value: * `StructField("eventId", IntegerType)` will be converted to `eventId`: INT. */ - private[sql] def sql = s"${quoteIdentifier(name)}: ${dataType.sql}$getDDLComment" + private[sql] def sql = s"${quoteIfNeeded(name)}: ${dataType.sql}$getDDLComment" /** * Returns a string containing a schema in DDL format. 
For example, the following value: @@ -103,6 +103,6 @@ case class StructField( */ def toDDL: String = { val nullString = if (nullable) "" else " NOT NULL" - s"${quoteIdentifier(name)} ${dataType.sql}${nullString}$getDDLComment" + s"${quoteIfNeeded(name)} ${dataType.sql}${nullString}$getDDLComment" } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala index 7945974a1f3dc..1d174ed214523 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala @@ -158,13 +158,13 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P } test("infer schema of CSV strings") { - checkEvaluation(new SchemaOfCsv(Literal.create("1,abc")), "STRUCT<`_c0`: INT, `_c1`: STRING>") + checkEvaluation(new SchemaOfCsv(Literal.create("1,abc")), "STRUCT<_c0: INT, _c1: STRING>") } test("infer schema of CSV strings by using options") { checkEvaluation( new SchemaOfCsv(Literal.create("1|abc"), Map("delimiter" -> "|")), - "STRUCT<`_c0`: INT, `_c1`: STRING>") + "STRUCT<_c0: INT, _c1: STRING>") } test("to_csv - struct") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index 2ae7c76599e5a..af071727b10dc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -736,17 +736,17 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with test("SPARK-24709: infer schema of json strings") { checkEvaluation(new SchemaOfJson(Literal.create("""{"col":0}""")), - "STRUCT<`col`: BIGINT>") + "STRUCT") checkEvaluation( new SchemaOfJson(Literal.create("""{"col0":["a"], "col1": {"col2": "b"}}""")), - "STRUCT<`col0`: ARRAY, `col1`: STRUCT<`col2`: STRING>>") + "STRUCT, col1: STRUCT>") } test("infer schema of JSON strings by using options") { checkEvaluation( new SchemaOfJson(Literal.create("""{"col":01}"""), CreateMap(Seq(Literal.create("allowNumericLeadingZeros"), Literal.create("true")))), - "STRUCT<`col`: BIGINT>") + "STRUCT") } test("parse date with locale") { @@ -811,7 +811,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with } Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach { - checkDecimalInfer(_, """STRUCT<`d`: DECIMAL(7,3)>""") + checkDecimalInfer(_, """STRUCT""") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala index a7e22e9403275..16f122334f370 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala @@ -51,7 +51,7 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { test("SPARK-24849: toDDL - simple struct") { val struct = StructType(Seq(StructField("a", IntegerType))) - assert(struct.toDDL == "`a` INT") + assert(struct.toDDL == "a INT") } test("SPARK-24849: round trip toDDL - fromDDL") { @@ -61,7 +61,7 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { } test("SPARK-24849: round 
trip fromDDL - toDDL") { - val struct = "`a` MAP,`b` INT" + val struct = "a MAP,b INT" assert(fromDDL(struct).toDDL === struct) } @@ -70,14 +70,14 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { val struct = new StructType() .add("metaData", new StructType().add("eventId", StringType)) - assert(struct.toDDL == "`metaData` STRUCT<`eventId`: STRING>") + assert(struct.toDDL == "metaData STRUCT") } test("SPARK-24849: toDDL should output field's comment") { val struct = StructType(Seq( StructField("b", BooleanType).withComment("Field's comment"))) - assert(struct.toDDL == """`b` BOOLEAN COMMENT 'Field\'s comment'""") + assert(struct.toDDL == """b BOOLEAN COMMENT 'Field\'s comment'""") } private val nestedStruct = new StructType() @@ -89,7 +89,7 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { ).withComment("comment")) test("SPARK-33846: toDDL should output nested field's comment") { - val ddl = "`a` STRUCT<`b`: STRUCT<`c`: STRING COMMENT 'Deep Nested comment'> " + + val ddl = "a STRUCT " + "COMMENT 'Nested comment'> COMMENT 'comment'" assert(nestedStruct.toDDL == ddl) } @@ -153,7 +153,7 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { } test("interval keyword in schema string") { - val interval = "`a` INTERVAL" + val interval = "a INTERVAL" assert(fromDDL(interval).toDDL === interval) } @@ -250,10 +250,10 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { } test("SPARK-35285: ANSI interval types in schema") { - val yearMonthInterval = "`ymi` INTERVAL YEAR TO MONTH" + val yearMonthInterval = "ymi INTERVAL YEAR TO MONTH" assert(fromDDL(yearMonthInterval).toDDL === yearMonthInterval) - val dayTimeInterval = "`dti` INTERVAL DAY TO SECOND" + val dayTimeInterval = "dti INTERVAL DAY TO SECOND" assert(fromDDL(dayTimeInterval).toDDL === dayTimeInterval) } diff --git a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out index 5c6b1a727705d..de994d67c9f34 100644 --- a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out @@ -52,8 +52,8 @@ show create table char_tbl struct -- !query output CREATE TABLE default.char_tbl ( - `c` CHAR(5), - `v` VARCHAR(6)) + c CHAR(5), + v VARCHAR(6)) USING parquet @@ -71,8 +71,8 @@ show create table char_tbl2 struct -- !query output CREATE TABLE default.char_tbl2 ( - `c` CHAR(5), - `v` VARCHAR(6)) + c CHAR(5), + v VARCHAR(6)) USING parquet @@ -162,8 +162,8 @@ show create table char_tbl3 struct -- !query output CREATE TABLE default.char_tbl3 ( - `c` CHAR(5), - `v` VARCHAR(6)) + c CHAR(5), + v VARCHAR(6)) USING parquet diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out index 2ca44d51244a5..53cae3f935568 100644 --- a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out @@ -89,7 +89,7 @@ select schema_of_csv('1|abc', map('delimiter', '|')) -- !query schema struct -- !query output -STRUCT<`_c0`: INT, `_c1`: STRING> +STRUCT<_c0: INT, _c1: STRING> -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index ff59553e4e9d9..e509d4e4cc27b 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -236,7 +236,7 @@ select schema_of_json('{"c1":0, "c2":[1]}') -- !query schema struct -- !query output -STRUCT<`c1`: BIGINT, `c2`: ARRAY> +STRUCT> -- !query @@ -375,7 +375,7 @@ select schema_of_json('{"c1":1}', map('primitivesAsString', 'true')) -- !query schema struct -- !query output -STRUCT<`c1`: STRING> +STRUCT -- !query @@ -383,7 +383,7 @@ select schema_of_json('{"c1":01, "c2":0.1}', map('allowNumericLeadingZeros', 'tr -- !query schema struct -- !query output -STRUCT<`c1`: BIGINT, `c2`: DECIMAL(1,1)> +STRUCT -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out index ffcbb73458aa2..4c7f124e72fe1 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out @@ -16,9 +16,9 @@ SHOW CREATE TABLE tbl struct -- !query output CREATE TABLE default.tbl ( - `a` INT, - `b` STRING, - `c` INT) + a INT, + b STRING, + c INT) USING parquet @@ -45,9 +45,9 @@ SHOW CREATE TABLE tbl struct -- !query output CREATE TABLE default.tbl ( - `a` INT, - `b` STRING, - `c` INT) + a INT, + b STRING, + c INT) USING parquet OPTIONS ( 'a' = '1') @@ -76,9 +76,9 @@ SHOW CREATE TABLE tbl struct -- !query output CREATE TABLE default.tbl ( - `a` INT, - `b` STRING, - `c` INT) + a INT, + b STRING, + c INT) USING parquet LOCATION 'file:/path/to/table' @@ -106,9 +106,9 @@ SHOW CREATE TABLE tbl struct -- !query output CREATE TABLE default.tbl ( - `a` INT, - `b` STRING, - `c` INT) + a INT, + b STRING, + c INT) USING parquet LOCATION 'file:/path/to/table' @@ -136,9 +136,9 @@ SHOW CREATE TABLE tbl struct -- !query output CREATE TABLE default.tbl ( - `b` STRING, - `c` INT, - `a` INT) + b STRING, + c INT, + a INT) USING parquet PARTITIONED BY (a) @@ -166,9 +166,9 @@ SHOW CREATE TABLE tbl struct -- !query output CREATE TABLE default.tbl ( - `a` INT, - `b` STRING, - `c` INT) + a INT, + b STRING, + c INT) USING parquet CLUSTERED BY (a) SORTED BY (b) @@ -198,9 +198,9 @@ SHOW CREATE TABLE tbl struct -- !query output CREATE TABLE default.tbl ( - `a` INT, - `b` STRING, - `c` INT) + a INT, + b STRING, + c INT) USING parquet COMMENT 'This is a comment' @@ -228,9 +228,9 @@ SHOW CREATE TABLE tbl struct -- !query output CREATE TABLE default.tbl ( - `a` INT, - `b` STRING, - `c` INT) + a INT, + b STRING, + c INT) USING parquet TBLPROPERTIES ( 'a' = '1') @@ -258,10 +258,10 @@ SHOW CREATE TABLE tbl struct -- !query output CREATE TABLE default.tbl ( - `a` FLOAT, - `b` DECIMAL(10,0), - `c` DECIMAL(10,0), - `d` DECIMAL(10,1)) + a FLOAT, + b DECIMAL(10,0), + c DECIMAL(10,0), + d DECIMAL(10,1)) USING parquet diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala index 2808652f2998d..461bbd8987cef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala @@ -82,16 +82,16 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession { test("schema_of_csv - infers schemas") { checkAnswer( spark.range(1).select(schema_of_csv(lit("0.1,1"))), - Seq(Row("STRUCT<`_c0`: DOUBLE, `_c1`: INT>"))) + Seq(Row("STRUCT<_c0: DOUBLE, _c1: INT>"))) checkAnswer( spark.range(1).select(schema_of_csv("0.1,1")), - Seq(Row("STRUCT<`_c0`: DOUBLE, `_c1`: INT>"))) + Seq(Row("STRUCT<_c0: DOUBLE, 
_c1: INT>"))) } test("schema_of_csv - infers schemas using options") { val df = spark.range(1) .select(schema_of_csv(lit("0.1 1"), Map("sep" -> " ").asJava)) - checkAnswer(df, Seq(Row("STRUCT<`_c0`: DOUBLE, `_c1`: INT>"))) + checkAnswer(df, Seq(Row("STRUCT<_c0: DOUBLE, _c1: INT>"))) } test("to_csv - struct") { @@ -220,7 +220,7 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession { val input = concat_ws(",", lit(0.1), lit(1)) checkAnswer( spark.range(1).select(schema_of_csv(input)), - Seq(Row("STRUCT<`_c0`: DOUBLE, `_c1`: INT>"))) + Seq(Row("STRUCT<_c0: DOUBLE, _c1: INT>"))) } test("optional datetime parser does not affect csv time formatting") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala index b19e4300b5af4..19a62c25f5c5b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala @@ -804,7 +804,7 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { StructType(Seq(StructField("topLevelCol", nestedStructType2)))) val union = df1.unionByName(df2, allowMissingColumns = true) - assert(union.schema.toDDL == "`topLevelCol` STRUCT<`b`: STRING, `a`: STRING>") + assert(union.schema.toDDL == "topLevelCol STRUCT") checkAnswer(union, Row(Row("b", null)) :: Row(Row("b", "a")) :: Nil) } @@ -836,15 +836,15 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { StructType(Seq(StructField("topLevelCol", nestedStructType2)))) var unionDf = df1.unionByName(df2, true) - assert(unionDf.schema.toDDL == "`topLevelCol` " + - "STRUCT<`b`: STRUCT<`ba`: STRING, `bb`: STRING>, `a`: STRUCT<`aa`: STRING>>") + assert(unionDf.schema.toDDL == "topLevelCol " + + "STRUCT, a: STRUCT>") checkAnswer(unionDf, Row(Row(Row("ba", null), null)) :: Row(Row(Row(null, "bb"), Row("aa"))) :: Nil) unionDf = df2.unionByName(df1, true) - assert(unionDf.schema.toDDL == "`topLevelCol` STRUCT<`a`: STRUCT<`aa`: STRING>, " + - "`b`: STRUCT<`bb`: STRING, `ba`: STRING>>") + assert(unionDf.schema.toDDL == "topLevelCol STRUCT, " + + "b: STRUCT>") checkAnswer(unionDf, Row(Row(null, Row(null, "ba"))) :: Row(Row(Row("aa"), Row("bb", null))) :: Nil) @@ -1112,13 +1112,13 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { StructType(Seq(StructField("arr", arrayType2)))) var unionDf = df1.unionByName(df2) - assert(unionDf.schema.toDDL == "`arr` ARRAY>") + assert(unionDf.schema.toDDL == "arr ARRAY>") checkAnswer(unionDf, Row(Seq(Row("ba", "bb"))) :: Row(Seq(Row("ba", "bb"))) :: Nil) unionDf = df2.unionByName(df1) - assert(unionDf.schema.toDDL == "`arr` ARRAY>") + assert(unionDf.schema.toDDL == "arr ARRAY>") checkAnswer(unionDf, Row(Seq(Row("bb", "ba"))) :: Row(Seq(Row("bb", "ba"))) :: Nil) @@ -1150,7 +1150,7 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { } unionDf = df3.unionByName(df4, true) - assert(unionDf.schema.toDDL == "`arr` ARRAY>") + assert(unionDf.schema.toDDL == "arr ARRAY>") checkAnswer(unionDf, Row(Seq(Row("ba", null))) :: Row(Seq(Row(null, "bb"))) :: Nil) @@ -1160,7 +1160,7 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { } unionDf = df4.unionByName(df3, true) - assert(unionDf.schema.toDDL == "`arr` ARRAY>") + assert(unionDf.schema.toDDL == "arr ARRAY>") checkAnswer(unionDf, Row(Seq(Row("bb", null))) :: Row(Seq(Row(null, "ba"))) :: Nil) 
@@ -1196,15 +1196,15 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { StructType(Seq(StructField("topLevelCol", nestedStructType2)))) var unionDf = df1.unionByName(df2) - assert(unionDf.schema.toDDL == "`topLevelCol` " + - "STRUCT<`b`: ARRAY>>") + assert(unionDf.schema.toDDL == "topLevelCol " + + "STRUCT>>") checkAnswer(unionDf, Row(Row(Seq(Row("ba", "bb")))) :: Row(Row(Seq(Row("ba", "bb")))) :: Nil) unionDf = df2.unionByName(df1) - assert(unionDf.schema.toDDL == "`topLevelCol` STRUCT<" + - "`b`: ARRAY>>") + assert(unionDf.schema.toDDL == "topLevelCol STRUCT<" + + "b: ARRAY>>") checkAnswer(unionDf, Row(Row(Seq(Row("bb", "ba")))) :: Row(Row(Seq(Row("bb", "ba")))) :: Nil) @@ -1240,8 +1240,8 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { } unionDf = df3.unionByName(df4, true) - assert(unionDf.schema.toDDL == "`topLevelCol` " + - "STRUCT<`b`: ARRAY>>") + assert(unionDf.schema.toDDL == "topLevelCol " + + "STRUCT>>") checkAnswer(unionDf, Row(Row(Seq(Row("ba", null)))) :: Row(Row(Seq(Row(null, "bb")))) :: Nil) @@ -1251,8 +1251,8 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { } unionDf = df4.unionByName(df3, true) - assert(unionDf.schema.toDDL == "`topLevelCol` STRUCT<" + - "`b`: ARRAY>>") + assert(unionDf.schema.toDDL == "topLevelCol STRUCT<" + + "b: ARRAY>>") checkAnswer(unionDf, Row(Row(Seq(Row("bb", null)))) :: Row(Row(Seq(Row(null, "ba")))) :: Nil) @@ -1292,15 +1292,15 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { StructType(Seq(StructField("topLevelCol", nestedStructType2)))) var unionDf = df1.unionByName(df2) - assert(unionDf.schema.toDDL == "`topLevelCol` " + - "STRUCT<`b`: ARRAY>>>") + assert(unionDf.schema.toDDL == "topLevelCol " + + "STRUCT>>>") checkAnswer(unionDf, Row(Row(Seq(Seq(Row("ba", "bb"))))) :: Row(Row(Seq(Seq(Row("ba", "bb"))))) :: Nil) unionDf = df2.unionByName(df1) - assert(unionDf.schema.toDDL == "`topLevelCol` STRUCT<" + - "`b`: ARRAY>>>") + assert(unionDf.schema.toDDL == "topLevelCol STRUCT<" + + "b: ARRAY>>>") checkAnswer(unionDf, Row(Row(Seq(Seq(Row("bb", "ba"))))) :: Row(Row(Seq(Seq(Row("bb", "ba"))))) :: Nil) @@ -1340,8 +1340,8 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { } unionDf = df3.unionByName(df4, true) - assert(unionDf.schema.toDDL == "`topLevelCol` " + - "STRUCT<`b`: ARRAY>>>") + assert(unionDf.schema.toDDL == "topLevelCol " + + "STRUCT>>>") checkAnswer(unionDf, Row(Row(Seq(Seq(Row("ba", null))))) :: Row(Row(Seq(Seq(Row(null, "bb"))))) :: Nil) @@ -1351,8 +1351,8 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { } unionDf = df4.unionByName(df3, true) - assert(unionDf.schema.toDDL == "`topLevelCol` STRUCT<" + - "`b`: ARRAY>>>") + assert(unionDf.schema.toDDL == "topLevelCol STRUCT<" + + "b: ARRAY>>>") checkAnswer(unionDf, Row(Row(Seq(Seq(Row("bb", null))))) :: Row(Row(Seq(Seq(Row(null, "ba"))))) :: Nil) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 06babab122fd2..6661b58b8f522 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -417,7 +417,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { test("infers schemas using options") { val df = spark.range(1) .select(schema_of_json(lit("{a:1}"), Map("allowUnquotedFieldNames" -> 
"true").asJava)) - checkAnswer(df, Seq(Row("STRUCT<`a`: BIGINT>"))) + checkAnswer(df, Seq(Row("STRUCT"))) } test("from_json - array of primitive types") { @@ -697,14 +697,14 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { val input = regexp_replace(lit("""{"item_id": 1, "item_price": 0.1}"""), "item_", "") checkAnswer( spark.range(1).select(schema_of_json(input)), - Seq(Row("STRUCT<`id`: BIGINT, `price`: DOUBLE>"))) + Seq(Row("STRUCT"))) } test("SPARK-31065: schema_of_json - null and empty strings as strings") { Seq("""{"id": null}""", """{"id": ""}""").foreach { input => checkAnswer( spark.range(1).select(schema_of_json(input)), - Seq(Row("STRUCT<`id`: STRING>"))) + Seq(Row("STRUCT"))) } } @@ -716,7 +716,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { schema_of_json( lit("""{"id": "a", "drop": {"drop": null}}"""), options.asJava)), - Seq(Row("STRUCT<`id`: STRING>"))) + Seq(Row("STRUCT"))) // Array of structs checkAnswer( @@ -724,7 +724,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { schema_of_json( lit("""[{"id": "a", "drop": {"drop": null}}]"""), options.asJava)), - Seq(Row("ARRAY>"))) + Seq(Row("ARRAY>"))) // Other types are not affected. checkAnswer( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateTableSuiteBase.scala index 53cdec0d2b6c0..7bc076561f448 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateTableSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateTableSuiteBase.scala @@ -51,8 +51,8 @@ trait ShowCreateTableSuiteBase extends QueryTest with DDLCommandTestUtils { """.stripMargin) val showDDL = getShowCreateDDL(t) assert(showDDL(0) == s"CREATE TABLE $fullName (") - assert(showDDL(1) == "`a` BIGINT NOT NULL,") - assert(showDDL(2) == "`b` BIGINT)") + assert(showDDL(1) == "a BIGINT NOT NULL,") + assert(showDDL(2) == "b BIGINT)") assert(showDDL(3) == s"USING ${classOf[SimpleInsertSource].getName}") } } @@ -75,10 +75,10 @@ trait ShowCreateTableSuiteBase extends QueryTest with DDLCommandTestUtils { ) val showDDL = getShowCreateDDL(t) assert(showDDL(0) == s"CREATE TABLE $fullName (") - assert(showDDL(1) == "`a` STRING,") - assert(showDDL(2) == "`b` STRING,") + assert(showDDL(1) == "a STRING,") + assert(showDDL(2) == "b STRING,") assert(showDDL(3) == "`extra col` ARRAY,") - assert(showDDL(4) == "`` STRUCT<`x`: INT, `y`: ARRAY>)") + assert(showDDL(4) == "`` STRUCT>)") assert(showDDL(5) == "USING json") assert(showDDL(6).startsWith("LOCATION 'file:") && showDDL(6).endsWith("sample.json'")) } @@ -95,7 +95,7 @@ trait ShowCreateTableSuiteBase extends QueryTest with DDLCommandTestUtils { """.stripMargin) val showDDL = getShowCreateDDL(t) assert(showDDL(0) == s"CREATE TABLE $fullName (") - assert(showDDL(1) == "`a` STRUCT<`b`: STRING>)") + assert(showDDL(1) == "a STRUCT)") assert(showDDL(2) == "USING json") } } @@ -119,7 +119,7 @@ trait ShowCreateTableSuiteBase extends QueryTest with DDLCommandTestUtils { |) """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `a` STRING) USING json" + + val expected = s"CREATE TABLE $fullName ( a STRING) USING json" + " OPTIONS ( 'k1' = 'v1', 'k2' = 'v2', 'k3' = 'v3', 'k4' = 'v4', 'k5' = 'v5')" + " TBLPROPERTIES ( 'a' = '2', 'b' = '1')" assert(getShowCreateDDL(t).mkString(" ") == expected) @@ -134,7 +134,7 @@ trait ShowCreateTableSuiteBase extends QueryTest with 
DDLCommandTestUtils { |AS SELECT 1 AS a, "foo" AS b """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING) USING json" + val expected = s"CREATE TABLE $fullName ( a INT, b STRING) USING json" assert(getShowCreateDDL(t).mkString(" ") == expected) } } @@ -148,7 +148,7 @@ trait ShowCreateTableSuiteBase extends QueryTest with DDLCommandTestUtils { |AS SELECT 1 AS a, "foo" AS b """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING) USING json PARTITIONED BY (b)" + val expected = s"CREATE TABLE $fullName ( a INT, b STRING) USING json PARTITIONED BY (b)" assert(getShowCreateDDL(t).mkString(" ") == expected) } } @@ -162,7 +162,7 @@ trait ShowCreateTableSuiteBase extends QueryTest with DDLCommandTestUtils { |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING, `c` DECIMAL(2,1)) USING json" + + val expected = s"CREATE TABLE $fullName ( a INT, b STRING, c DECIMAL(2,1)) USING json" + s" COMMENT 'This is a comment'" assert(getShowCreateDDL(t).mkString(" ") == expected) } @@ -177,7 +177,7 @@ trait ShowCreateTableSuiteBase extends QueryTest with DDLCommandTestUtils { |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING, `c` DECIMAL(2,1)) USING json" + + val expected = s"CREATE TABLE $fullName ( a INT, b STRING, c DECIMAL(2,1)) USING json" + s" TBLPROPERTIES ( 'a' = '1')" assert(getShowCreateDDL(t).mkString(" ") == expected) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala index 023dfce3ba9c9..1dd5e4a5aaa79 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala @@ -58,11 +58,11 @@ trait ShowCreateTableSuiteBase extends command.ShowCreateTableSuiteBase val showDDL = getShowCreateDDL(t) assert(showDDL === Array( s"CREATE TABLE $fullName (", - "`b` BIGINT,", - "`c` BIGINT,", - "`extraCol` ARRAY,", - "`` STRUCT<`x`: INT, `y`: ARRAY>,", - "`a` BIGINT NOT NULL)", + "b BIGINT,", + "c BIGINT,", + "extraCol ARRAY,", + "`` STRUCT>,", + "a BIGINT NOT NULL)", "USING parquet", "OPTIONS (", "'from' = '0',", @@ -89,7 +89,7 @@ trait ShowCreateTableSuiteBase extends command.ShowCreateTableSuiteBase |AS SELECT 1 AS a, "foo" AS b """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING) USING json" + + val expected = s"CREATE TABLE $fullName ( a INT, b STRING) USING json" + s" CLUSTERED BY (a) INTO 2 BUCKETS" assert(getShowCreateDDL(t).mkString(" ") == expected) } @@ -104,7 +104,7 @@ trait ShowCreateTableSuiteBase extends command.ShowCreateTableSuiteBase |AS SELECT 1 AS a, "foo" AS b """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING) USING json" + + val expected = s"CREATE TABLE $fullName ( a INT, b STRING) USING json" + s" CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS" assert(getShowCreateDDL(t).mkString(" ") == expected) } @@ -120,7 +120,7 @@ trait ShowCreateTableSuiteBase extends command.ShowCreateTableSuiteBase |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING, `c` DECIMAL(2,1)) USING json" + + val expected = s"CREATE TABLE $fullName ( a INT, b STRING, c DECIMAL(2,1)) USING json" + s" PARTITIONED BY (c) 
CLUSTERED BY (a) INTO 2 BUCKETS" assert(getShowCreateDDL(t).mkString(" ") == expected) } @@ -136,7 +136,7 @@ trait ShowCreateTableSuiteBase extends command.ShowCreateTableSuiteBase |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING, `c` DECIMAL(2,1)) USING json" + + val expected = s"CREATE TABLE $fullName ( a INT, b STRING, c DECIMAL(2,1)) USING json" + s" PARTITIONED BY (c) CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS" assert(getShowCreateDDL(t).mkString(" ") == expected) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala index 47e59e965509a..7c506812079ec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala @@ -51,8 +51,8 @@ class ShowCreateTableSuite extends command.ShowCreateTableSuiteBase with Command val showDDL = getShowCreateDDL(t, false) assert(showDDL === Array( s"CREATE TABLE $t (", - "`a` INT,", - "`b` STRING)", + "a INT,", + "b STRING)", defaultUsing, "PARTITIONED BY (a)", "COMMENT 'This is a comment'", @@ -89,11 +89,11 @@ class ShowCreateTableSuite extends command.ShowCreateTableSuiteBase with Command val showDDL = getShowCreateDDL(t, false) assert(showDDL === Array( s"CREATE TABLE $t (", - "`a` BIGINT NOT NULL,", - "`b` BIGINT,", - "`c` BIGINT,", - "`extraCol` ARRAY,", - "`` STRUCT<`x`: INT, `y`: ARRAY>)", + "a BIGINT NOT NULL,", + "b BIGINT,", + "c BIGINT,", + "extraCol ARRAY,", + "`` STRUCT>)", defaultUsing, "OPTIONS (", "'from' = '0',", @@ -128,9 +128,9 @@ class ShowCreateTableSuite extends command.ShowCreateTableSuiteBase with Command val showDDL = getShowCreateDDL(t, false) assert(showDDL === Array( s"CREATE TABLE $t (", - "`a` INT,", - "`b` STRING,", - "`ts` TIMESTAMP)", + "a INT,", + "b STRING,", + "ts TIMESTAMP)", defaultUsing, "PARTITIONED BY (a, years(ts), months(ts), days(ts), hours(ts))", "CLUSTERED BY (b)", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowCreateTableSuite.scala index 58145b03fd3c0..a7d5e7b083488 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowCreateTableSuite.scala @@ -48,7 +48,7 @@ class ShowCreateTableSuite extends v1.ShowCreateTableSuiteBase with CommandSuite |) """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `c1` INT COMMENT 'bla', `c2` STRING)" + + val expected = s"CREATE TABLE $fullName ( c1 INT COMMENT 'bla', c2 STRING)" + " ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'" + " WITH SERDEPROPERTIES ( 'serialization.format' = '1')" + " STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'" + @@ -73,7 +73,7 @@ class ShowCreateTableSuite extends v1.ShowCreateTableSuiteBase with CommandSuite |) """.stripMargin ) - val expected = s"CREATE EXTERNAL TABLE $fullName ( `c1` INT COMMENT 'bla', `c2` STRING)" + + val expected = s"CREATE EXTERNAL TABLE $fullName ( c1 INT COMMENT 'bla', c2 STRING)" + s" ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'" + s" WITH SERDEPROPERTIES ( 'serialization.format' = '1')" + s" STORED AS INPUTFORMAT 
'org.apache.hadoop.mapred.TextInputFormat'" + @@ -100,8 +100,8 @@ class ShowCreateTableSuite extends v1.ShowCreateTableSuiteBase with CommandSuite |) """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `c1` INT COMMENT 'bla', `c2` STRING)" + - " COMMENT 'bla' PARTITIONED BY (`p1` BIGINT COMMENT 'bla', `p2` STRING)" + + val expected = s"CREATE TABLE $fullName ( c1 INT COMMENT 'bla', c2 STRING)" + + " COMMENT 'bla' PARTITIONED BY (p1 BIGINT COMMENT 'bla', p2 STRING)" + " ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'" + " WITH SERDEPROPERTIES ( 'serialization.format' = '1')" + " STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'" + @@ -124,7 +124,7 @@ class ShowCreateTableSuite extends v1.ShowCreateTableSuiteBase with CommandSuite |NULL DEFINED AS 'NaN' """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `c1` INT COMMENT 'bla', `c2` STRING)" + + val expected = s"CREATE TABLE $fullName ( c1 INT COMMENT 'bla', c2 STRING)" + " ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'" + " WITH SERDEPROPERTIES (" + " 'colelction.delim' = '@'," + @@ -148,7 +148,7 @@ class ShowCreateTableSuite extends v1.ShowCreateTableSuiteBase with CommandSuite |STORED AS PARQUET """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `c1` INT COMMENT 'bla', `c2` STRING)" + + val expected = s"CREATE TABLE $fullName ( c1 INT COMMENT 'bla', c2 STRING)" + " ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'" + " WITH SERDEPROPERTIES ( 'serialization.format' = '1')" + " STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'" + @@ -175,7 +175,7 @@ class ShowCreateTableSuite extends v1.ShowCreateTableSuiteBase with CommandSuite | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `c1` INT COMMENT 'bla', `c2` STRING)" + + val expected = s"CREATE TABLE $fullName ( c1 INT COMMENT 'bla', c2 STRING)" + " ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'" + " WITH SERDEPROPERTIES (" + " 'mapkey.delim' = ','," + @@ -197,7 +197,7 @@ class ShowCreateTableSuite extends v1.ShowCreateTableSuiteBase with CommandSuite |INTO 2 BUCKETS """.stripMargin ) - val expected = s"CREATE TABLE $fullName ( `a` INT, `b` STRING)" + + val expected = s"CREATE TABLE $fullName ( a INT, b STRING)" + " CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS" + " ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'" + " WITH SERDEPROPERTIES ( 'serialization.format' = '1')" + From 07e94c81df36fea0bb3f96656c5cf88b8c0b00fb Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 20 Jan 2022 17:11:21 +0800 Subject: [PATCH 059/513] [SPARK-37922][SQL] Combine to one cast if we can safely up-cast two casts ### What changes were proposed in this pull request? This PR improves `SimplifyCasts` to combine into one cast if they are both `NumericType` and can safely up-cast two casts. For example: ```scala spark.sql("CREATE TABLE t1 (id int) using parquet") spark.sql("SELECT CAST(CAST(id AS DECIMAL(10, 0)) AS DECIMAL(12, 2)) FROM t1").explain(true) ``` Before this pr: ``` == Optimized Logical Plan == Project [cast(cast(id#1 as decimal(10,0)) as decimal(12,2)) AS casted#0] +- Relation default.t1[id#1] parquet ``` After this pr: ``` == Optimized Logical Plan == Project [cast(id#1 as decimal(12,2)) AS casted#0] +- Relation default.t1[id#1] parquet ``` ### Why are the changes needed? Improve query performance. 
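As a further, purely illustrative sketch, continuing the `t1` example above and mirroring the new cases added to `SimplifyCastsSuite`, the rule only fires when both casts are provably lossless:
```scala
// Lossless chain INT -> BIGINT -> DECIMAL(22,1): the inner cast can be dropped.
spark.sql("SELECT CAST(CAST(id AS BIGINT) AS DECIMAL(22, 1)) FROM t1").explain(true)
// expected optimized plan (roughly): Project [cast(id#1 as decimal(22,1)) AS ...]

// Potentially lossy chain INT -> DECIMAL(2,1): it may overflow, so both casts are kept.
spark.sql("SELECT CAST(CAST(id AS DECIMAL(2, 1)) AS DECIMAL(3, 1)) FROM t1").explain(true)
// expected optimized plan (roughly): Project [cast(cast(id#1 as decimal(2,1)) as decimal(3,1)) AS ...]
```
The new `isWiderCast` check in `SimplifyCasts` (see the diff below) is what rejects the second chain.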
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #35220 from wangyum/SPARK-37922. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/expressions.scala | 12 + .../optimizer/SimplifyCastsSuite.scala | 37 +++ .../q14a.sf100/explain.txt | 32 +-- .../q14a.sf100/simplified.txt | 8 +- .../approved-plans-v1_4/q14a/explain.txt | 32 +-- .../approved-plans-v1_4/q14a/simplified.txt | 8 +- .../q14b.sf100/explain.txt | 24 +- .../q14b.sf100/simplified.txt | 6 +- .../approved-plans-v1_4/q14b/explain.txt | 24 +- .../approved-plans-v1_4/q14b/simplified.txt | 6 +- .../q23a.sf100/explain.txt | 170 +++++++------- .../q23a.sf100/simplified.txt | 6 +- .../approved-plans-v1_4/q23a/explain.txt | 158 ++++++------- .../approved-plans-v1_4/q23a/simplified.txt | 6 +- .../q23b.sf100/explain.txt | 220 +++++++++--------- .../q23b.sf100/simplified.txt | 14 +- .../approved-plans-v1_4/q23b/explain.txt | 188 +++++++-------- .../approved-plans-v1_4/q23b/simplified.txt | 12 +- .../approved-plans-v1_4/q66.sf100/explain.txt | 24 +- .../q66.sf100/simplified.txt | 6 +- .../approved-plans-v1_4/q66/explain.txt | 24 +- .../approved-plans-v1_4/q66/simplified.txt | 6 +- .../approved-plans-v1_4/q67.sf100/explain.txt | 8 +- .../q67.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q67/explain.txt | 8 +- .../approved-plans-v1_4/q67/simplified.txt | 2 +- .../approved-plans-v1_4/q83.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q83/explain.txt | 2 +- .../approved-plans-v1_4/q93.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q93/explain.txt | 2 +- .../approved-plans-v2_7/q14.sf100/explain.txt | 24 +- .../q14.sf100/simplified.txt | 6 +- .../approved-plans-v2_7/q14/explain.txt | 24 +- .../approved-plans-v2_7/q14/simplified.txt | 6 +- .../q14a.sf100/explain.txt | 32 +-- .../q14a.sf100/simplified.txt | 8 +- .../approved-plans-v2_7/q14a/explain.txt | 32 +-- .../approved-plans-v2_7/q14a/simplified.txt | 8 +- .../q67a.sf100/explain.txt | 56 ++--- .../q67a.sf100/simplified.txt | 18 +- .../approved-plans-v2_7/q67a/explain.txt | 56 ++--- .../approved-plans-v2_7/q67a/simplified.txt | 18 +- .../tpch-plan-stability/q20/explain.txt | 2 +- 43 files changed, 695 insertions(+), 646 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 0753e066ae02e..eda4217cd957d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -1038,6 +1038,9 @@ object SimplifyCasts extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning( _.containsPattern(CAST), ruleId) { case Cast(e, dataType, _, _) if e.dataType == dataType => e + case c @ Cast(Cast(e, dt1: NumericType, _, _), dt2: NumericType, _, _) + if isWiderCast(e.dataType, dt1) && isWiderCast(dt1, dt2) => + c.copy(child = e) case c @ Cast(e, dataType, _, _) => (e.dataType, dataType) match { case (ArrayType(from, false), ArrayType(to, true)) if from == to => e case (MapType(fromKey, fromValue, false), MapType(toKey, toValue, true)) @@ -1045,6 +1048,15 @@ object SimplifyCasts extends Rule[LogicalPlan] { case _ => c } } + + // Returns whether the from DataType can be safely casted to the to DataType without losing + // any precision or range. 
+ private def isWiderCast(from: DataType, to: NumericType): Boolean = (from, to) match { + case (from: NumericType, to: DecimalType) if to.isWiderThan(from) => true + case (from: DecimalType, to: NumericType) if from.isTighterThan(to) => true + case (from: IntegralType, to: IntegralType) => Cast.canUpCast(from, to) + case _ => from == to + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala index c981cee55d0fa..3c1815043df7f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala @@ -68,4 +68,41 @@ class SimplifyCastsSuite extends PlanTest { // `SimplifyCasts` rule respect the plan. comparePlans(optimized, plan, checkAnalysis = false) } + + test("SPARK-37922: Combine to one cast if we can safely up-cast two casts") { + val input = LocalRelation('a.int, 'b.decimal(18, 2), 'c.date, 'd.timestamp) + + // Combine casts + comparePlans( + Optimize.execute( + input.select('a.cast(DecimalType(18, 1)).cast(DecimalType(19, 1)).as("casted")).analyze), + input.select('a.cast(DecimalType(19, 1)).as("casted")).analyze) + comparePlans( + Optimize.execute( + input.select('a.cast(LongType).cast(DecimalType(22, 1)).as("casted")).analyze), + input.select('a.cast(DecimalType(22, 1)).as("casted")).analyze) + comparePlans( + Optimize.execute( + input.select('b.cast(DecimalType(20, 2)).cast(DecimalType(24, 2)).as("casted")).analyze), + input.select('b.cast(DecimalType(24, 2)).as("casted")).analyze) + + // Can not combine casts + comparePlans( + Optimize.execute( + input.select('a.cast(DecimalType(2, 1)).cast(DecimalType(3, 1)).as("casted")).analyze), + input.select('a.cast(DecimalType(2, 1)).cast(DecimalType(3, 1)).as("casted")).analyze) + comparePlans( + Optimize.execute( + input.select('b.cast(DecimalType(10, 2)).cast(DecimalType(24, 2)).as("casted")).analyze), + input.select('b.cast(DecimalType(10, 2)).cast(DecimalType(24, 2)).as("casted")).analyze) + + comparePlans( + Optimize.execute( + input.select('c.cast(TimestampType).cast(StringType).as("casted")).analyze), + input.select('c.cast(TimestampType).cast(StringType).as("casted")).analyze) + comparePlans( + Optimize.execute( + input.select('d.cast(LongType).cast(StringType).as("casted")).analyze), + input.select('d.cast(LongType).cast(StringType).as("casted")).analyze) + } } diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt index 536a1cc04222f..14858257813e5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt @@ -477,7 +477,7 @@ Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#48, i_brand_ (78) HashAggregate [codegen id : 45] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: 
[partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] Results [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] @@ -488,9 +488,9 @@ Arguments: hashpartitioning(i_brand_id#49, i_class_id#50, i_category_id#51, 5), (80) HashAggregate [codegen id : 46] Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] -Results [5]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#63, count(1)#62 AS number_sales#64] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] +Results [5]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#63, count(1)#62 AS number_sales#64] (81) Filter [codegen id : 46] Input [5]: [i_brand_id#49, i_class_id#50, i_category_id#51, sales#63, number_sales#64] @@ -562,7 +562,7 @@ Input [7]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, i_item_sk#74, i_bra (97) HashAggregate [codegen id : 91] Input [5]: [cs_quantity#69, cs_list_price#70, i_brand_id#75, i_class_id#76, i_category_id#77] Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#78, isEmpty#79, count#80] Results [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] @@ -573,9 +573,9 @@ Arguments: hashpartitioning(i_brand_id#75, i_class_id#76, i_category_id#77, 5), (99) HashAggregate [codegen id : 92] Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate 
Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#85, count(1)#86] -Results [5]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#85 AS sales#87, count(1)#86 AS number_sales#88] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#85, count(1)#86] +Results [5]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#85 AS sales#87, count(1)#86 AS number_sales#88] (100) Filter [codegen id : 92] Input [5]: [i_brand_id#75, i_class_id#76, i_category_id#77, sales#87, number_sales#88] @@ -647,7 +647,7 @@ Input [7]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, i_item_sk#96, i_bra (116) HashAggregate [codegen id : 137] Input [5]: [ws_quantity#91, ws_list_price#92, i_brand_id#97, i_class_id#98, i_category_id#99] Keys [3]: [i_brand_id#97, i_class_id#98, i_category_id#99] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#91 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#100, isEmpty#101, count#102] Results [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] @@ -658,9 +658,9 @@ Arguments: hashpartitioning(i_brand_id#97, i_class_id#98, i_category_id#99, 5), (118) HashAggregate [codegen id : 138] Input [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] Keys [3]: [i_brand_id#97, i_class_id#98, i_category_id#99] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#91 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#91 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true))#107, count(1)#108] -Results [5]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#91 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true))#107 AS sales#109, count(1)#108 AS number_sales#110] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * 
promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true))#107, count(1)#108] +Results [5]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true))#107 AS sales#109, count(1)#108 AS number_sales#110] (119) Filter [codegen id : 138] Input [5]: [i_brand_id#97, i_class_id#98, i_category_id#99, sales#109, number_sales#110] @@ -793,7 +793,7 @@ Input [4]: [ws_quantity#140, ws_list_price#141, ws_sold_date_sk#142, d_date_sk#1 (143) HashAggregate [codegen id : 7] Input [2]: [quantity#132, list_price#133] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#132 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#146, count#147] Results [2]: [sum#148, count#149] @@ -804,9 +804,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#150] (145) HashAggregate [codegen id : 8] Input [2]: [sum#148, count#149] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#132 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#132 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2), true))#151] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#132 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2), true))#151 AS average_sales#152] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2), true))#151] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2), true))#151 AS average_sales#152] Subquery:2 Hosting operator id = 127 Hosting Expression = ss_sold_date_sk#130 IN dynamicpruning#13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt index 35a8f0d31afc7..1666d02ce276c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt @@ -13,7 +13,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Filter [sales] Subquery #3 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] 
[avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter Exchange #18 WholeStageCodegen (7) @@ -47,7 +47,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su ReusedSubquery [d_date_sk] #2 InputAdapter ReusedExchange [d_date_sk] #10 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #2 WholeStageCodegen (45) @@ -207,7 +207,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #19 WholeStageCodegen (91) @@ -241,7 +241,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #21 WholeStageCodegen (137) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt index cf4bb6501bd92..fa036252e71bc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt @@ -406,7 +406,7 @@ Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_ (65) HashAggregate [codegen id : 25] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: 
[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] @@ -417,9 +417,9 @@ Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), (67) HashAggregate [codegen id : 26] Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56, count(1)#57] -Results [5]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56 AS sales#58, count(1)#57 AS number_sales#59] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56, count(1)#57] +Results [5]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56 AS sales#58, count(1)#57 AS number_sales#59] (68) Filter [codegen id : 26] Input [5]: [i_brand_id#44, i_class_id#45, i_category_id#46, sales#58, number_sales#59] @@ -479,7 +479,7 @@ Input [7]: [cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_brand_id#68, (81) HashAggregate [codegen id : 51] Input [5]: [cs_quantity#64, cs_list_price#65, i_brand_id#68, i_class_id#69, i_category_id#70] Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#72, isEmpty#73, count#74] Results [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] @@ -490,9 +490,9 @@ Arguments: hashpartitioning(i_brand_id#68, i_class_id#69, i_category_id#70, 5), (83) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] 
-Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79, count(1)#80] -Results [5]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79 AS sales#81, count(1)#80 AS number_sales#82] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79, count(1)#80] +Results [5]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79 AS sales#81, count(1)#80 AS number_sales#82] (84) Filter [codegen id : 52] Input [5]: [i_brand_id#68, i_class_id#69, i_category_id#70, sales#81, number_sales#82] @@ -552,7 +552,7 @@ Input [7]: [ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_brand_id#89, (97) HashAggregate [codegen id : 77] Input [5]: [ws_quantity#85, ws_list_price#86, i_brand_id#89, i_class_id#90, i_category_id#91] Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#85 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#93, isEmpty#94, count#95] Results [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] @@ -563,9 +563,9 @@ Arguments: hashpartitioning(i_brand_id#89, i_class_id#90, i_category_id#91, 5), (99) HashAggregate [codegen id : 78] Input [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#85 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#85 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true))#100, count(1)#101] -Results [5]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#85 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sales#102, count(1)#101 AS number_sales#103] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as 
decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true))#100, count(1)#101] +Results [5]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sales#102, count(1)#101 AS number_sales#103] (100) Filter [codegen id : 78] Input [5]: [i_brand_id#89, i_class_id#90, i_category_id#91, sales#102, number_sales#103] @@ -698,7 +698,7 @@ Input [4]: [ws_quantity#133, ws_list_price#134, ws_sold_date_sk#135, d_date_sk#1 (124) HashAggregate [codegen id : 7] Input [2]: [quantity#125, list_price#126] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#125 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#139, count#140] Results [2]: [sum#141, count#142] @@ -709,9 +709,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#143] (126) HashAggregate [codegen id : 8] Input [2]: [sum#141, count#142] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#125 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#125 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2), true))#144] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#125 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2), true))#144 AS average_sales#145] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2), true))#144] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2), true))#144 AS average_sales#145] Subquery:2 Hosting operator id = 108 Hosting Expression = ss_sold_date_sk#123 IN dynamicpruning#12 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt index 34d892c264062..521d5b34ea0e8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt @@ -13,7 +13,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Filter [sales] Subquery #3 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as 
decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter Exchange #13 WholeStageCodegen (7) @@ -47,7 +47,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su ReusedSubquery [d_date_sk] #2 InputAdapter ReusedExchange [d_date_sk] #7 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #2 WholeStageCodegen (25) @@ -168,7 +168,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #14 WholeStageCodegen (51) @@ -193,7 +193,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #15 WholeStageCodegen (77) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt index 3a62afcce3e31..ff2e3984e6dd2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt @@ -453,7 +453,7 @@ Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#48, i_brand_ (78) HashAggregate [codegen id : 
45] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] Results [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] @@ -464,9 +464,9 @@ Arguments: hashpartitioning(i_brand_id#49, i_class_id#50, i_category_id#51, 5), (80) HashAggregate [codegen id : 92] Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] -Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] +Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65] (81) Filter [codegen id : 92] Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65] @@ -534,7 +534,7 @@ Input [7]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, i_item_sk#75, i_bra (96) HashAggregate [codegen id : 90] Input [5]: [ss_quantity#69, ss_list_price#70, i_brand_id#76, i_class_id#77, i_category_id#78] Keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#79, isEmpty#80, count#81] Results [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] @@ -545,9 +545,9 @@ Arguments: 
hashpartitioning(i_brand_id#76, i_class_id#77, i_category_id#78, 5), (98) HashAggregate [codegen id : 91] Input [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] Keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#86, count(1)#87] -Results [6]: [store AS channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#86 AS sales#89, count(1)#87 AS number_sales#90] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#86, count(1)#87] +Results [6]: [store AS channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#86 AS sales#89, count(1)#87 AS number_sales#90] (99) Filter [codegen id : 91] Input [6]: [channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sales#89, number_sales#90] @@ -661,7 +661,7 @@ Input [4]: [ws_quantity#104, ws_list_price#105, ws_sold_date_sk#106, d_date_sk#1 (119) HashAggregate [codegen id : 7] Input [2]: [quantity#96, list_price#97] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#96 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#110, count#111] Results [2]: [sum#112, count#113] @@ -672,9 +672,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#114] (121) HashAggregate [codegen id : 8] Input [2]: [sum#112, count#113] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#96 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#96 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))#115] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#96 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))#115 AS average_sales#116] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: 
[avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))#115] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))#115 AS average_sales#116] Subquery:2 Hosting operator id = 103 Hosting Expression = ss_sold_date_sk#94 IN dynamicpruning#13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt index 695a7c13381d8..7c193e479a013 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt @@ -4,7 +4,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Filter [sales] Subquery #4 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter Exchange #17 WholeStageCodegen (7) @@ -38,7 +38,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ ReusedSubquery [d_date_sk] #3 InputAdapter ReusedExchange [d_date_sk] #9 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (45) @@ -206,7 +206,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ WholeStageCodegen (91) Filter [sales] ReusedSubquery [average_sales] #4 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #19 WholeStageCodegen (90) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt index ae5cf49cbb21b..254c73a9e8884 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt @@ -385,7 +385,7 @@ Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_ (65) HashAggregate [codegen id : 25] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] @@ -396,9 +396,9 @@ Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), (67) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56, count(1)#57] -Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56 AS sales#59, count(1)#57 AS number_sales#60] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56, count(1)#57] +Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56 AS sales#59, count(1)#57 AS number_sales#60] (68) Filter [codegen id : 52] Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60] @@ -454,7 +454,7 @@ Input [7]: [ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_brand_id#69, (80) HashAggregate [codegen id : 50] Input [5]: [ss_quantity#64, ss_list_price#65, i_brand_id#69, i_class_id#70, i_category_id#71] Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * 
promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#73, isEmpty#74, count#75] Results [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] @@ -465,9 +465,9 @@ Arguments: hashpartitioning(i_brand_id#69, i_class_id#70, i_category_id#71, 5), (82) HashAggregate [codegen id : 51] Input [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#80, count(1)#81] -Results [6]: [store AS channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#80 AS sales#83, count(1)#81 AS number_sales#84] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#80, count(1)#81] +Results [6]: [store AS channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#80 AS sales#83, count(1)#81 AS number_sales#84] (83) Filter [codegen id : 51] Input [6]: [channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] @@ -581,7 +581,7 @@ Input [4]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100, d_date_sk#101 (103) HashAggregate [codegen id : 7] Input [2]: [quantity#90, list_price#91] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#90 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#104, count#105] Results [2]: [sum#106, count#107] @@ -592,9 +592,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#108] (105) HashAggregate [codegen id : 8] Input [2]: [sum#106, count#107] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#90 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#90 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))#109] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#90 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), 
DecimalType(18,2), true))#109 AS average_sales#110] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))#109] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))#109 AS average_sales#110] Subquery:2 Hosting operator id = 87 Hosting Expression = ss_sold_date_sk#88 IN dynamicpruning#12 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt index 2df0810ddba28..15fdf6b0eab16 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt @@ -4,7 +4,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Filter [sales] Subquery #4 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter Exchange #12 WholeStageCodegen (7) @@ -38,7 +38,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ ReusedSubquery [d_date_sk] #3 InputAdapter ReusedExchange [d_date_sk] #6 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (25) @@ -167,7 +167,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ WholeStageCodegen (51) Filter [sales] ReusedSubquery [average_sales] #4 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #14 WholeStageCodegen (50) diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt index be706fee66776..05c6a35ee7ced 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt @@ -278,16 +278,16 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (42) HashAggregate [codegen id : 15] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#31, isEmpty#32] Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] (43) HashAggregate [codegen id : 15] Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] (44) Filter [codegen id : 15] Input [2]: [c_customer_sk#29, ssales#36] @@ -319,7 +319,7 @@ Right keys [1]: [d_date_sk#39] Join condition: None (51) Project [codegen id : 17] -Output [1]: [CheckOverflow((promote_precision(cast(cast(cs_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true) AS sales#40] +Output [1]: [CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true) AS sales#40] Input [4]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#39] (52) Scan parquet default.web_sales @@ -432,16 +432,16 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (77) HashAggregate [codegen id : 32] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#31, isEmpty#32] -Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#49, isEmpty#50] +Results [3]: [c_customer_sk#29, sum#51, isEmpty#52] (78) HashAggregate [codegen id : 32] -Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] +Input [3]: [c_customer_sk#29, sum#51, isEmpty#52] Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] (79) Filter [codegen id : 32] Input [2]: [c_customer_sk#29, ssales#36] @@ -465,16 +465,16 @@ Output [3]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] Input [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] (84) ReusedExchange [Reuses operator id: 95] -Output [1]: [d_date_sk#49] +Output [1]: [d_date_sk#53] (85) BroadcastHashJoin [codegen id : 34] Left keys [1]: [ws_sold_date_sk#45] -Right keys [1]: [d_date_sk#49] +Right keys [1]: [d_date_sk#53] Join condition: None (86) Project [codegen id : 34] -Output [1]: [CheckOverflow((promote_precision(cast(cast(ws_quantity#43 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#44 as decimal(12,2)))), DecimalType(18,2), true) AS sales#50] -Input [4]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45, d_date_sk#49] +Output [1]: [CheckOverflow((promote_precision(cast(ws_quantity#43 as decimal(12,2))) * promote_precision(cast(ws_list_price#44 as decimal(12,2)))), DecimalType(18,2), true) AS sales#54] +Input [4]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45, d_date_sk#53] (87) Union @@ -482,19 +482,19 @@ Input [4]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45, d_date_sk#49] Input [1]: [sales#40] Keys: [] Functions [1]: [partial_sum(sales#40)] -Aggregate Attributes [2]: [sum#51, isEmpty#52] -Results [2]: [sum#53, isEmpty#54] +Aggregate Attributes [2]: [sum#55, isEmpty#56] +Results [2]: [sum#57, isEmpty#58] (89) Exchange -Input [2]: [sum#53, isEmpty#54] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#55] +Input [2]: [sum#57, isEmpty#58] +Arguments: 
SinglePartition, ENSURE_REQUIREMENTS, [id=#59] (90) HashAggregate [codegen id : 36] -Input [2]: [sum#53, isEmpty#54] +Input [2]: [sum#57, isEmpty#58] Keys: [] Functions [1]: [sum(sales#40)] -Aggregate Attributes [1]: [sum(sales#40)#56] -Results [1]: [sum(sales#40)#56 AS sum(sales)#57] +Aggregate Attributes [1]: [sum(sales#40)#60] +Results [1]: [sum(sales#40)#60 AS sum(sales)#61] ===== Subqueries ===== @@ -507,26 +507,26 @@ BroadcastExchange (95) (91) Scan parquet default.date_dim -Output [3]: [d_date_sk#39, d_year#58, d_moy#59] +Output [3]: [d_date_sk#39, d_year#62, d_moy#63] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (92) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#58, d_moy#59] +Input [3]: [d_date_sk#39, d_year#62, d_moy#63] (93) Filter [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#58, d_moy#59] -Condition : ((((isnotnull(d_year#58) AND isnotnull(d_moy#59)) AND (d_year#58 = 2000)) AND (d_moy#59 = 2)) AND isnotnull(d_date_sk#39)) +Input [3]: [d_date_sk#39, d_year#62, d_moy#63] +Condition : ((((isnotnull(d_year#62) AND isnotnull(d_moy#63)) AND (d_year#62 = 2000)) AND (d_moy#63 = 2)) AND isnotnull(d_date_sk#39)) (94) Project [codegen id : 1] Output [1]: [d_date_sk#39] -Input [3]: [d_date_sk#39, d_year#58, d_moy#59] +Input [3]: [d_date_sk#39, d_year#62, d_moy#63] (95) BroadcastExchange Input [1]: [d_date_sk#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#64] Subquery:2 Hosting operator id = 5 Hosting Expression = ss_sold_date_sk#9 IN dynamicpruning#10 BroadcastExchange (100) @@ -537,26 +537,26 @@ BroadcastExchange (100) (96) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_date#12, d_year#61] +Output [3]: [d_date_sk#11, d_date#12, d_year#65] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (97) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_date#12, d_year#61] +Input [3]: [d_date_sk#11, d_date#12, d_year#65] (98) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_date#12, d_year#61] -Condition : (d_year#61 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#11, d_date#12, d_year#65] +Condition : (d_year#65 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#11)) (99) Project [codegen id : 1] Output [2]: [d_date_sk#11, d_date#12] -Input [3]: [d_date_sk#11, d_date#12, d_year#61] +Input [3]: [d_date_sk#11, d_date#12, d_year#65] (100) BroadcastExchange Input [2]: [d_date_sk#11, d_date#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#62] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#66] Subquery:3 Hosting operator id = 44 Hosting Expression = Subquery scalar-subquery#37, [id=#38] * HashAggregate (117) @@ -579,89 +579,89 @@ Subquery:3 Hosting operator id = 44 Hosting Expression = Subquery scalar-subquer (101) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66] +Output [4]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69, ss_sold_date_sk#70] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: 
[isnotnull(ss_sold_date_sk#66), dynamicpruningexpression(ss_sold_date_sk#66 IN dynamicpruning#67)] +PartitionFilters: [isnotnull(ss_sold_date_sk#70), dynamicpruningexpression(ss_sold_date_sk#70 IN dynamicpruning#71)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (102) ColumnarToRow [codegen id : 2] -Input [4]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66] +Input [4]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69, ss_sold_date_sk#70] (103) Filter [codegen id : 2] -Input [4]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66] -Condition : isnotnull(ss_customer_sk#63) +Input [4]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69, ss_sold_date_sk#70] +Condition : isnotnull(ss_customer_sk#67) (104) ReusedExchange [Reuses operator id: 122] -Output [1]: [d_date_sk#68] +Output [1]: [d_date_sk#72] (105) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#66] -Right keys [1]: [d_date_sk#68] +Left keys [1]: [ss_sold_date_sk#70] +Right keys [1]: [d_date_sk#72] Join condition: None (106) Project [codegen id : 2] -Output [3]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65] -Input [5]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66, d_date_sk#68] +Output [3]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69] +Input [5]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69, ss_sold_date_sk#70, d_date_sk#72] (107) Exchange -Input [3]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65] -Arguments: hashpartitioning(ss_customer_sk#63, 5), ENSURE_REQUIREMENTS, [id=#69] +Input [3]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69] +Arguments: hashpartitioning(ss_customer_sk#67, 5), ENSURE_REQUIREMENTS, [id=#73] (108) Sort [codegen id : 3] -Input [3]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65] -Arguments: [ss_customer_sk#63 ASC NULLS FIRST], false, 0 +Input [3]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69] +Arguments: [ss_customer_sk#67 ASC NULLS FIRST], false, 0 (109) ReusedExchange [Reuses operator id: 38] -Output [1]: [c_customer_sk#70] +Output [1]: [c_customer_sk#74] (110) Sort [codegen id : 5] -Input [1]: [c_customer_sk#70] -Arguments: [c_customer_sk#70 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#74] +Arguments: [c_customer_sk#74 ASC NULLS FIRST], false, 0 (111) SortMergeJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#63] -Right keys [1]: [c_customer_sk#70] +Left keys [1]: [ss_customer_sk#67] +Right keys [1]: [c_customer_sk#74] Join condition: None (112) Project [codegen id : 6] -Output [3]: [ss_quantity#64, ss_sales_price#65, c_customer_sk#70] -Input [4]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, c_customer_sk#70] +Output [3]: [ss_quantity#68, ss_sales_price#69, c_customer_sk#74] +Input [4]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69, c_customer_sk#74] (113) HashAggregate [codegen id : 6] -Input [3]: [ss_quantity#64, ss_sales_price#65, c_customer_sk#70] -Keys [1]: [c_customer_sk#70] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#71, isEmpty#72] -Results [3]: [c_customer_sk#70, sum#73, isEmpty#74] +Input [3]: [ss_quantity#68, ss_sales_price#69, c_customer_sk#74] +Keys [1]: [c_customer_sk#74] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * 
promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#75, isEmpty#76] +Results [3]: [c_customer_sk#74, sum#77, isEmpty#78] (114) HashAggregate [codegen id : 6] -Input [3]: [c_customer_sk#70, sum#73, isEmpty#74] -Keys [1]: [c_customer_sk#70] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2), true))#75] -Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2), true))#75 AS csales#76] +Input [3]: [c_customer_sk#74, sum#77, isEmpty#78] +Keys [1]: [c_customer_sk#74] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2), true))#79] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2), true))#79 AS csales#80] (115) HashAggregate [codegen id : 6] -Input [1]: [csales#76] +Input [1]: [csales#80] Keys: [] -Functions [1]: [partial_max(csales#76)] -Aggregate Attributes [1]: [max#77] -Results [1]: [max#78] +Functions [1]: [partial_max(csales#80)] +Aggregate Attributes [1]: [max#81] +Results [1]: [max#82] (116) Exchange -Input [1]: [max#78] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#79] +Input [1]: [max#82] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#83] (117) HashAggregate [codegen id : 7] -Input [1]: [max#78] +Input [1]: [max#82] Keys: [] -Functions [1]: [max(csales#76)] -Aggregate Attributes [1]: [max(csales#76)#80] -Results [1]: [max(csales#76)#80 AS tpcds_cmax#81] +Functions [1]: [max(csales#80)] +Aggregate Attributes [1]: [max(csales#80)#84] +Results [1]: [max(csales#80)#84 AS tpcds_cmax#85] -Subquery:4 Hosting operator id = 101 Hosting Expression = ss_sold_date_sk#66 IN dynamicpruning#67 +Subquery:4 Hosting operator id = 101 Hosting Expression = ss_sold_date_sk#70 IN dynamicpruning#71 BroadcastExchange (122) +- * Project (121) +- * Filter (120) @@ -670,26 +670,26 @@ BroadcastExchange (122) (118) Scan parquet default.date_dim -Output [2]: [d_date_sk#68, d_year#82] +Output [2]: [d_date_sk#72, d_year#86] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (119) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#68, d_year#82] +Input [2]: [d_date_sk#72, d_year#86] (120) Filter [codegen id : 1] -Input [2]: [d_date_sk#68, d_year#82] -Condition : (d_year#82 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#68)) +Input [2]: [d_date_sk#72, d_year#86] +Condition : (d_year#86 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#72)) (121) Project [codegen id : 1] -Output [1]: [d_date_sk#68] -Input [2]: [d_date_sk#68, d_year#82] +Output [1]: [d_date_sk#72] +Input [2]: [d_date_sk#72, 
d_year#86] (122) BroadcastExchange -Input [1]: [d_date_sk#68] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#83] +Input [1]: [d_date_sk#72] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#87] Subquery:5 Hosting operator id = 52 Hosting Expression = ws_sold_date_sk#45 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt index 17377b91326fd..7fcf4ef29d66a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt @@ -89,7 +89,7 @@ WholeStageCodegen (36) Exchange #10 WholeStageCodegen (6) HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] @@ -120,7 +120,7 @@ WholeStageCodegen (36) Sort [c_customer_sk] InputAdapter ReusedExchange [c_customer_sk] #9 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] @@ -195,7 +195,7 @@ WholeStageCodegen (36) Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt index 1de23e1f4d2ab..8b5ac41195ab8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt @@ -226,7 +226,7 @@ 
Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (35) HashAggregate [codegen id : 8] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] Keys [1]: [c_customer_sk#28] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#30, isEmpty#31] Results [3]: [c_customer_sk#28, sum#32, isEmpty#33] @@ -237,9 +237,9 @@ Arguments: hashpartitioning(c_customer_sk#28, 5), ENSURE_REQUIREMENTS, [id=#34] (37) HashAggregate [codegen id : 9] Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] (38) Filter [codegen id : 9] Input [2]: [c_customer_sk#28, ssales#36] @@ -271,7 +271,7 @@ Right keys [1]: [d_date_sk#39] Join condition: None (45) Project [codegen id : 11] -Output [1]: [CheckOverflow((promote_precision(cast(cast(cs_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true) AS sales#40] +Output [1]: [CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true) AS sales#40] Input [4]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#39] (46) Scan parquet default.web_sales @@ -305,14 +305,14 @@ Input [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_da Arguments: [ws_bill_customer_sk#42 ASC NULLS FIRST], false, 0 (53) ReusedExchange [Reuses operator id: 36] -Output [3]: [c_customer_sk#28, sum#32, isEmpty#33] +Output [3]: [c_customer_sk#28, sum#47, isEmpty#48] (54) HashAggregate [codegen id : 20] -Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] +Input [3]: [c_customer_sk#28, sum#47, isEmpty#48] Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), 
DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] (55) Filter [codegen id : 20] Input [2]: [c_customer_sk#28, ssales#36] @@ -336,16 +336,16 @@ Output [3]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] Input [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] (60) ReusedExchange [Reuses operator id: 71] -Output [1]: [d_date_sk#47] +Output [1]: [d_date_sk#49] (61) BroadcastHashJoin [codegen id : 22] Left keys [1]: [ws_sold_date_sk#45] -Right keys [1]: [d_date_sk#47] +Right keys [1]: [d_date_sk#49] Join condition: None (62) Project [codegen id : 22] -Output [1]: [CheckOverflow((promote_precision(cast(cast(ws_quantity#43 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#44 as decimal(12,2)))), DecimalType(18,2), true) AS sales#48] -Input [4]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45, d_date_sk#47] +Output [1]: [CheckOverflow((promote_precision(cast(ws_quantity#43 as decimal(12,2))) * promote_precision(cast(ws_list_price#44 as decimal(12,2)))), DecimalType(18,2), true) AS sales#50] +Input [4]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45, d_date_sk#49] (63) Union @@ -353,19 +353,19 @@ Input [4]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45, d_date_sk#47] Input [1]: [sales#40] Keys: [] Functions [1]: [partial_sum(sales#40)] -Aggregate Attributes [2]: [sum#49, isEmpty#50] -Results [2]: [sum#51, isEmpty#52] +Aggregate Attributes [2]: [sum#51, isEmpty#52] +Results [2]: [sum#53, isEmpty#54] (65) Exchange -Input [2]: [sum#51, isEmpty#52] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#53] +Input [2]: [sum#53, isEmpty#54] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#55] (66) HashAggregate [codegen id : 24] -Input [2]: [sum#51, isEmpty#52] +Input [2]: [sum#53, isEmpty#54] Keys: [] Functions [1]: [sum(sales#40)] -Aggregate Attributes [1]: [sum(sales#40)#54] -Results [1]: [sum(sales#40)#54 AS sum(sales)#55] +Aggregate Attributes [1]: [sum(sales#40)#56] +Results [1]: [sum(sales#40)#56 AS sum(sales)#57] ===== Subqueries ===== @@ -378,26 +378,26 @@ BroadcastExchange (71) (67) Scan parquet default.date_dim -Output [3]: [d_date_sk#39, d_year#56, d_moy#57] +Output [3]: [d_date_sk#39, d_year#58, d_moy#59] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (68) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#39, 
d_year#56, d_moy#57] +Input [3]: [d_date_sk#39, d_year#58, d_moy#59] (69) Filter [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#56, d_moy#57] -Condition : ((((isnotnull(d_year#56) AND isnotnull(d_moy#57)) AND (d_year#56 = 2000)) AND (d_moy#57 = 2)) AND isnotnull(d_date_sk#39)) +Input [3]: [d_date_sk#39, d_year#58, d_moy#59] +Condition : ((((isnotnull(d_year#58) AND isnotnull(d_moy#59)) AND (d_year#58 = 2000)) AND (d_moy#59 = 2)) AND isnotnull(d_date_sk#39)) (70) Project [codegen id : 1] Output [1]: [d_date_sk#39] -Input [3]: [d_date_sk#39, d_year#56, d_moy#57] +Input [3]: [d_date_sk#39, d_year#58, d_moy#59] (71) BroadcastExchange Input [1]: [d_date_sk#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#58] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] Subquery:2 Hosting operator id = 3 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 BroadcastExchange (76) @@ -408,26 +408,26 @@ BroadcastExchange (76) (72) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_date#11, d_year#59] +Output [3]: [d_date_sk#10, d_date#11, d_year#61] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (73) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_date#11, d_year#59] +Input [3]: [d_date_sk#10, d_date#11, d_year#61] (74) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_date#11, d_year#59] -Condition : (d_year#59 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#10, d_date#11, d_year#61] +Condition : (d_year#61 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#10)) (75) Project [codegen id : 1] Output [2]: [d_date_sk#10, d_date#11] -Input [3]: [d_date_sk#10, d_date#11, d_year#59] +Input [3]: [d_date_sk#10, d_date#11, d_year#61] (76) BroadcastExchange Input [2]: [d_date_sk#10, d_date#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#62] Subquery:3 Hosting operator id = 38 Hosting Expression = Subquery scalar-subquery#37, [id=#38] * HashAggregate (91) @@ -448,81 +448,81 @@ Subquery:3 Hosting operator id = 38 Hosting Expression = Subquery scalar-subquer (77) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#61, ss_quantity#62, ss_sales_price#63, ss_sold_date_sk#64] +Output [4]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#64), dynamicpruningexpression(ss_sold_date_sk#64 IN dynamicpruning#65)] +PartitionFilters: [isnotnull(ss_sold_date_sk#66), dynamicpruningexpression(ss_sold_date_sk#66 IN dynamicpruning#67)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (78) ColumnarToRow [codegen id : 3] -Input [4]: [ss_customer_sk#61, ss_quantity#62, ss_sales_price#63, ss_sold_date_sk#64] +Input [4]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66] (79) Filter [codegen id : 3] -Input [4]: [ss_customer_sk#61, ss_quantity#62, ss_sales_price#63, ss_sold_date_sk#64] -Condition : isnotnull(ss_customer_sk#61) +Input [4]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66] +Condition : isnotnull(ss_customer_sk#63) (80) ReusedExchange [Reuses operator id: 32] -Output [1]: [c_customer_sk#66] +Output [1]: 
[c_customer_sk#68] (81) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_customer_sk#61] -Right keys [1]: [c_customer_sk#66] +Left keys [1]: [ss_customer_sk#63] +Right keys [1]: [c_customer_sk#68] Join condition: None (82) Project [codegen id : 3] -Output [4]: [ss_quantity#62, ss_sales_price#63, ss_sold_date_sk#64, c_customer_sk#66] -Input [5]: [ss_customer_sk#61, ss_quantity#62, ss_sales_price#63, ss_sold_date_sk#64, c_customer_sk#66] +Output [4]: [ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66, c_customer_sk#68] +Input [5]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66, c_customer_sk#68] (83) ReusedExchange [Reuses operator id: 96] -Output [1]: [d_date_sk#67] +Output [1]: [d_date_sk#69] (84) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#64] -Right keys [1]: [d_date_sk#67] +Left keys [1]: [ss_sold_date_sk#66] +Right keys [1]: [d_date_sk#69] Join condition: None (85) Project [codegen id : 3] -Output [3]: [ss_quantity#62, ss_sales_price#63, c_customer_sk#66] -Input [5]: [ss_quantity#62, ss_sales_price#63, ss_sold_date_sk#64, c_customer_sk#66, d_date_sk#67] +Output [3]: [ss_quantity#64, ss_sales_price#65, c_customer_sk#68] +Input [5]: [ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66, c_customer_sk#68, d_date_sk#69] (86) HashAggregate [codegen id : 3] -Input [3]: [ss_quantity#62, ss_sales_price#63, c_customer_sk#66] -Keys [1]: [c_customer_sk#66] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#62 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#63 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#68, isEmpty#69] -Results [3]: [c_customer_sk#66, sum#70, isEmpty#71] +Input [3]: [ss_quantity#64, ss_sales_price#65, c_customer_sk#68] +Keys [1]: [c_customer_sk#68] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#70, isEmpty#71] +Results [3]: [c_customer_sk#68, sum#72, isEmpty#73] (87) Exchange -Input [3]: [c_customer_sk#66, sum#70, isEmpty#71] -Arguments: hashpartitioning(c_customer_sk#66, 5), ENSURE_REQUIREMENTS, [id=#72] +Input [3]: [c_customer_sk#68, sum#72, isEmpty#73] +Arguments: hashpartitioning(c_customer_sk#68, 5), ENSURE_REQUIREMENTS, [id=#74] (88) HashAggregate [codegen id : 4] -Input [3]: [c_customer_sk#66, sum#70, isEmpty#71] -Keys [1]: [c_customer_sk#66] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#62 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#63 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#62 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#63 as decimal(12,2)))), DecimalType(18,2), true))#73] -Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#62 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#63 as decimal(12,2)))), DecimalType(18,2), true))#73 AS csales#74] +Input [3]: [c_customer_sk#68, sum#72, isEmpty#73] +Keys [1]: [c_customer_sk#68] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * 
promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2), true))#75] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2), true))#75 AS csales#76] (89) HashAggregate [codegen id : 4] -Input [1]: [csales#74] +Input [1]: [csales#76] Keys: [] -Functions [1]: [partial_max(csales#74)] -Aggregate Attributes [1]: [max#75] -Results [1]: [max#76] +Functions [1]: [partial_max(csales#76)] +Aggregate Attributes [1]: [max#77] +Results [1]: [max#78] (90) Exchange -Input [1]: [max#76] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#77] +Input [1]: [max#78] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#79] (91) HashAggregate [codegen id : 5] -Input [1]: [max#76] +Input [1]: [max#78] Keys: [] -Functions [1]: [max(csales#74)] -Aggregate Attributes [1]: [max(csales#74)#78] -Results [1]: [max(csales#74)#78 AS tpcds_cmax#79] +Functions [1]: [max(csales#76)] +Aggregate Attributes [1]: [max(csales#76)#80] +Results [1]: [max(csales#76)#80 AS tpcds_cmax#81] -Subquery:4 Hosting operator id = 77 Hosting Expression = ss_sold_date_sk#64 IN dynamicpruning#65 +Subquery:4 Hosting operator id = 77 Hosting Expression = ss_sold_date_sk#66 IN dynamicpruning#67 BroadcastExchange (96) +- * Project (95) +- * Filter (94) @@ -531,26 +531,26 @@ BroadcastExchange (96) (92) Scan parquet default.date_dim -Output [2]: [d_date_sk#67, d_year#80] +Output [2]: [d_date_sk#69, d_year#82] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (93) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#67, d_year#80] +Input [2]: [d_date_sk#69, d_year#82] (94) Filter [codegen id : 1] -Input [2]: [d_date_sk#67, d_year#80] -Condition : (d_year#80 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#67)) +Input [2]: [d_date_sk#69, d_year#82] +Condition : (d_year#82 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#69)) (95) Project [codegen id : 1] -Output [1]: [d_date_sk#67] -Input [2]: [d_date_sk#67, d_year#80] +Output [1]: [d_date_sk#69] +Input [2]: [d_date_sk#69, d_year#82] (96) BroadcastExchange -Input [1]: [d_date_sk#67] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#81] +Input [1]: [d_date_sk#69] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#83] Subquery:5 Hosting operator id = 46 Hosting Expression = ws_sold_date_sk#45 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt index 5c5a8a7fe425f..dfa1ee1f4fe66 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt @@ -77,7 +77,7 @@ WholeStageCodegen (24) Exchange #10 WholeStageCodegen (4) HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), 
DecimalType(18,2), true)),csales,sum,isEmpty] InputAdapter Exchange [c_customer_sk] #11 WholeStageCodegen (3) @@ -102,7 +102,7 @@ WholeStageCodegen (24) ReusedExchange [c_customer_sk] #9 InputAdapter ReusedExchange [d_date_sk] #12 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] InputAdapter Exchange [c_customer_sk] #8 WholeStageCodegen (8) @@ -148,7 +148,7 @@ WholeStageCodegen (24) Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] InputAdapter ReusedExchange [c_customer_sk,sum,isEmpty] #8 InputAdapter diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt index 638f5ec3ded62..b99458d82af0c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt @@ -322,16 +322,16 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (43) HashAggregate [codegen id : 15] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#31, isEmpty#32] Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] (44) HashAggregate [codegen id : 15] Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), 
DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] (45) Filter [codegen id : 15] Input [2]: [c_customer_sk#29, ssales#36] @@ -410,16 +410,16 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (63) HashAggregate [codegen id : 24] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#31, isEmpty#32] Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] (64) HashAggregate [codegen id : 24] Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] (65) Filter [codegen id : 24] Input [2]: [c_customer_sk#29, ssales#36] @@ -450,7 +450,7 @@ Input [6]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, c_customer_sk (71) HashAggregate [codegen id : 26] Input [4]: [cs_quantity#3, cs_list_price#4, c_first_name#41, c_last_name#42] Keys [2]: [c_last_name#42, c_first_name#41] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#44, isEmpty#45] Results [4]: [c_last_name#42, c_first_name#41, sum#46, isEmpty#47] @@ -461,9 +461,9 @@ Arguments: hashpartitioning(c_last_name#42, 
c_first_name#41, 5), ENSURE_REQUIREM (73) HashAggregate [codegen id : 27] Input [4]: [c_last_name#42, c_first_name#41, sum#46, isEmpty#47] Keys [2]: [c_last_name#42, c_first_name#41] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#49] -Results [3]: [c_last_name#42, c_first_name#41, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#49 AS sales#50] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#49] +Results [3]: [c_last_name#42, c_first_name#41, sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#49 AS sales#50] (74) Scan parquet default.web_sales Output [5]: [ws_item_sk#51, ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55] @@ -580,16 +580,16 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (100) HashAggregate [codegen id : 42] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#31, isEmpty#32] -Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#59, isEmpty#60] +Results [3]: [c_customer_sk#29, sum#61, isEmpty#62] (101) HashAggregate [codegen id : 42] -Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] +Input [3]: [c_customer_sk#29, sum#61, isEmpty#62] Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as 
decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] (102) Filter [codegen id : 42] Input [2]: [c_customer_sk#29, ssales#36] @@ -609,23 +609,23 @@ Right keys [1]: [c_customer_sk#29] Join condition: None (106) ReusedExchange [Reuses operator id: 134] -Output [1]: [d_date_sk#59] +Output [1]: [d_date_sk#63] (107) BroadcastHashJoin [codegen id : 44] Left keys [1]: [ws_sold_date_sk#55] -Right keys [1]: [d_date_sk#59] +Right keys [1]: [d_date_sk#63] Join condition: None (108) Project [codegen id : 44] Output [3]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54] -Input [5]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55, d_date_sk#59] +Input [5]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55, d_date_sk#63] (109) ReusedExchange [Reuses operator id: 55] -Output [3]: [c_customer_sk#60, c_first_name#61, c_last_name#62] +Output [3]: [c_customer_sk#64, c_first_name#65, c_last_name#66] (110) Sort [codegen id : 46] -Input [3]: [c_customer_sk#60, c_first_name#61, c_last_name#62] -Arguments: [c_customer_sk#60 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#64, c_first_name#65, c_last_name#66] +Arguments: [c_customer_sk#64 ASC NULLS FIRST], false, 0 (111) ReusedExchange [Reuses operator id: 34] Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] @@ -653,16 +653,16 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (117) HashAggregate [codegen id : 51] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#31, isEmpty#32] -Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#59, isEmpty#60] +Results [3]: [c_customer_sk#29, sum#61, isEmpty#62] (118) HashAggregate [codegen id : 51] -Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] +Input [3]: [c_customer_sk#29, sum#61, isEmpty#62] Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: 
[sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] (119) Filter [codegen id : 51] Input [2]: [c_customer_sk#29, ssales#36] @@ -677,36 +677,36 @@ Input [1]: [c_customer_sk#29] Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 (122) SortMergeJoin [codegen id : 52] -Left keys [1]: [c_customer_sk#60] +Left keys [1]: [c_customer_sk#64] Right keys [1]: [c_customer_sk#29] Join condition: None (123) SortMergeJoin [codegen id : 53] Left keys [1]: [ws_bill_customer_sk#52] -Right keys [1]: [c_customer_sk#60] +Right keys [1]: [c_customer_sk#64] Join condition: None (124) Project [codegen id : 53] -Output [4]: [ws_quantity#53, ws_list_price#54, c_first_name#61, c_last_name#62] -Input [6]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, c_customer_sk#60, c_first_name#61, c_last_name#62] +Output [4]: [ws_quantity#53, ws_list_price#54, c_first_name#65, c_last_name#66] +Input [6]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, c_customer_sk#64, c_first_name#65, c_last_name#66] (125) HashAggregate [codegen id : 53] -Input [4]: [ws_quantity#53, ws_list_price#54, c_first_name#61, c_last_name#62] -Keys [2]: [c_last_name#62, c_first_name#61] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#53 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#63, isEmpty#64] -Results [4]: [c_last_name#62, c_first_name#61, sum#65, isEmpty#66] +Input [4]: [ws_quantity#53, ws_list_price#54, c_first_name#65, c_last_name#66] +Keys [2]: [c_last_name#66, c_first_name#65] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#67, isEmpty#68] +Results [4]: [c_last_name#66, c_first_name#65, sum#69, isEmpty#70] (126) Exchange -Input [4]: [c_last_name#62, c_first_name#61, sum#65, isEmpty#66] -Arguments: hashpartitioning(c_last_name#62, c_first_name#61, 5), ENSURE_REQUIREMENTS, [id=#67] +Input [4]: [c_last_name#66, c_first_name#65, sum#69, isEmpty#70] +Arguments: hashpartitioning(c_last_name#66, c_first_name#65, 5), ENSURE_REQUIREMENTS, [id=#71] (127) HashAggregate [codegen id : 54] -Input [4]: [c_last_name#62, c_first_name#61, sum#65, isEmpty#66] -Keys [2]: [c_last_name#62, c_first_name#61] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#53 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#53 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2), true))#68] -Results [3]: [c_last_name#62, c_first_name#61, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#53 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2), true))#68 AS sales#69] +Input [4]: [c_last_name#66, c_first_name#65, sum#69, isEmpty#70] +Keys [2]: [c_last_name#66, c_first_name#65] +Functions 
[1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2), true))#72] +Results [3]: [c_last_name#66, c_first_name#65, sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2), true))#72 AS sales#73] (128) Union @@ -725,26 +725,26 @@ BroadcastExchange (134) (130) Scan parquet default.date_dim -Output [3]: [d_date_sk#39, d_year#70, d_moy#71] +Output [3]: [d_date_sk#39, d_year#74, d_moy#75] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (131) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#70, d_moy#71] +Input [3]: [d_date_sk#39, d_year#74, d_moy#75] (132) Filter [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#70, d_moy#71] -Condition : ((((isnotnull(d_year#70) AND isnotnull(d_moy#71)) AND (d_year#70 = 2000)) AND (d_moy#71 = 2)) AND isnotnull(d_date_sk#39)) +Input [3]: [d_date_sk#39, d_year#74, d_moy#75] +Condition : ((((isnotnull(d_year#74) AND isnotnull(d_moy#75)) AND (d_year#74 = 2000)) AND (d_moy#75 = 2)) AND isnotnull(d_date_sk#39)) (133) Project [codegen id : 1] Output [1]: [d_date_sk#39] -Input [3]: [d_date_sk#39, d_year#70, d_moy#71] +Input [3]: [d_date_sk#39, d_year#74, d_moy#75] (134) BroadcastExchange Input [1]: [d_date_sk#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#72] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#76] Subquery:2 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#9 IN dynamicpruning#10 BroadcastExchange (139) @@ -755,26 +755,26 @@ BroadcastExchange (139) (135) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_date#12, d_year#73] +Output [3]: [d_date_sk#11, d_date#12, d_year#77] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (136) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_date#12, d_year#73] +Input [3]: [d_date_sk#11, d_date#12, d_year#77] (137) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_date#12, d_year#73] -Condition : (d_year#73 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#11, d_date#12, d_year#77] +Condition : (d_year#77 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#11)) (138) Project [codegen id : 1] Output [2]: [d_date_sk#11, d_date#12] -Input [3]: [d_date_sk#11, d_date#12, d_year#73] +Input [3]: [d_date_sk#11, d_date#12, d_year#77] (139) BroadcastExchange Input [2]: [d_date_sk#11, d_date#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#74] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#78] Subquery:3 Hosting operator id = 45 Hosting Expression = Subquery scalar-subquery#37, [id=#38] * HashAggregate (156) @@ -797,89 +797,89 @@ Subquery:3 Hosting operator id = 45 Hosting Expression = Subquery scalar-subquer (140) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#75, 
ss_quantity#76, ss_sales_price#77, ss_sold_date_sk#78] +Output [4]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81, ss_sold_date_sk#82] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#78), dynamicpruningexpression(ss_sold_date_sk#78 IN dynamicpruning#79)] +PartitionFilters: [isnotnull(ss_sold_date_sk#82), dynamicpruningexpression(ss_sold_date_sk#82 IN dynamicpruning#83)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (141) ColumnarToRow [codegen id : 2] -Input [4]: [ss_customer_sk#75, ss_quantity#76, ss_sales_price#77, ss_sold_date_sk#78] +Input [4]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81, ss_sold_date_sk#82] (142) Filter [codegen id : 2] -Input [4]: [ss_customer_sk#75, ss_quantity#76, ss_sales_price#77, ss_sold_date_sk#78] -Condition : isnotnull(ss_customer_sk#75) +Input [4]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81, ss_sold_date_sk#82] +Condition : isnotnull(ss_customer_sk#79) (143) ReusedExchange [Reuses operator id: 161] -Output [1]: [d_date_sk#80] +Output [1]: [d_date_sk#84] (144) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#78] -Right keys [1]: [d_date_sk#80] +Left keys [1]: [ss_sold_date_sk#82] +Right keys [1]: [d_date_sk#84] Join condition: None (145) Project [codegen id : 2] -Output [3]: [ss_customer_sk#75, ss_quantity#76, ss_sales_price#77] -Input [5]: [ss_customer_sk#75, ss_quantity#76, ss_sales_price#77, ss_sold_date_sk#78, d_date_sk#80] +Output [3]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81] +Input [5]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81, ss_sold_date_sk#82, d_date_sk#84] (146) Exchange -Input [3]: [ss_customer_sk#75, ss_quantity#76, ss_sales_price#77] -Arguments: hashpartitioning(ss_customer_sk#75, 5), ENSURE_REQUIREMENTS, [id=#81] +Input [3]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81] +Arguments: hashpartitioning(ss_customer_sk#79, 5), ENSURE_REQUIREMENTS, [id=#85] (147) Sort [codegen id : 3] -Input [3]: [ss_customer_sk#75, ss_quantity#76, ss_sales_price#77] -Arguments: [ss_customer_sk#75 ASC NULLS FIRST], false, 0 +Input [3]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81] +Arguments: [ss_customer_sk#79 ASC NULLS FIRST], false, 0 (148) ReusedExchange [Reuses operator id: 39] -Output [1]: [c_customer_sk#82] +Output [1]: [c_customer_sk#86] (149) Sort [codegen id : 5] -Input [1]: [c_customer_sk#82] -Arguments: [c_customer_sk#82 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#86] +Arguments: [c_customer_sk#86 ASC NULLS FIRST], false, 0 (150) SortMergeJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#75] -Right keys [1]: [c_customer_sk#82] +Left keys [1]: [ss_customer_sk#79] +Right keys [1]: [c_customer_sk#86] Join condition: None (151) Project [codegen id : 6] -Output [3]: [ss_quantity#76, ss_sales_price#77, c_customer_sk#82] -Input [4]: [ss_customer_sk#75, ss_quantity#76, ss_sales_price#77, c_customer_sk#82] +Output [3]: [ss_quantity#80, ss_sales_price#81, c_customer_sk#86] +Input [4]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81, c_customer_sk#86] (152) HashAggregate [codegen id : 6] -Input [3]: [ss_quantity#76, ss_sales_price#77, c_customer_sk#82] -Keys [1]: [c_customer_sk#82] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#76 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#77 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#83, isEmpty#84] -Results [3]: [c_customer_sk#82, sum#85, isEmpty#86] +Input 
[3]: [ss_quantity#80, ss_sales_price#81, c_customer_sk#86] +Keys [1]: [c_customer_sk#86] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#87, isEmpty#88] +Results [3]: [c_customer_sk#86, sum#89, isEmpty#90] (153) HashAggregate [codegen id : 6] -Input [3]: [c_customer_sk#82, sum#85, isEmpty#86] -Keys [1]: [c_customer_sk#82] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#76 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#77 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#76 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#77 as decimal(12,2)))), DecimalType(18,2), true))#87] -Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#76 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#77 as decimal(12,2)))), DecimalType(18,2), true))#87 AS csales#88] +Input [3]: [c_customer_sk#86, sum#89, isEmpty#90] +Keys [1]: [c_customer_sk#86] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2), true))#91] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2), true))#91 AS csales#92] (154) HashAggregate [codegen id : 6] -Input [1]: [csales#88] +Input [1]: [csales#92] Keys: [] -Functions [1]: [partial_max(csales#88)] -Aggregate Attributes [1]: [max#89] -Results [1]: [max#90] +Functions [1]: [partial_max(csales#92)] +Aggregate Attributes [1]: [max#93] +Results [1]: [max#94] (155) Exchange -Input [1]: [max#90] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#91] +Input [1]: [max#94] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#95] (156) HashAggregate [codegen id : 7] -Input [1]: [max#90] +Input [1]: [max#94] Keys: [] -Functions [1]: [max(csales#88)] -Aggregate Attributes [1]: [max(csales#88)#92] -Results [1]: [max(csales#88)#92 AS tpcds_cmax#93] +Functions [1]: [max(csales#92)] +Aggregate Attributes [1]: [max(csales#92)#96] +Results [1]: [max(csales#92)#96 AS tpcds_cmax#97] -Subquery:4 Hosting operator id = 140 Hosting Expression = ss_sold_date_sk#78 IN dynamicpruning#79 +Subquery:4 Hosting operator id = 140 Hosting Expression = ss_sold_date_sk#82 IN dynamicpruning#83 BroadcastExchange (161) +- * Project (160) +- * Filter (159) @@ -888,26 +888,26 @@ BroadcastExchange (161) (157) Scan parquet default.date_dim -Output [2]: [d_date_sk#80, d_year#94] +Output [2]: [d_date_sk#84, d_year#98] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (158) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#80, d_year#94] +Input [2]: [d_date_sk#84, d_year#98] (159) Filter [codegen id : 1] -Input [2]: [d_date_sk#80, d_year#94] -Condition : (d_year#94 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#80)) +Input [2]: [d_date_sk#84, d_year#98] +Condition : (d_year#98 IN 
(2000,2001,2002,2003) AND isnotnull(d_date_sk#84)) (160) Project [codegen id : 1] -Output [1]: [d_date_sk#80] -Input [2]: [d_date_sk#80, d_year#94] +Output [1]: [d_date_sk#84] +Input [2]: [d_date_sk#84, d_year#98] (161) BroadcastExchange -Input [1]: [d_date_sk#80] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#95] +Input [1]: [d_date_sk#84] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#99] Subquery:5 Hosting operator id = 65 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt index 1cdf12e0cc261..c3779ff0d6e2d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt @@ -1,7 +1,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Union WholeStageCodegen (27) - HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] InputAdapter Exchange [c_last_name,c_first_name] #1 WholeStageCodegen (26) @@ -92,7 +92,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Exchange #10 WholeStageCodegen (6) HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] @@ -123,7 +123,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Sort [c_customer_sk] InputAdapter ReusedExchange [c_customer_sk] #9 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] @@ -169,7 +169,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity 
as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] @@ -184,7 +184,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] InputAdapter ReusedExchange [c_customer_sk] #9 WholeStageCodegen (54) - HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] InputAdapter Exchange [c_last_name,c_first_name] #14 WholeStageCodegen (53) @@ -240,7 +240,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] @@ -270,7 +270,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt index 371f34bc14b4b..0527d277461e7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt @@ -252,7 +252,7 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (36) HashAggregate [codegen id : 8] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] Keys [1]: [c_customer_sk#28] -Functions [1]: 
[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#30, isEmpty#31] Results [3]: [c_customer_sk#28, sum#32, isEmpty#33] @@ -263,9 +263,9 @@ Arguments: hashpartitioning(c_customer_sk#28, 5), ENSURE_REQUIREMENTS, [id=#34] (38) HashAggregate [codegen id : 9] Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] (39) Filter [codegen id : 9] Input [2]: [c_customer_sk#28, ssales#36] @@ -312,9 +312,9 @@ Output [3]: [c_customer_sk#28, sum#32, isEmpty#33] (49) HashAggregate [codegen id : 14] Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] (50) Filter [codegen id : 14] Input [2]: [c_customer_sk#28, 
ssales#36] @@ -361,7 +361,7 @@ Input [6]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#40, (60) HashAggregate [codegen id : 17] Input [4]: [cs_quantity#3, cs_list_price#4, c_first_name#40, c_last_name#41] Keys [2]: [c_last_name#41, c_first_name#40] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#45, isEmpty#46] Results [4]: [c_last_name#41, c_first_name#40, sum#47, isEmpty#48] @@ -372,9 +372,9 @@ Arguments: hashpartitioning(c_last_name#41, c_first_name#40, 5), ENSURE_REQUIREM (62) HashAggregate [codegen id : 18] Input [4]: [c_last_name#41, c_first_name#40, sum#47, isEmpty#48] Keys [2]: [c_last_name#41, c_first_name#40] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#50] -Results [3]: [c_last_name#41, c_first_name#40, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#50 AS sales#51] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#50] +Results [3]: [c_last_name#41, c_first_name#40, sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#50 AS sales#51] (63) Scan parquet default.web_sales Output [5]: [ws_item_sk#52, ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56] @@ -412,14 +412,14 @@ Input [4]: [ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, ws_sold_da Arguments: [ws_bill_customer_sk#53 ASC NULLS FIRST], false, 0 (71) ReusedExchange [Reuses operator id: 37] -Output [3]: [c_customer_sk#28, sum#32, isEmpty#33] +Output [3]: [c_customer_sk#28, sum#58, isEmpty#59] (72) HashAggregate [codegen id : 27] -Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] +Input [3]: [c_customer_sk#28, sum#58, isEmpty#59] Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), 
DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] (73) Filter [codegen id : 27] Input [2]: [c_customer_sk#28, ssales#36] @@ -439,46 +439,46 @@ Right keys [1]: [c_customer_sk#28] Join condition: None (77) ReusedExchange [Reuses operator id: 54] -Output [3]: [c_customer_sk#58, c_first_name#59, c_last_name#60] +Output [3]: [c_customer_sk#60, c_first_name#61, c_last_name#62] (78) BroadcastHashJoin [codegen id : 35] Left keys [1]: [ws_bill_customer_sk#53] -Right keys [1]: [c_customer_sk#58] +Right keys [1]: [c_customer_sk#60] Join condition: None (79) Project [codegen id : 35] -Output [5]: [ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56, c_first_name#59, c_last_name#60] -Input [7]: [ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56, c_customer_sk#58, c_first_name#59, c_last_name#60] +Output [5]: [ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56, c_first_name#61, c_last_name#62] +Input [7]: [ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56, c_customer_sk#60, c_first_name#61, c_last_name#62] (80) ReusedExchange [Reuses operator id: 92] -Output [1]: [d_date_sk#61] +Output [1]: [d_date_sk#63] (81) BroadcastHashJoin [codegen id : 35] Left keys [1]: [ws_sold_date_sk#56] -Right keys [1]: [d_date_sk#61] +Right keys [1]: [d_date_sk#63] Join condition: None (82) Project [codegen id : 35] -Output [4]: [ws_quantity#54, ws_list_price#55, c_first_name#59, c_last_name#60] -Input [6]: [ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56, c_first_name#59, c_last_name#60, d_date_sk#61] +Output [4]: [ws_quantity#54, ws_list_price#55, c_first_name#61, c_last_name#62] +Input [6]: [ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56, c_first_name#61, c_last_name#62, d_date_sk#63] (83) HashAggregate [codegen id : 35] -Input [4]: [ws_quantity#54, ws_list_price#55, c_first_name#59, c_last_name#60] -Keys [2]: [c_last_name#60, c_first_name#59] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#54 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#62, isEmpty#63] -Results [4]: [c_last_name#60, c_first_name#59, sum#64, isEmpty#65] +Input [4]: [ws_quantity#54, ws_list_price#55, c_first_name#61, c_last_name#62] +Keys [2]: [c_last_name#62, c_first_name#61] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#64, isEmpty#65] +Results [4]: [c_last_name#62, c_first_name#61, sum#66, isEmpty#67] (84) Exchange -Input [4]: [c_last_name#60, c_first_name#59, sum#64, isEmpty#65] -Arguments: hashpartitioning(c_last_name#60, c_first_name#59, 5), ENSURE_REQUIREMENTS, [id=#66] +Input [4]: [c_last_name#62, c_first_name#61, sum#66, isEmpty#67] +Arguments: hashpartitioning(c_last_name#62, 
c_first_name#61, 5), ENSURE_REQUIREMENTS, [id=#68] (85) HashAggregate [codegen id : 36] -Input [4]: [c_last_name#60, c_first_name#59, sum#64, isEmpty#65] -Keys [2]: [c_last_name#60, c_first_name#59] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#54 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#54 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))#67] -Results [3]: [c_last_name#60, c_first_name#59, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#54 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))#67 AS sales#68] +Input [4]: [c_last_name#62, c_first_name#61, sum#66, isEmpty#67] +Keys [2]: [c_last_name#62, c_first_name#61] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))#69] +Results [3]: [c_last_name#62, c_first_name#61, sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sales#70] (86) Union @@ -497,26 +497,26 @@ BroadcastExchange (92) (88) Scan parquet default.date_dim -Output [3]: [d_date_sk#44, d_year#69, d_moy#70] +Output [3]: [d_date_sk#44, d_year#71, d_moy#72] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (89) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#44, d_year#69, d_moy#70] +Input [3]: [d_date_sk#44, d_year#71, d_moy#72] (90) Filter [codegen id : 1] -Input [3]: [d_date_sk#44, d_year#69, d_moy#70] -Condition : ((((isnotnull(d_year#69) AND isnotnull(d_moy#70)) AND (d_year#69 = 2000)) AND (d_moy#70 = 2)) AND isnotnull(d_date_sk#44)) +Input [3]: [d_date_sk#44, d_year#71, d_moy#72] +Condition : ((((isnotnull(d_year#71) AND isnotnull(d_moy#72)) AND (d_year#71 = 2000)) AND (d_moy#72 = 2)) AND isnotnull(d_date_sk#44)) (91) Project [codegen id : 1] Output [1]: [d_date_sk#44] -Input [3]: [d_date_sk#44, d_year#69, d_moy#70] +Input [3]: [d_date_sk#44, d_year#71, d_moy#72] (92) BroadcastExchange Input [1]: [d_date_sk#44] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#71] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#73] Subquery:2 Hosting operator id = 4 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 BroadcastExchange (97) @@ -527,26 +527,26 @@ BroadcastExchange (97) (93) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_date#11, d_year#72] +Output [3]: [d_date_sk#10, d_date#11, d_year#74] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (94) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_date#11, d_year#72] +Input [3]: [d_date_sk#10, d_date#11, d_year#74] (95) Filter [codegen id : 
1] -Input [3]: [d_date_sk#10, d_date#11, d_year#72] -Condition : (d_year#72 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#10, d_date#11, d_year#74] +Condition : (d_year#74 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#10)) (96) Project [codegen id : 1] Output [2]: [d_date_sk#10, d_date#11] -Input [3]: [d_date_sk#10, d_date#11, d_year#72] +Input [3]: [d_date_sk#10, d_date#11, d_year#74] (97) BroadcastExchange Input [2]: [d_date_sk#10, d_date#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#73] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#75] Subquery:3 Hosting operator id = 39 Hosting Expression = Subquery scalar-subquery#37, [id=#38] * HashAggregate (112) @@ -567,81 +567,81 @@ Subquery:3 Hosting operator id = 39 Hosting Expression = Subquery scalar-subquer (98) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#74, ss_quantity#75, ss_sales_price#76, ss_sold_date_sk#77] +Output [4]: [ss_customer_sk#76, ss_quantity#77, ss_sales_price#78, ss_sold_date_sk#79] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#77), dynamicpruningexpression(ss_sold_date_sk#77 IN dynamicpruning#78)] +PartitionFilters: [isnotnull(ss_sold_date_sk#79), dynamicpruningexpression(ss_sold_date_sk#79 IN dynamicpruning#80)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (99) ColumnarToRow [codegen id : 3] -Input [4]: [ss_customer_sk#74, ss_quantity#75, ss_sales_price#76, ss_sold_date_sk#77] +Input [4]: [ss_customer_sk#76, ss_quantity#77, ss_sales_price#78, ss_sold_date_sk#79] (100) Filter [codegen id : 3] -Input [4]: [ss_customer_sk#74, ss_quantity#75, ss_sales_price#76, ss_sold_date_sk#77] -Condition : isnotnull(ss_customer_sk#74) +Input [4]: [ss_customer_sk#76, ss_quantity#77, ss_sales_price#78, ss_sold_date_sk#79] +Condition : isnotnull(ss_customer_sk#76) (101) ReusedExchange [Reuses operator id: 33] -Output [1]: [c_customer_sk#79] +Output [1]: [c_customer_sk#81] (102) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_customer_sk#74] -Right keys [1]: [c_customer_sk#79] +Left keys [1]: [ss_customer_sk#76] +Right keys [1]: [c_customer_sk#81] Join condition: None (103) Project [codegen id : 3] -Output [4]: [ss_quantity#75, ss_sales_price#76, ss_sold_date_sk#77, c_customer_sk#79] -Input [5]: [ss_customer_sk#74, ss_quantity#75, ss_sales_price#76, ss_sold_date_sk#77, c_customer_sk#79] +Output [4]: [ss_quantity#77, ss_sales_price#78, ss_sold_date_sk#79, c_customer_sk#81] +Input [5]: [ss_customer_sk#76, ss_quantity#77, ss_sales_price#78, ss_sold_date_sk#79, c_customer_sk#81] (104) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#80] +Output [1]: [d_date_sk#82] (105) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#77] -Right keys [1]: [d_date_sk#80] +Left keys [1]: [ss_sold_date_sk#79] +Right keys [1]: [d_date_sk#82] Join condition: None (106) Project [codegen id : 3] -Output [3]: [ss_quantity#75, ss_sales_price#76, c_customer_sk#79] -Input [5]: [ss_quantity#75, ss_sales_price#76, ss_sold_date_sk#77, c_customer_sk#79, d_date_sk#80] +Output [3]: [ss_quantity#77, ss_sales_price#78, c_customer_sk#81] +Input [5]: [ss_quantity#77, ss_sales_price#78, ss_sold_date_sk#79, c_customer_sk#81, d_date_sk#82] (107) HashAggregate [codegen id : 3] -Input [3]: [ss_quantity#75, ss_sales_price#76, c_customer_sk#79] -Keys [1]: [c_customer_sk#79] -Functions [1]: 
[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#75 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#76 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#81, isEmpty#82] -Results [3]: [c_customer_sk#79, sum#83, isEmpty#84] +Input [3]: [ss_quantity#77, ss_sales_price#78, c_customer_sk#81] +Keys [1]: [c_customer_sk#81] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#83, isEmpty#84] +Results [3]: [c_customer_sk#81, sum#85, isEmpty#86] (108) Exchange -Input [3]: [c_customer_sk#79, sum#83, isEmpty#84] -Arguments: hashpartitioning(c_customer_sk#79, 5), ENSURE_REQUIREMENTS, [id=#85] +Input [3]: [c_customer_sk#81, sum#85, isEmpty#86] +Arguments: hashpartitioning(c_customer_sk#81, 5), ENSURE_REQUIREMENTS, [id=#87] (109) HashAggregate [codegen id : 4] -Input [3]: [c_customer_sk#79, sum#83, isEmpty#84] -Keys [1]: [c_customer_sk#79] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#75 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#76 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#75 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#76 as decimal(12,2)))), DecimalType(18,2), true))#86] -Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#75 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#76 as decimal(12,2)))), DecimalType(18,2), true))#86 AS csales#87] +Input [3]: [c_customer_sk#81, sum#85, isEmpty#86] +Keys [1]: [c_customer_sk#81] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2), true))#88] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2), true))#88 AS csales#89] (110) HashAggregate [codegen id : 4] -Input [1]: [csales#87] +Input [1]: [csales#89] Keys: [] -Functions [1]: [partial_max(csales#87)] -Aggregate Attributes [1]: [max#88] -Results [1]: [max#89] +Functions [1]: [partial_max(csales#89)] +Aggregate Attributes [1]: [max#90] +Results [1]: [max#91] (111) Exchange -Input [1]: [max#89] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#90] +Input [1]: [max#91] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#92] (112) HashAggregate [codegen id : 5] -Input [1]: [max#89] +Input [1]: [max#91] Keys: [] -Functions [1]: [max(csales#87)] -Aggregate Attributes [1]: [max(csales#87)#91] -Results [1]: [max(csales#87)#91 AS tpcds_cmax#92] +Functions [1]: [max(csales#89)] +Aggregate Attributes [1]: [max(csales#89)#93] +Results [1]: [max(csales#89)#93 AS tpcds_cmax#94] -Subquery:4 Hosting operator id = 98 Hosting Expression = ss_sold_date_sk#77 IN dynamicpruning#78 +Subquery:4 Hosting operator id = 98 Hosting Expression = ss_sold_date_sk#79 IN dynamicpruning#80 BroadcastExchange (117) +- * Project (116) +- * Filter (115) @@ -650,26 +650,26 @@ BroadcastExchange (117) (113) Scan parquet default.date_dim 
-Output [2]: [d_date_sk#80, d_year#93] +Output [2]: [d_date_sk#82, d_year#95] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (114) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#80, d_year#93] +Input [2]: [d_date_sk#82, d_year#95] (115) Filter [codegen id : 1] -Input [2]: [d_date_sk#80, d_year#93] -Condition : (d_year#93 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#80)) +Input [2]: [d_date_sk#82, d_year#95] +Condition : (d_year#95 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#82)) (116) Project [codegen id : 1] -Output [1]: [d_date_sk#80] -Input [2]: [d_date_sk#80, d_year#93] +Output [1]: [d_date_sk#82] +Input [2]: [d_date_sk#82, d_year#95] (117) BroadcastExchange -Input [1]: [d_date_sk#80] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#94] +Input [1]: [d_date_sk#82] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#96] Subquery:5 Hosting operator id = 50 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt index 8a43f5cdae750..84ab178f95260 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt @@ -1,7 +1,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Union WholeStageCodegen (18) - HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] InputAdapter Exchange [c_last_name,c_first_name] #1 WholeStageCodegen (17) @@ -78,7 +78,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Exchange #10 WholeStageCodegen (4) HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] InputAdapter Exchange [c_customer_sk] #11 WholeStageCodegen (3) @@ -103,7 +103,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] ReusedExchange [c_customer_sk] #9 InputAdapter ReusedExchange [d_date_sk] #12 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), 
true)),ssales,sum,isEmpty] InputAdapter Exchange [c_customer_sk] #8 WholeStageCodegen (8) @@ -142,13 +142,13 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] InputAdapter ReusedExchange [c_customer_sk,sum,isEmpty] #8 InputAdapter ReusedExchange [d_date_sk] #3 WholeStageCodegen (36) - HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] InputAdapter Exchange [c_last_name,c_first_name] #15 WholeStageCodegen (35) @@ -179,7 +179,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] InputAdapter ReusedExchange [c_customer_sk,sum,isEmpty] #8 InputAdapter diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt index b59df1b7d5777..28f1f48dd9f0a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt @@ -172,7 +172,7 @@ Input [13]: [ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid (27) HashAggregate [codegen id : 5] Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16, d_moy#17] Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16] -Functions [24]: [partial_sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Functions [24]: [partial_sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] Aggregate Attributes [48]: [sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, 
isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71, sum#72, isEmpty#73] Results [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] @@ -183,9 +183,9 @@ Arguments: hashpartitioning(w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21 (29) HashAggregate [codegen id : 6] Input [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16] -Functions [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN 
(d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as 
decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#17 = 2) THEN 
CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] -Results [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, DHL,BARIAN AS ship_carriers#147, d_year#16 AS year#148, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#17 = 4) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#17 = 6) 
THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] +Functions [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * 
promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 
as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 
END)#140, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] +Results [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, DHL,BARIAN AS ship_carriers#147, d_year#16 AS year#148, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE 
WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] (30) Scan parquet default.catalog_sales Output [7]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, 
cs_net_paid_inc_tax#178, cs_sold_date_sk#179] @@ -253,7 +253,7 @@ Input [13]: [cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_pa (45) HashAggregate [codegen id : 11] Input [11]: [cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183, d_moy#184] Keys [7]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183] -Functions [24]: [partial_sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 1) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Functions [24]: [partial_sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * 
promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] Aggregate Attributes [48]: [sum#192, isEmpty#193, sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199, sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205, sum#206, isEmpty#207, sum#208, isEmpty#209, sum#210, isEmpty#211, sum#212, isEmpty#213, sum#214, isEmpty#215, sum#216, isEmpty#217, sum#218, isEmpty#219, sum#220, isEmpty#221, sum#222, isEmpty#223, sum#224, isEmpty#225, sum#226, isEmpty#227, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239] Results [55]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] @@ -264,16 +264,16 @@ Arguments: hashpartitioning(w_warehouse_name#186, w_warehouse_sq_ft#187, w_city# (47) HashAggregate [codegen id : 12] Input [55]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] Keys [7]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183] -Functions [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN 
CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN 
(d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * 
promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#304, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#306, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#307, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#308, sum(CASE 
WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#309, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#310, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#311, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#312] -Results [32]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, DHL,BARIAN AS ship_carriers#313, d_year#183 AS year#314, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289 AS jan_sales#315, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290 AS feb_sales#316, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291 AS mar_sales#317, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292 AS apr_sales#318, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293 AS may_sales#319, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294 AS jun_sales#320, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295 AS jul_sales#321, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296 AS aug_sales#322, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297 AS sep_sales#323, sum(CASE WHEN (d_moy#184 = 10) THEN 
CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298 AS oct_sales#324, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299 AS nov_sales#325, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300 AS dec_sales#326, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#301 AS jan_net#327, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#302 AS feb_net#328, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#303 AS mar_net#329, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#304 AS apr_net#330, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#305 AS may_net#331, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#306 AS jun_net#332, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#307 AS jul_net#333, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#308 AS aug_net#334, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#309 AS sep_net#335, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#310 AS oct_net#336, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2), true) ELSE 0.00 END)#311 AS nov_net#337, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#312 AS dec_net#338] +Functions [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) 
ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295, sum(CASE 
WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#304, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#306, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#307, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#308, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#309, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#310, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#311, sum(CASE WHEN 
(d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#312] +Results [32]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, DHL,BARIAN AS ship_carriers#313, d_year#183 AS year#314, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289 AS jan_sales#315, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290 AS feb_sales#316, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291 AS mar_sales#317, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292 AS apr_sales#318, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293 AS may_sales#319, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294 AS jun_sales#320, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295 AS jul_sales#321, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296 AS aug_sales#322, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297 AS sep_sales#323, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298 AS oct_sales#324, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299 AS nov_sales#325, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300 AS dec_sales#326, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#301 AS jan_net#327, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as 
decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#302 AS feb_net#328, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#303 AS mar_net#329, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#304 AS apr_net#330, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#305 AS may_net#331, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#306 AS jun_net#332, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#307 AS jul_net#333, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#308 AS aug_net#334, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#309 AS sep_net#335, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#310 AS oct_net#336, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#311 AS nov_net#337, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#312 AS dec_net#338] (48) Union (49) HashAggregate [codegen id : 13] Input [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, jan_sales#149, feb_sales#150, mar_sales#151, apr_sales#152, may_sales#153, jun_sales#154, jul_sales#155, aug_sales#156, sep_sales#157, oct_sales#158, nov_sales#159, dec_sales#160, jan_net#161, feb_net#162, mar_net#163, apr_net#164, may_net#165, jun_net#166, jul_net#167, aug_net#168, sep_net#169, oct_net#170, nov_net#171, dec_net#172] Keys [8]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148] -Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), 
partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] +Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), 
DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] Aggregate Attributes [72]: [sum#339, isEmpty#340, sum#341, isEmpty#342, sum#343, isEmpty#344, sum#345, isEmpty#346, sum#347, isEmpty#348, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410] Results [80]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] @@ -284,9 +284,9 @@ Arguments: hashpartitioning(w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21 (51) HashAggregate [codegen id : 14] Input [80]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, 
isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] Keys [8]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148] -Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] -Aggregate Attributes [36]: [sum(jan_sales#149)#484, sum(feb_sales#150)#485, sum(mar_sales#151)#486, sum(apr_sales#152)#487, sum(may_sales#153)#488, sum(jun_sales#154)#489, sum(jul_sales#155)#490, 
sum(aug_sales#156)#491, sum(sep_sales#157)#492, sum(oct_sales#158)#493, sum(nov_sales#159)#494, sum(dec_sales#160)#495, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#496, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#497, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#498, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#499, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#500, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#501, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#502, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#503, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#504, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#505, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#506, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#507, sum(jan_net#161)#508, sum(feb_net#162)#509, sum(mar_net#163)#510, sum(apr_net#164)#511, sum(may_net#165)#512, sum(jun_net#166)#513, sum(jul_net#167)#514, sum(aug_net#168)#515, sum(sep_net#169)#516, sum(oct_net#170)#517, sum(nov_net#171)#518, sum(dec_net#172)#519] -Results [44]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum(jan_sales#149)#484 AS jan_sales#520, sum(feb_sales#150)#485 AS feb_sales#521, sum(mar_sales#151)#486 AS mar_sales#522, sum(apr_sales#152)#487 AS apr_sales#523, sum(may_sales#153)#488 AS may_sales#524, sum(jun_sales#154)#489 AS jun_sales#525, sum(jul_sales#155)#490 AS jul_sales#526, sum(aug_sales#156)#491 AS aug_sales#527, sum(sep_sales#157)#492 AS sep_sales#528, sum(oct_sales#158)#493 AS oct_sales#529, sum(nov_sales#159)#494 AS nov_sales#530, sum(dec_sales#160)#495 AS dec_sales#531, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#496 AS jan_sales_per_sq_foot#532, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#497 AS feb_sales_per_sq_foot#533, 
sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#498 AS mar_sales_per_sq_foot#534, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#499 AS apr_sales_per_sq_foot#535, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#500 AS may_sales_per_sq_foot#536, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#501 AS jun_sales_per_sq_foot#537, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#502 AS jul_sales_per_sq_foot#538, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#503 AS aug_sales_per_sq_foot#539, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#504 AS sep_sales_per_sq_foot#540, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#505 AS oct_sales_per_sq_foot#541, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#506 AS nov_sales_per_sq_foot#542, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#20 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#507 AS dec_sales_per_sq_foot#543, sum(jan_net#161)#508 AS jan_net#544, sum(feb_net#162)#509 AS feb_net#545, sum(mar_net#163)#510 AS mar_net#546, sum(apr_net#164)#511 AS apr_net#547, sum(may_net#165)#512 AS may_net#548, sum(jun_net#166)#513 AS jun_net#549, sum(jul_net#167)#514 AS jul_net#550, sum(aug_net#168)#515 AS aug_net#551, sum(sep_net#169)#516 AS sep_net#552, sum(oct_net#170)#517 AS oct_net#553, sum(nov_net#171)#518 AS nov_net#554, sum(dec_net#172)#519 AS dec_net#555] +Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jun_sales#154) / 
promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] +Aggregate Attributes [36]: [sum(jan_sales#149)#484, sum(feb_sales#150)#485, sum(mar_sales#151)#486, sum(apr_sales#152)#487, sum(may_sales#153)#488, sum(jun_sales#154)#489, sum(jul_sales#155)#490, sum(aug_sales#156)#491, sum(sep_sales#157)#492, sum(oct_sales#158)#493, sum(nov_sales#159)#494, sum(dec_sales#160)#495, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#496, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#497, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#498, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#499, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#500, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#501, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#502, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#503, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#504, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#505, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#506, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#507, sum(jan_net#161)#508, sum(feb_net#162)#509, sum(mar_net#163)#510, sum(apr_net#164)#511, sum(may_net#165)#512, sum(jun_net#166)#513, sum(jul_net#167)#514, sum(aug_net#168)#515, sum(sep_net#169)#516, sum(oct_net#170)#517, 
sum(nov_net#171)#518, sum(dec_net#172)#519] +Results [44]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum(jan_sales#149)#484 AS jan_sales#520, sum(feb_sales#150)#485 AS feb_sales#521, sum(mar_sales#151)#486 AS mar_sales#522, sum(apr_sales#152)#487 AS apr_sales#523, sum(may_sales#153)#488 AS may_sales#524, sum(jun_sales#154)#489 AS jun_sales#525, sum(jul_sales#155)#490 AS jul_sales#526, sum(aug_sales#156)#491 AS aug_sales#527, sum(sep_sales#157)#492 AS sep_sales#528, sum(oct_sales#158)#493 AS oct_sales#529, sum(nov_sales#159)#494 AS nov_sales#530, sum(dec_sales#160)#495 AS dec_sales#531, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#496 AS jan_sales_per_sq_foot#532, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#497 AS feb_sales_per_sq_foot#533, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#498 AS mar_sales_per_sq_foot#534, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#499 AS apr_sales_per_sq_foot#535, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#500 AS may_sales_per_sq_foot#536, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#501 AS jun_sales_per_sq_foot#537, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#502 AS jul_sales_per_sq_foot#538, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#503 AS aug_sales_per_sq_foot#539, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#504 AS sep_sales_per_sq_foot#540, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#505 AS oct_sales_per_sq_foot#541, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#506 AS nov_sales_per_sq_foot#542, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#507 AS dec_sales_per_sq_foot#543, sum(jan_net#161)#508 AS jan_net#544, sum(feb_net#162)#509 AS feb_net#545, sum(mar_net#163)#510 AS mar_net#546, sum(apr_net#164)#511 AS apr_net#547, sum(may_net#165)#512 AS may_net#548, sum(jun_net#166)#513 AS jun_net#549, sum(jul_net#167)#514 AS jul_net#550, sum(aug_net#168)#515 AS aug_net#551, sum(sep_net#169)#516 AS sep_net#552, sum(oct_net#170)#517 AS oct_net#553, sum(nov_net#171)#518 AS nov_net#554, sum(dec_net#172)#519 AS dec_net#555] (52) TakeOrderedAndProject Input [44]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, jan_sales#520, feb_sales#521, mar_sales#522, apr_sales#523, may_sales#524, jun_sales#525, jul_sales#526, 
aug_sales#527, sep_sales#528, oct_sales#529, nov_sales#530, dec_sales#531, jan_sales_per_sq_foot#532, feb_sales_per_sq_foot#533, mar_sales_per_sq_foot#534, apr_sales_per_sq_foot#535, may_sales_per_sq_foot#536, jun_sales_per_sq_foot#537, jul_sales_per_sq_foot#538, aug_sales_per_sq_foot#539, sep_sales_per_sq_foot#540, oct_sales_per_sq_foot#541, nov_sales_per_sq_foot#542, dec_sales_per_sq_foot#543, jan_net#544, feb_net#545, mar_net#546, apr_net#547, may_net#548, jun_net#549, jul_net#550, aug_net#551, sep_net#552, oct_net#553, nov_net#554, dec_net#555] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt index 86c73b1f44bfe..f84c9de0bcd6b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] WholeStageCodegen (14) - HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(aug_sales) / 
promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net),jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), 
true)),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net),jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year] #1 WholeStageCodegen (13) @@ -8,7 +8,7 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat InputAdapter Union WholeStageCodegen (6) - HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) 
ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as 
decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange 
[w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #2 WholeStageCodegen (5) @@ -58,7 +58,7 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat InputAdapter Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] WholeStageCodegen (12) - HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * 
promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] 
[sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) 
* promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #7 WholeStageCodegen (11) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt index defc9caffa7c2..c97dfda97c695 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt @@ -172,7 +172,7 @@ Input [13]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid (27) HashAggregate [codegen id : 5] Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, d_moy#19] Keys [7]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18] -Functions [24]: [partial_sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), 
true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Functions [24]: [partial_sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), 
partial_sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] Aggregate Attributes [48]: [sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, 
isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71, sum#72, isEmpty#73] Results [55]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] @@ -183,9 +183,9 @@ Arguments: hashpartitioning(w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12 (29) HashAggregate [codegen id : 6] Input [55]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] Keys [7]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18] -Functions [24]: [sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 12) THEN 
CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) 
ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] -Results [32]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, DHL,BARIAN AS ship_carriers#147, d_year#18 AS year#148, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE 
WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN 
(d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] +Functions [24]: [sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * 
promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#19 = 3) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as 
decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] +Results [32]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, DHL,BARIAN AS ship_carriers#147, d_year#18 AS year#148, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as 
decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] (30) Scan parquet default.catalog_sales Output [7]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179] @@ -253,7 +253,7 @@ Input [13]: [cs_ship_mode_sk#174, cs_quantity#176, cs_sales_price#177, cs_net_pa 
(45) HashAggregate [codegen id : 11] Input [11]: [cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, d_moy#189] Keys [7]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188] -Functions [24]: [partial_sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) 
ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Functions [24]: [partial_sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 
as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 8) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] Aggregate Attributes [48]: [sum#192, isEmpty#193, sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199, sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205, sum#206, isEmpty#207, sum#208, isEmpty#209, sum#210, isEmpty#211, sum#212, isEmpty#213, sum#214, isEmpty#215, sum#216, isEmpty#217, sum#218, isEmpty#219, sum#220, isEmpty#221, sum#222, isEmpty#223, sum#224, isEmpty#225, sum#226, isEmpty#227, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239] Results [55]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] @@ -264,16 +264,16 @@ Arguments: hashpartitioning(w_warehouse_name#181, w_warehouse_sq_ft#182, w_city# (47) HashAggregate [codegen id : 12] Input [55]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] Keys [7]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188] -Functions [24]: [sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 2) THEN 
CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE 
WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * 
promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#304, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#306, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#307, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#308, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#309, 
sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#310, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#311, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#312] -Results [32]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, DHL,BARIAN AS ship_carriers#313, d_year#188 AS year#314, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289 AS jan_sales#315, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290 AS feb_sales#316, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291 AS mar_sales#317, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292 AS apr_sales#318, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293 AS may_sales#319, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294 AS jun_sales#320, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295 AS jul_sales#321, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296 AS aug_sales#322, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297 AS sep_sales#323, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298 AS oct_sales#324, sum(CASE WHEN (d_moy#189 = 11) THEN 
CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299 AS nov_sales#325, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300 AS dec_sales#326, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#301 AS jan_net#327, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#302 AS feb_net#328, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#303 AS mar_net#329, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#304 AS apr_net#330, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#305 AS may_net#331, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#306 AS jun_net#332, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#307 AS jul_net#333, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#308 AS aug_net#334, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#309 AS sep_net#335, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#310 AS oct_net#336, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#311 AS nov_net#337, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#176 as decimal(10,0)) as 
decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#312 AS dec_net#338] +Functions [24]: [sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE 
WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#189 = 9) THEN 
CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#304, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#306, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#307, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#308, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#309, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#310, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#311, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#312] +Results [32]: 
[w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, DHL,BARIAN AS ship_carriers#313, d_year#188 AS year#314, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289 AS jan_sales#315, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290 AS feb_sales#316, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291 AS mar_sales#317, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292 AS apr_sales#318, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293 AS may_sales#319, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294 AS jun_sales#320, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295 AS jul_sales#321, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296 AS aug_sales#322, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297 AS sep_sales#323, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298 AS oct_sales#324, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299 AS nov_sales#325, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300 AS dec_sales#326, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#301 AS jan_net#327, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#302 AS feb_net#328, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 
as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#303 AS mar_net#329, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#304 AS apr_net#330, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#305 AS may_net#331, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#306 AS jun_net#332, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#307 AS jul_net#333, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#308 AS aug_net#334, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#309 AS sep_net#335, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#310 AS oct_net#336, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#311 AS nov_net#337, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#312 AS dec_net#338] (48) Union (49) HashAggregate [codegen id : 13] Input [32]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, jan_sales#149, feb_sales#150, mar_sales#151, apr_sales#152, may_sales#153, jun_sales#154, jul_sales#155, aug_sales#156, sep_sales#157, oct_sales#158, nov_sales#159, dec_sales#160, jan_net#161, feb_net#162, mar_net#163, apr_net#164, may_net#165, jun_net#166, jul_net#167, aug_net#168, sep_net#169, oct_net#170, nov_net#171, dec_net#172] Keys [8]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148] -Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(feb_sales#150) / 
promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] +Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / 
promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] Aggregate Attributes [72]: [sum#339, isEmpty#340, sum#341, isEmpty#342, sum#343, isEmpty#344, sum#345, isEmpty#346, sum#347, isEmpty#348, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410] Results [80]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] @@ -284,9 +284,9 @@ Arguments: hashpartitioning(w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12 (51) HashAggregate [codegen id : 14] Input [80]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, 
sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] Keys [8]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148] -Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] -Aggregate Attributes [36]: [sum(jan_sales#149)#484, sum(feb_sales#150)#485, sum(mar_sales#151)#486, sum(apr_sales#152)#487, sum(may_sales#153)#488, sum(jun_sales#154)#489, sum(jul_sales#155)#490, sum(aug_sales#156)#491, sum(sep_sales#157)#492, sum(oct_sales#158)#493, sum(nov_sales#159)#494, sum(dec_sales#160)#495, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), 
DecimalType(38,12), true))#496, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#497, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#498, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#499, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#500, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#501, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#502, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#503, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#504, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#505, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#506, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#507, sum(jan_net#161)#508, sum(feb_net#162)#509, sum(mar_net#163)#510, sum(apr_net#164)#511, sum(may_net#165)#512, sum(jun_net#166)#513, sum(jul_net#167)#514, sum(aug_net#168)#515, sum(sep_net#169)#516, sum(oct_net#170)#517, sum(nov_net#171)#518, sum(dec_net#172)#519] -Results [44]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, sum(jan_sales#149)#484 AS jan_sales#520, sum(feb_sales#150)#485 AS feb_sales#521, sum(mar_sales#151)#486 AS mar_sales#522, sum(apr_sales#152)#487 AS apr_sales#523, sum(may_sales#153)#488 AS may_sales#524, sum(jun_sales#154)#489 AS jun_sales#525, sum(jul_sales#155)#490 AS jul_sales#526, sum(aug_sales#156)#491 AS aug_sales#527, sum(sep_sales#157)#492 AS sep_sales#528, sum(oct_sales#158)#493 AS oct_sales#529, sum(nov_sales#159)#494 AS nov_sales#530, sum(dec_sales#160)#495 AS dec_sales#531, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#496 AS jan_sales_per_sq_foot#532, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#497 AS feb_sales_per_sq_foot#533, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#498 AS mar_sales_per_sq_foot#534, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as 
decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#499 AS apr_sales_per_sq_foot#535, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#500 AS may_sales_per_sq_foot#536, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#501 AS jun_sales_per_sq_foot#537, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#502 AS jul_sales_per_sq_foot#538, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#503 AS aug_sales_per_sq_foot#539, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#504 AS sep_sales_per_sq_foot#540, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#505 AS oct_sales_per_sq_foot#541, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#506 AS nov_sales_per_sq_foot#542, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#11 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#507 AS dec_sales_per_sq_foot#543, sum(jan_net#161)#508 AS jan_net#544, sum(feb_net#162)#509 AS feb_net#545, sum(mar_net#163)#510 AS mar_net#546, sum(apr_net#164)#511 AS apr_net#547, sum(may_net#165)#512 AS may_net#548, sum(jun_net#166)#513 AS jun_net#549, sum(jul_net#167)#514 AS jul_net#550, sum(aug_net#168)#515 AS aug_net#551, sum(sep_net#169)#516 AS sep_net#552, sum(oct_net#170)#517 AS oct_net#553, sum(nov_net#171)#518 AS nov_net#554, sum(dec_net#172)#519 AS dec_net#555] +Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as 
decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] +Aggregate Attributes [36]: [sum(jan_sales#149)#484, sum(feb_sales#150)#485, sum(mar_sales#151)#486, sum(apr_sales#152)#487, sum(may_sales#153)#488, sum(jun_sales#154)#489, sum(jul_sales#155)#490, sum(aug_sales#156)#491, sum(sep_sales#157)#492, sum(oct_sales#158)#493, sum(nov_sales#159)#494, sum(dec_sales#160)#495, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#496, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#497, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#498, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#499, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#500, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#501, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#502, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#503, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#504, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#505, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#506, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#507, sum(jan_net#161)#508, sum(feb_net#162)#509, sum(mar_net#163)#510, sum(apr_net#164)#511, sum(may_net#165)#512, sum(jun_net#166)#513, sum(jul_net#167)#514, sum(aug_net#168)#515, sum(sep_net#169)#516, sum(oct_net#170)#517, sum(nov_net#171)#518, sum(dec_net#172)#519] +Results [44]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, sum(jan_sales#149)#484 AS jan_sales#520, sum(feb_sales#150)#485 AS feb_sales#521, sum(mar_sales#151)#486 AS mar_sales#522, sum(apr_sales#152)#487 AS apr_sales#523, 
sum(may_sales#153)#488 AS may_sales#524, sum(jun_sales#154)#489 AS jun_sales#525, sum(jul_sales#155)#490 AS jul_sales#526, sum(aug_sales#156)#491 AS aug_sales#527, sum(sep_sales#157)#492 AS sep_sales#528, sum(oct_sales#158)#493 AS oct_sales#529, sum(nov_sales#159)#494 AS nov_sales#530, sum(dec_sales#160)#495 AS dec_sales#531, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#496 AS jan_sales_per_sq_foot#532, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#497 AS feb_sales_per_sq_foot#533, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#498 AS mar_sales_per_sq_foot#534, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#499 AS apr_sales_per_sq_foot#535, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#500 AS may_sales_per_sq_foot#536, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#501 AS jun_sales_per_sq_foot#537, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#502 AS jul_sales_per_sq_foot#538, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#503 AS aug_sales_per_sq_foot#539, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#504 AS sep_sales_per_sq_foot#540, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#505 AS oct_sales_per_sq_foot#541, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#506 AS nov_sales_per_sq_foot#542, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#507 AS dec_sales_per_sq_foot#543, sum(jan_net#161)#508 AS jan_net#544, sum(feb_net#162)#509 AS feb_net#545, sum(mar_net#163)#510 AS mar_net#546, sum(apr_net#164)#511 AS apr_net#547, sum(may_net#165)#512 AS may_net#548, sum(jun_net#166)#513 AS jun_net#549, sum(jul_net#167)#514 AS jul_net#550, sum(aug_net#168)#515 AS aug_net#551, sum(sep_net#169)#516 AS sep_net#552, sum(oct_net#170)#517 AS oct_net#553, sum(nov_net#171)#518 AS nov_net#554, sum(dec_net#172)#519 AS dec_net#555] (52) TakeOrderedAndProject Input [44]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, jan_sales#520, feb_sales#521, mar_sales#522, apr_sales#523, may_sales#524, jun_sales#525, jul_sales#526, aug_sales#527, sep_sales#528, oct_sales#529, nov_sales#530, dec_sales#531, jan_sales_per_sq_foot#532, feb_sales_per_sq_foot#533, mar_sales_per_sq_foot#534, apr_sales_per_sq_foot#535, may_sales_per_sq_foot#536, jun_sales_per_sq_foot#537, jul_sales_per_sq_foot#538, aug_sales_per_sq_foot#539, sep_sales_per_sq_foot#540, oct_sales_per_sq_foot#541, 
nov_sales_per_sq_foot#542, dec_sales_per_sq_foot#543, jan_net#544, feb_net#545, mar_net#546, apr_net#547, may_net#548, jun_net#549, jul_net#550, aug_net#551, sep_net#552, oct_net#553, nov_net#554, dec_net#555] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt index 46e0418b4fabe..addcddea15cb2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] WholeStageCodegen (14) - HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) 
as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net),jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), 
true)),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net),jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year] #1 WholeStageCodegen (13) @@ -8,7 +8,7 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat InputAdapter Union WholeStageCodegen (6) - HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as 
decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity 
as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price 
as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #2 WholeStageCodegen (5) @@ -58,7 +58,7 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat InputAdapter Scan parquet default.ship_mode [sm_ship_mode_sk,sm_carrier] WholeStageCodegen (12) - HashAggregate 
[w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), 
true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #7 WholeStageCodegen (11) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt index d74fb5b4bfb61..f8e489f4901a9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt @@ -131,7 +131,7 @@ Arguments: [[ss_quantity#3, ss_sales_price#4, i_category#18, i_class#17, i_brand (23) HashAggregate [codegen id : 7] Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] Keys [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] -Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] Aggregate Attributes [2]: [sum#30, isEmpty#31] Results [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] @@ -142,9 +142,9 @@ Arguments: hashpartitioning(i_category#21, i_class#22, i_brand#23, i_product_nam (25) HashAggregate [codegen id : 8] Input [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] Keys [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, 
d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#35] -Results [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#35 AS sumsales#36] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#35] +Results [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#35 AS sumsales#36] (26) Exchange Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/simplified.txt index e6c26f61d2832..524be972e6332 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/simplified.txt @@ -8,7 +8,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ InputAdapter Exchange [i_category] #1 WholeStageCodegen (8) - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] #2 WholeStageCodegen (7) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt index a9efff6eba561..a8976d85cddd4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt @@ -116,7 +116,7 
@@ Arguments: [[ss_quantity#3, ss_sales_price#4, i_category#17, i_class#16, i_brand (20) HashAggregate [codegen id : 4] Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28] Keys [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28] -Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] Aggregate Attributes [2]: [sum#29, isEmpty#30] Results [11]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28, sum#31, isEmpty#32] @@ -127,9 +127,9 @@ Arguments: hashpartitioning(i_category#20, i_class#21, i_brand#22, i_product_nam (22) HashAggregate [codegen id : 5] Input [11]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28, sum#31, isEmpty#32] Keys [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#34] -Results [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#34 AS sumsales#35] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#34] +Results [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#34 AS sumsales#35] (23) Exchange Input [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sumsales#35] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt index 5b7d1595c0398..b45adcfc883a9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt @@ -8,7 +8,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ InputAdapter Exchange [i_category] #1 WholeStageCodegen (5) - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] #2 WholeStageCodegen (4) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt index 175a1c675675f..1fd4febb4e266 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt @@ -256,7 +256,7 @@ Right keys [1]: [item_id#38] Join condition: None (45) Project [codegen id : 18] -Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(20,0)) as decimal(21,1))) / 3.0), DecimalType(27,6), true) AS average#44] +Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6), true) AS average#44] Input [5]: [item_id#13, sr_item_qty#14, cr_item_qty#26, item_id#38, wr_item_qty#39] (46) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt index 8332d48905e48..b78773ee48f48 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt @@ -256,7 +256,7 @@ Right keys [1]: [item_id#38] Join condition: None (45) Project [codegen id : 18] -Output [8]: [item_id#13, sr_item_qty#14, 
(((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(20,0)) as decimal(21,1))) / 3.0), DecimalType(27,6), true) AS average#44] +Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6), true) AS average#44] Input [5]: [item_id#13, sr_item_qty#14, cr_item_qty#26, item_id#38, wr_item_qty#39] (46) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt index 01b7b7f5e20c8..3ed4a02f3bc9e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt @@ -109,7 +109,7 @@ Right keys [2]: [ss_item_sk#10, ss_ticket_number#12] Join condition: None (20) Project [codegen id : 6] -Output [2]: [ss_customer_sk#11, CASE WHEN isnotnull(sr_return_quantity#4) THEN CheckOverflow((promote_precision(cast(cast((ss_quantity#13 - sr_return_quantity#4) as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#14 as decimal(12,2)))), DecimalType(18,2), true) ELSE CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#14 as decimal(12,2)))), DecimalType(18,2), true) END AS act_sales#17] +Output [2]: [ss_customer_sk#11, CASE WHEN isnotnull(sr_return_quantity#4) THEN CheckOverflow((promote_precision(cast((ss_quantity#13 - sr_return_quantity#4) as decimal(12,2))) * promote_precision(cast(ss_sales_price#14 as decimal(12,2)))), DecimalType(18,2), true) ELSE CheckOverflow((promote_precision(cast(ss_quantity#13 as decimal(12,2))) * promote_precision(cast(ss_sales_price#14 as decimal(12,2)))), DecimalType(18,2), true) END AS act_sales#17] Input [8]: [sr_item_sk#1, sr_ticket_number#3, sr_return_quantity#4, ss_item_sk#10, ss_customer_sk#11, ss_ticket_number#12, ss_quantity#13, ss_sales_price#14] (21) HashAggregate [codegen id : 6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt index 54b9ae752c7a0..461172f33f132 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt @@ -109,7 +109,7 @@ Right keys [1]: [r_reason_sk#14] Join condition: 
None (20) Project [codegen id : 6] -Output [2]: [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#11) THEN CheckOverflow((promote_precision(cast(cast((ss_quantity#4 - sr_return_quantity#11) as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2), true) ELSE CheckOverflow((promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2), true) END AS act_sales#17] +Output [2]: [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#11) THEN CheckOverflow((promote_precision(cast((ss_quantity#4 - sr_return_quantity#11) as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2), true) ELSE CheckOverflow((promote_precision(cast(ss_quantity#4 as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2), true) END AS act_sales#17] Input [6]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#9, sr_return_quantity#11, r_reason_sk#14] (21) HashAggregate [codegen id : 6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt index 5c3fbb7946f1f..e0c588294b920 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt @@ -453,7 +453,7 @@ Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#48, i_brand_ (78) HashAggregate [codegen id : 45] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] Results [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] @@ -464,9 +464,9 @@ Arguments: hashpartitioning(i_brand_id#49, i_class_id#50, i_category_id#51, 5), (80) HashAggregate [codegen id : 92] Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] -Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65] +Functions [2]: 
[sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] +Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65] (81) Filter [codegen id : 92] Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65] @@ -534,7 +534,7 @@ Input [7]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, i_item_sk#75, i_bra (96) HashAggregate [codegen id : 90] Input [5]: [ss_quantity#69, ss_list_price#70, i_brand_id#76, i_class_id#77, i_category_id#78] Keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#79, isEmpty#80, count#81] Results [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] @@ -545,9 +545,9 @@ Arguments: hashpartitioning(i_brand_id#76, i_class_id#77, i_category_id#78, 5), (98) HashAggregate [codegen id : 91] Input [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] Keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#86, count(1)#87] -Results [6]: [store AS channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#86 AS sales#89, count(1)#87 AS number_sales#90] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#86, count(1)#87] +Results [6]: [store AS channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#86 AS sales#89, count(1)#87 AS number_sales#90] (99) Filter [codegen id : 91] Input [6]: [channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sales#89, 
number_sales#90] @@ -661,7 +661,7 @@ Input [4]: [ws_quantity#104, ws_list_price#105, ws_sold_date_sk#106, d_date_sk#1 (119) HashAggregate [codegen id : 7] Input [2]: [quantity#96, list_price#97] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#96 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#110, count#111] Results [2]: [sum#112, count#113] @@ -672,9 +672,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#114] (121) HashAggregate [codegen id : 8] Input [2]: [sum#112, count#113] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#96 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#96 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))#115] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#96 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))#115 AS average_sales#116] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))#115] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))#115 AS average_sales#116] Subquery:2 Hosting operator id = 103 Hosting Expression = ss_sold_date_sk#94 IN dynamicpruning#13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt index 695a7c13381d8..7c193e479a013 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt @@ -4,7 +4,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Filter [sales] Subquery #4 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter Exchange #17 WholeStageCodegen (7) @@ -38,7 +38,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ ReusedSubquery [d_date_sk] #3 InputAdapter ReusedExchange [d_date_sk] #9 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as 
decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (45) @@ -206,7 +206,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ WholeStageCodegen (91) Filter [sales] ReusedSubquery [average_sales] #4 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #19 WholeStageCodegen (90) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt index 212cb97de2873..fa27ed0d5f607 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt @@ -385,7 +385,7 @@ Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_ (65) HashAggregate [codegen id : 25] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] @@ -396,9 +396,9 @@ Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), (67) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56, count(1)#57] -Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56 AS sales#59, count(1)#57 AS number_sales#60] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56, count(1)#57] +Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56 AS sales#59, count(1)#57 AS number_sales#60] (68) Filter [codegen id : 52] Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60] @@ -454,7 +454,7 @@ Input [7]: [ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_brand_id#69, (80) HashAggregate [codegen id : 50] Input [5]: [ss_quantity#64, ss_list_price#65, i_brand_id#69, i_class_id#70, i_category_id#71] Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#73, isEmpty#74, count#75] Results [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] @@ -465,9 +465,9 @@ Arguments: hashpartitioning(i_brand_id#69, i_class_id#70, i_category_id#71, 5), (82) HashAggregate [codegen id : 51] Input [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#80, count(1)#81] -Results [6]: [store AS channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#80 AS sales#83, count(1)#81 AS number_sales#84] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#80, count(1)#81] +Results [6]: [store AS channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), 
DecimalType(18,2), true))#80 AS sales#83, count(1)#81 AS number_sales#84] (83) Filter [codegen id : 51] Input [6]: [channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] @@ -581,7 +581,7 @@ Input [4]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100, d_date_sk#101 (103) HashAggregate [codegen id : 7] Input [2]: [quantity#90, list_price#91] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#90 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#104, count#105] Results [2]: [sum#106, count#107] @@ -592,9 +592,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#108] (105) HashAggregate [codegen id : 8] Input [2]: [sum#106, count#107] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#90 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#90 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))#109] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#90 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))#109 AS average_sales#110] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))#109] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))#109 AS average_sales#110] Subquery:2 Hosting operator id = 87 Hosting Expression = ss_sold_date_sk#88 IN dynamicpruning#12 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt index 2df0810ddba28..15fdf6b0eab16 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt @@ -4,7 +4,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Filter [sales] Subquery #4 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter Exchange #12 WholeStageCodegen (7) @@ -38,7 +38,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ ReusedSubquery [d_date_sk] #3 InputAdapter ReusedExchange [d_date_sk] #6 - 
HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (25) @@ -167,7 +167,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ WholeStageCodegen (51) Filter [sales] ReusedSubquery [average_sales] #4 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #14 WholeStageCodegen (50) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt index 5595e1a12b3fc..6b057de932b33 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt @@ -497,7 +497,7 @@ Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#48, i_brand_ (78) HashAggregate [codegen id : 45] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] Results [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] @@ -508,9 +508,9 @@ Arguments: hashpartitioning(i_brand_id#49, i_class_id#50, i_category_id#51, 5), (80) HashAggregate [codegen id : 46] Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), 
DecimalType(18,2), true))#61, count(1)#62] -Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] +Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65] (81) Filter [codegen id : 46] Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65] @@ -578,7 +578,7 @@ Input [7]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, i_item_sk#74, i_bra (96) HashAggregate [codegen id : 91] Input [5]: [cs_quantity#69, cs_list_price#70, i_brand_id#75, i_class_id#76, i_category_id#77] Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#78, isEmpty#79, count#80] Results [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] @@ -589,9 +589,9 @@ Arguments: hashpartitioning(i_brand_id#75, i_class_id#76, i_category_id#77, 5), (98) HashAggregate [codegen id : 92] Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#85, count(1)#86] -Results [6]: [catalog AS channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#85 AS sales#88, count(1)#86 AS number_sales#89] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#85, count(1)#86] +Results [6]: [catalog AS 
channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#85 AS sales#88, count(1)#86 AS number_sales#89] (99) Filter [codegen id : 92] Input [6]: [channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] @@ -659,7 +659,7 @@ Input [7]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, i_item_sk#96, i_bra (114) HashAggregate [codegen id : 137] Input [5]: [ws_quantity#91, ws_list_price#92, i_brand_id#97, i_class_id#98, i_category_id#99] Keys [3]: [i_brand_id#97, i_class_id#98, i_category_id#99] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#91 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#100, isEmpty#101, count#102] Results [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] @@ -670,9 +670,9 @@ Arguments: hashpartitioning(i_brand_id#97, i_class_id#98, i_category_id#99, 5), (116) HashAggregate [codegen id : 138] Input [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] Keys [3]: [i_brand_id#97, i_class_id#98, i_category_id#99] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#91 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#91 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true))#107, count(1)#108] -Results [6]: [web AS channel#109, i_brand_id#97, i_class_id#98, i_category_id#99, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#91 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true))#107 AS sales#110, count(1)#108 AS number_sales#111] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true))#107, count(1)#108] +Results [6]: [web AS channel#109, i_brand_id#97, i_class_id#98, i_category_id#99, sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true))#107 AS sales#110, count(1)#108 AS number_sales#111] (117) Filter [codegen id : 138] Input [6]: [channel#109, i_brand_id#97, i_class_id#98, i_category_id#99, sales#110, number_sales#111] @@ -929,7 +929,7 @@ Input [4]: [ws_quantity#191, ws_list_price#192, ws_sold_date_sk#193, d_date_sk#1 (163) HashAggregate [codegen id : 7] Input [2]: [quantity#182, list_price#183] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#182 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#183 as 
decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#197, count#198] Results [2]: [sum#199, count#200] @@ -940,9 +940,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#201] (165) HashAggregate [codegen id : 8] Input [2]: [sum#199, count#200] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#182 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#182 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2), true))#202] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#182 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2), true))#202 AS average_sales#203] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2), true))#202] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2), true))#202 AS average_sales#203] Subquery:2 Hosting operator id = 147 Hosting Expression = ss_sold_date_sk#180 IN dynamicpruning#13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt index d494944f8e4d5..c02368aac7e78 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt @@ -19,7 +19,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num Filter [sales] Subquery #3 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter Exchange #19 WholeStageCodegen (7) @@ -60,7 +60,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num ReusedSubquery [d_date_sk] #4 InputAdapter ReusedExchange [d_date_sk] #20 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as 
decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #3 WholeStageCodegen (45) @@ -219,7 +219,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num WholeStageCodegen (92) Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #21 WholeStageCodegen (91) @@ -252,7 +252,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num WholeStageCodegen (138) Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #23 WholeStageCodegen (137) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt index bd3290f8c55b4..01062fa7e351c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt @@ -426,7 +426,7 @@ Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_ (65) HashAggregate [codegen id : 25] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] @@ -437,9 +437,9 @@ Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), (67) HashAggregate [codegen id : 26] Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 
as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56, count(1)#57] -Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56 AS sales#59, count(1)#57 AS number_sales#60] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56, count(1)#57] +Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56 AS sales#59, count(1)#57 AS number_sales#60] (68) Filter [codegen id : 26] Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60] @@ -495,7 +495,7 @@ Input [7]: [cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_brand_id#68, (80) HashAggregate [codegen id : 51] Input [5]: [cs_quantity#64, cs_list_price#65, i_brand_id#68, i_class_id#69, i_category_id#70] Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#72, isEmpty#73, count#74] Results [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] @@ -506,9 +506,9 @@ Arguments: hashpartitioning(i_brand_id#68, i_class_id#69, i_category_id#70, 5), (82) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79, count(1)#80] -Results [6]: [catalog AS channel#81, i_brand_id#68, i_class_id#69, i_category_id#70, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79 AS sales#82, count(1)#80 AS number_sales#83] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * 
promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79, count(1)#80] +Results [6]: [catalog AS channel#81, i_brand_id#68, i_class_id#69, i_category_id#70, sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79 AS sales#82, count(1)#80 AS number_sales#83] (83) Filter [codegen id : 52] Input [6]: [channel#81, i_brand_id#68, i_class_id#69, i_category_id#70, sales#82, number_sales#83] @@ -564,7 +564,7 @@ Input [7]: [ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_brand_id#89, (95) HashAggregate [codegen id : 77] Input [5]: [ws_quantity#85, ws_list_price#86, i_brand_id#89, i_class_id#90, i_category_id#91] Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#85 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] Aggregate Attributes [3]: [sum#93, isEmpty#94, count#95] Results [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] @@ -575,9 +575,9 @@ Arguments: hashpartitioning(i_brand_id#89, i_class_id#90, i_category_id#91, 5), (97) HashAggregate [codegen id : 78] Input [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#85 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#85 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true))#100, count(1)#101] -Results [6]: [web AS channel#102, i_brand_id#89, i_class_id#90, i_category_id#91, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#85 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sales#103, count(1)#101 AS number_sales#104] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true))#100, count(1)#101] +Results [6]: [web AS channel#102, i_brand_id#89, i_class_id#90, i_category_id#91, sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sales#103, count(1)#101 AS number_sales#104] (98) Filter [codegen id : 78] Input [6]: [channel#102, i_brand_id#89, i_class_id#90, i_category_id#91, sales#103, number_sales#104] @@ -834,7 +834,7 @@ Input [4]: 
[ws_quantity#184, ws_list_price#185, ws_sold_date_sk#186, d_date_sk#1 (144) HashAggregate [codegen id : 7] Input [2]: [quantity#175, list_price#176] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#175 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#190, count#191] Results [2]: [sum#192, count#193] @@ -845,9 +845,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#194] (146) HashAggregate [codegen id : 8] Input [2]: [sum#192, count#193] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#175 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#175 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2), true))#195] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#175 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2), true))#195 AS average_sales#196] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2), true))#195] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2), true))#195 AS average_sales#196] Subquery:2 Hosting operator id = 128 Hosting Expression = ss_sold_date_sk#173 IN dynamicpruning#12 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt index 3a56d26b3b2d3..2d0d4267a2a69 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt @@ -19,7 +19,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num Filter [sales] Subquery #3 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter Exchange #14 WholeStageCodegen (7) @@ -60,7 +60,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num ReusedSubquery [d_date_sk] #4 InputAdapter ReusedExchange [d_date_sk] #15 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as 
decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #3 WholeStageCodegen (25) @@ -180,7 +180,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num WholeStageCodegen (52) Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #16 WholeStageCodegen (51) @@ -204,7 +204,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num WholeStageCodegen (78) Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #17 WholeStageCodegen (77) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt index b0ecc08ff8b25..7e9f2f0c6777b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt @@ -167,7 +167,7 @@ Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d (22) HashAggregate [codegen id : 7] Input [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#16, i_class#17, i_category#18, i_product_name#19] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] Aggregate Attributes [2]: [sum#21, isEmpty#22] Results [10]: [i_category#18, i_class#17, 
i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#23, isEmpty#24] @@ -178,9 +178,9 @@ Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, i_product_nam (24) HashAggregate [codegen id : 8] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#23, isEmpty#24] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 as decimal(38,2)) AS sumsales#27] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] +Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 as decimal(38,2)) AS sumsales#27] (25) ReusedExchange [Reuses operator id: 23] Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#28, isEmpty#29] @@ -188,9 +188,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (26) HashAggregate [codegen id : 16] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#28, isEmpty#29] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as 
decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] +Results [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] (27) HashAggregate [codegen id : 16] Input [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sumsales#30] @@ -216,9 +216,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (31) HashAggregate [codegen id : 25] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#39, isEmpty#40] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] +Results [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] (32) HashAggregate [codegen id : 25] Input [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sumsales#30] @@ -244,9 +244,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (36) HashAggregate [codegen id : 34] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#50, isEmpty#51] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] 
-Results [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] +Results [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] (37) HashAggregate [codegen id : 34] Input [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sumsales#30] @@ -272,9 +272,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (41) HashAggregate [codegen id : 43] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#62, isEmpty#63] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] +Results [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] (42) HashAggregate [codegen id : 43] Input [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sumsales#30] @@ -300,9 +300,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (46) HashAggregate [codegen id : 52] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#75, isEmpty#76] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as 
decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [4]: [i_category#18, i_class#17, i_brand#16, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] +Results [4]: [i_category#18, i_class#17, i_brand#16, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] (47) HashAggregate [codegen id : 52] Input [4]: [i_category#18, i_class#17, i_brand#16, sumsales#30] @@ -328,9 +328,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (51) HashAggregate [codegen id : 61] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#89, isEmpty#90] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [3]: [i_category#18, i_class#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] +Results [3]: [i_category#18, i_class#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] (52) HashAggregate [codegen id : 61] Input [3]: [i_category#18, i_class#17, sumsales#30] @@ -356,9 +356,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (56) HashAggregate [codegen id : 70] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#104, isEmpty#105] Keys [8]: [i_category#18, 
i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [2]: [i_category#18, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] +Results [2]: [i_category#18, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] (57) HashAggregate [codegen id : 70] Input [2]: [i_category#18, sumsales#30] @@ -384,9 +384,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (61) HashAggregate [codegen id : 79] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#120, isEmpty#121] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] +Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] (62) HashAggregate [codegen id : 79] Input [1]: [sumsales#30] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/simplified.txt index ef75e80bde2a5..2e4627c7d48aa 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/simplified.txt @@ -9,7 +9,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category] #1 Union WholeStageCodegen (8) - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] #2 WholeStageCodegen (7) @@ -63,7 +63,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy] #7 WholeStageCodegen (16) HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (26) @@ -72,7 +72,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy] #8 WholeStageCodegen (25) HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (35) @@ -81,7 +81,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name,d_year] #9 WholeStageCodegen (34) HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate 
[i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (44) @@ -90,7 +90,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name] #10 WholeStageCodegen (43) HashAggregate [i_category,i_class,i_brand,i_product_name,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (53) @@ -99,7 +99,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand] #11 WholeStageCodegen (52) HashAggregate [i_category,i_class,i_brand,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (62) @@ -108,7 +108,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class] #12 WholeStageCodegen (61) HashAggregate [i_category,i_class,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * 
promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (71) @@ -117,7 +117,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category] #13 WholeStageCodegen (70) HashAggregate [i_category,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (80) @@ -126,6 +126,6 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange #14 WholeStageCodegen (79) HashAggregate [sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt index 48ab2f77ad964..4c344cef9d4a8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt @@ -152,7 +152,7 @@ Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d (19) HashAggregate [codegen id : 4] Input [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#15, i_class#16, i_category#17, i_product_name#18] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] Aggregate Attributes [2]: [sum#20, isEmpty#21] Results [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, 
s_store_id#12, sum#22, isEmpty#23] @@ -163,9 +163,9 @@ Arguments: hashpartitioning(i_category#17, i_class#16, i_brand#15, i_product_nam (21) HashAggregate [codegen id : 5] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#22, isEmpty#23] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 as decimal(38,2)) AS sumsales#26] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] +Results [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 as decimal(38,2)) AS sumsales#26] (22) ReusedExchange [Reuses operator id: 20] Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#27, isEmpty#28] @@ -173,9 +173,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (23) HashAggregate [codegen id : 10] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#27, isEmpty#28] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes 
[1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] +Results [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] (24) HashAggregate [codegen id : 10] Input [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, sumsales#29] @@ -201,9 +201,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (28) HashAggregate [codegen id : 16] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#38, isEmpty#39] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [7]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] +Results [7]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] (29) HashAggregate [codegen id : 16] Input [7]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, sumsales#29] @@ -229,9 +229,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (33) HashAggregate [codegen id : 22] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#49, isEmpty#50] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [6]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, 
d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] +Results [6]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] (34) HashAggregate [codegen id : 22] Input [6]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, sumsales#29] @@ -257,9 +257,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (38) HashAggregate [codegen id : 28] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#61, isEmpty#62] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [5]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] +Results [5]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] (39) HashAggregate [codegen id : 28] Input [5]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, sumsales#29] @@ -285,9 +285,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (43) HashAggregate [codegen id : 34] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#74, isEmpty#75] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as 
decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [4]: [i_category#17, i_class#16, i_brand#15, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] +Results [4]: [i_category#17, i_class#16, i_brand#15, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] (44) HashAggregate [codegen id : 34] Input [4]: [i_category#17, i_class#16, i_brand#15, sumsales#29] @@ -313,9 +313,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (48) HashAggregate [codegen id : 40] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#88, isEmpty#89] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [3]: [i_category#17, i_class#16, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] +Results [3]: [i_category#17, i_class#16, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] (49) HashAggregate [codegen id : 40] Input [3]: [i_category#17, i_class#16, sumsales#29] @@ -341,9 +341,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (53) HashAggregate [codegen id : 46] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#103, isEmpty#104] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, 
s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [2]: [i_category#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] +Results [2]: [i_category#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] (54) HashAggregate [codegen id : 46] Input [2]: [i_category#17, sumsales#29] @@ -369,9 +369,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (58) HashAggregate [codegen id : 52] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#119, isEmpty#120] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] +Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] (59) HashAggregate [codegen id : 52] Input [1]: [sumsales#29] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/simplified.txt index a26fa77b9a6d2..d3a866b3ddf29 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/simplified.txt @@ -9,7 +9,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category] #1 Union WholeStageCodegen (5) - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] #2 WholeStageCodegen (4) @@ -54,7 +54,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy] #6 WholeStageCodegen (10) HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (17) @@ -63,7 +63,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy] #7 WholeStageCodegen (16) HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (23) @@ -72,7 +72,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name,d_year] #8 WholeStageCodegen (22) HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] 
[sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (29) @@ -81,7 +81,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name] #9 WholeStageCodegen (28) HashAggregate [i_category,i_class,i_brand,i_product_name,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (35) @@ -90,7 +90,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand] #10 WholeStageCodegen (34) HashAggregate [i_category,i_class,i_brand,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (41) @@ -99,7 +99,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class] #11 WholeStageCodegen (40) HashAggregate [i_category,i_class,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 
0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (47) @@ -108,7 +108,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category] #12 WholeStageCodegen (46) HashAggregate [i_category,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (53) @@ -117,6 +117,6 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange #13 WholeStageCodegen (52) HashAggregate [sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpch-plan-stability/q20/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q20/explain.txt index edf14f1c424e5..32fab608371f3 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q20/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q20/explain.txt @@ -148,7 +148,7 @@ Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true], input[2, big (24) BroadcastHashJoin [codegen id : 5] Left keys [2]: [ps_partkey#5, ps_suppkey#6] Right keys [2]: [l_partkey#11, l_suppkey#12] -Join condition: (cast(cast(ps_availqty#7 as decimal(10,0)) as decimal(22,1)) > (0.5 * sum(l_quantity))#21) +Join condition: (cast(ps_availqty#7 as decimal(22,1)) > (0.5 * sum(l_quantity))#21) (25) Project [codegen id : 5] Output [1]: [ps_suppkey#6]

From 9c02dd4035c9412ca03e5a5f4721ee223953c004 Mon Sep 17 00:00:00 2001
From: Jiaan Geng
Date: Thu, 20 Jan 2022 17:22:44 +0800
Subject: [PATCH 060/513] [SPARK-28137][SQL] Data Type Formatting Functions: `to_number`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What changes were proposed in this pull request?
Many databases support a `to_number` function that converts a string to a number, but the implementations in `Postgresql`, `Oracle` and `Phoenix` differ in many details. This PR follows the `Oracle` implementation in enforcing strict verification of the format parameter.
It also follows the `Phoenix` implementation in using BigDecimal. This PR supports the following patterns for numeric formatting:

Pattern | Description
-- | --
9 | digit position
0 | digit position
. (period) | decimal point (only allowed once)
, (comma) | group (thousands) separator
S | sign anchored to number (only allowed once)
$ | value with a leading dollar sign (only allowed once)
D | decimal point (only allowed once)
G | group (thousands) separator

Several mainstream databases support this syntax:
**PostgreSQL:** https://www.postgresql.org/docs/12/functions-formatting.html
**Oracle:** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/TO_NUMBER.html#GUID-D4807212-AFD7-48A7-9AED-BEC3E8809866
**Vertica:** https://www.vertica.com/docs/10.0.x/HTML/Content/Authoring/SQLReferenceManual/Functions/Formatting/TO_NUMBER.htm?tocpath=SQL%20Reference%20Manual%7CSQL%20Functions%7CFormatting%20Functions%7C_____7
**Redshift:** https://docs.aws.amazon.com/redshift/latest/dg/r_TO_NUMBER.html
**DB2:** https://www.ibm.com/support/knowledgecenter/SSGU8G_14.1.0/com.ibm.sqls.doc/ids_sqs_1544.htm
**Teradata:** https://docs.teradata.com/r/kmuOwjp1zEYg98JsB8fu_A/TH2cDXBn6tala29S536nqg
**Snowflake:** https://docs.snowflake.net/manuals/sql-reference/functions/to_decimal.html
**Exasol:** https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/to_number.htm#TO_NUMBER
**Phoenix:** http://phoenix.incubator.apache.org/language/functions.html#to_number
**Singlestore:** https://docs.singlestore.com/v7.3/reference/sql-reference/numeric-functions/to-number/
**Intersystems:** https://docs.intersystems.com/latest/csp/docbook/DocBook.UI.Page.cls?KEY=RSQL_TONUMBER

The syntax looks like:
> select to_number('12,454.8-', '99G999D9S');
-12454.8

### Why are the changes needed?
`to_number` is very useful for converting formatted currency strings to numbers.

### Does this PR introduce any user-facing change?
Yes. This is a new feature.

### How was this patch tested?
New tests.

Closes #35060 from beliefer/SPARK-28137-new.
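For quick reference, the following is a small, illustrative usage sketch assembled from the examples documented in the `ToNumber` expression added by this patch; the expected results (shown as comments) assume the patch is applied, and the return type is a decimal whose precision and scale are derived from the format string.

```
-- Examples taken from the patch's ExpressionDescription for to_number
SELECT to_number('454', '999');             -- 454
SELECT to_number('454.00', '000D00');       -- 454.00
SELECT to_number('12,454', '99G999');       -- 12454
SELECT to_number('$78.12', '$99.99');       -- 78.12
SELECT to_number('12,454.8-', '99G999D9S'); -- -12454.8
```

A format that repeats a once-only character (for example two `D`s or two `S`s) is rejected at analysis time by `NumberFormatter.check()`, and an input with more digits than the format allows raises an invalid-number-format error when parsed.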
Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- docs/_data/menu-sql.yaml | 2 + docs/sql-ref-number-pattern.md | 22 ++ docs/sql-ref.md | 1 + .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/numberFormatExpressions.scala | 105 ++++++++ .../sql/catalyst/util/NumberFormatter.scala | 243 ++++++++++++++++++ .../spark/sql/catalyst/util/NumberUtils.scala | 189 -------------- .../sql/errors/QueryCompilationErrors.scala | 8 - .../sql/errors/QueryExecutionErrors.scala | 5 +- .../expressions/StringExpressionsSuite.scala | 167 ++++++++++++ ...Suite.scala => NumberFormatterSuite.scala} | 154 ++++++----- .../sql-functions/sql-expression-schema.md | 3 +- .../sql-tests/inputs/postgreSQL/numeric.sql | 18 +- .../sql-tests/inputs/string-functions.sql | 12 +- .../results/ansi/string-functions.sql.out | 66 ++++- .../results/postgreSQL/numeric.sql.out | 76 +++++- .../results/string-functions.sql.out | 66 ++++- 17 files changed, 846 insertions(+), 292 deletions(-) create mode 100644 docs/sql-ref-number-pattern.md create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberFormatter.scala delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberUtils.scala rename sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/{NumberUtilsSuite.scala => NumberFormatterSuite.scala} (65%) diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index 22e01df98e9be..7d9e6f45ec75c 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -79,6 +79,8 @@ url: sql-ref-datatypes.html - text: Datetime Pattern url: sql-ref-datetime-pattern.html + - text: Number Pattern + url: sql-ref-number-pattern.html - text: Functions url: sql-ref-functions.html - text: Identifiers diff --git a/docs/sql-ref-number-pattern.md b/docs/sql-ref-number-pattern.md new file mode 100644 index 0000000000000..dc7d696e32fb1 --- /dev/null +++ b/docs/sql-ref-number-pattern.md @@ -0,0 +1,22 @@ +--- +layout: global +title: Number patterns +displayTitle: Number Patterns for Formatting and Parsing +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--- + +TODO: Add the content of Number Patterns for Formatting and Parsing diff --git a/docs/sql-ref.md b/docs/sql-ref.md index 32e7e9672eb89..026d072c07df3 100644 --- a/docs/sql-ref.md +++ b/docs/sql-ref.md @@ -25,6 +25,7 @@ Spark SQL is Apache Spark's module for working with structured data. 
This guide * [ANSI Compliance](sql-ref-ansi-compliance.html) * [Data Types](sql-ref-datatypes.html) * [Datetime Pattern](sql-ref-datetime-pattern.html) + * [Number Pattern](sql-ref-number-pattern.html) * [Functions](sql-ref-functions.html) * [Built-in Functions](sql-ref-functions-builtin.html) * [Scalar User-Defined Functions (UDFs)](sql-ref-functions-udf-scalar.html) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index c995ff8637529..e98759bd5021f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -474,6 +474,7 @@ object FunctionRegistry { expression[FindInSet]("find_in_set"), expression[FormatNumber]("format_number"), expression[FormatString]("format_string"), + expression[ToNumber]("to_number"), expression[GetJsonObject]("get_json_object"), expression[InitCap]("initcap"), expression[StringInstr]("instr"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala new file mode 100644 index 0000000000000..e29a425eef199 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import java.util.Locale + +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} +import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper +import org.apache.spark.sql.catalyst.util.NumberFormatter +import org.apache.spark.sql.types.{DataType, StringType} +import org.apache.spark.unsafe.types.UTF8String + +/** + * A function that converts string to numeric. + */ +@ExpressionDescription( + usage = """ + _FUNC_(strExpr, formatExpr) - Convert `strExpr` to a number based on the `formatExpr`. + The format can consist of the following characters: + '0' or '9': digit position + '.' 
or 'D': decimal point (only allowed once) + ',' or 'G': group (thousands) separator + '-' or 'S': sign anchored to number (only allowed once) + '$': value with a leading dollar sign (only allowed once) + """, + examples = """ + Examples: + > SELECT _FUNC_('454', '999'); + 454 + > SELECT _FUNC_('454.00', '000D00'); + 454.00 + > SELECT _FUNC_('12,454', '99G999'); + 12454 + > SELECT _FUNC_('$78.12', '$99.99'); + 78.12 + > SELECT _FUNC_('12,454.8-', '99G999D9S'); + -12454.8 + """, + since = "3.3.0", + group = "string_funcs") +case class ToNumber(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { + + private lazy val numberFormat = right.eval().toString.toUpperCase(Locale.ROOT) + private lazy val numberFormatter = new NumberFormatter(numberFormat) + + override def dataType: DataType = numberFormatter.parsedDecimalType + + override def inputTypes: Seq[DataType] = Seq(StringType, StringType) + + override def checkInputDataTypes(): TypeCheckResult = { + val inputTypeCheck = super.checkInputDataTypes() + if (inputTypeCheck.isSuccess) { + if (right.foldable) { + numberFormatter.check() + } else { + TypeCheckResult.TypeCheckFailure(s"Format expression must be foldable, but got $right") + } + } else { + inputTypeCheck + } + } + + override def prettyName: String = "to_number" + + override def nullSafeEval(string: Any, format: Any): Any = { + val input = string.asInstanceOf[UTF8String] + numberFormatter.parse(input) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val builder = + ctx.addReferenceObj("builder", numberFormatter, classOf[NumberFormatter].getName) + val eval = left.genCode(ctx) + ev.copy(code = + code""" + |${eval.code} + |boolean ${ev.isNull} = ${eval.isNull}; + |${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + |if (!${ev.isNull}) { + | ${ev.value} = $builder.parse(${eval.value}); + |} + """.stripMargin) + } + + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): ToNumber = copy(left = newLeft, right = newRight) +} + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberFormatter.scala new file mode 100644 index 0000000000000..a14aceb692291 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberFormatter.scala @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.util + +import java.math.BigDecimal +import java.text.{DecimalFormat, ParsePosition} +import java.util.Locale + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.types.{Decimal, DecimalType} +import org.apache.spark.unsafe.types.UTF8String + +object NumberFormatter { + final val POINT_SIGN = '.' + final val POINT_LETTER = 'D' + final val COMMA_SIGN = ',' + final val COMMA_LETTER = 'G' + final val MINUS_SIGN = '-' + final val MINUS_LETTER = 'S' + final val DOLLAR_SIGN = '$' + final val NINE_DIGIT = '9' + final val ZERO_DIGIT = '0' + final val POUND_SIGN = '#' + + final val COMMA_SIGN_STRING = COMMA_SIGN.toString + final val POUND_SIGN_STRING = POUND_SIGN.toString + + final val SIGN_SET = Set(POINT_SIGN, COMMA_SIGN, MINUS_SIGN, DOLLAR_SIGN) +} + +class NumberFormatter(originNumberFormat: String, isParse: Boolean = true) extends Serializable { + import NumberFormatter._ + + protected val normalizedNumberFormat = normalize(originNumberFormat) + + private val transformedFormat = transform(normalizedNumberFormat) + + private lazy val numberDecimalFormat = { + val decimalFormat = new DecimalFormat(transformedFormat) + decimalFormat.setParseBigDecimal(true) + decimalFormat + } + + private lazy val (precision, scale) = { + val formatSplits = normalizedNumberFormat.split(POINT_SIGN).map(_.filterNot(isSign)) + assert(formatSplits.length <= 2) + val precision = formatSplits.map(_.length).sum + val scale = if (formatSplits.length == 2) formatSplits.last.length else 0 + (precision, scale) + } + + def parsedDecimalType: DecimalType = DecimalType(precision, scale) + + /** + * DecimalFormat provides '#' and '0' as placeholder of digit, ',' as grouping separator, + * '.' as decimal separator, '-' as minus, '$' as dollar, but not '9', 'G', 'D', 'S'. So we need + * replace them show below: + * 1. '9' -> '#' + * 2. 'G' -> ',' + * 3. 'D' -> '.' + * 4. 'S' -> '-' + * + * Note: When calling format, we must preserve the digits after decimal point, so the digits + * after decimal point should be replaced as '0'. For example: '999.9' will be normalized as + * '###.0' and '999.99' will be normalized as '###.00', so if the input is 454, the format + * output will be 454.0 and 454.00 respectively. + * + * @param format number format string + * @return normalized number format string + */ + private def normalize(format: String): String = { + var notFindDecimalPoint = true + val normalizedFormat = format.toUpperCase(Locale.ROOT).map { + case NINE_DIGIT if notFindDecimalPoint => POUND_SIGN + case ZERO_DIGIT if isParse && notFindDecimalPoint => POUND_SIGN + case NINE_DIGIT if !notFindDecimalPoint => ZERO_DIGIT + case COMMA_LETTER => COMMA_SIGN + case POINT_LETTER | POINT_SIGN => + notFindDecimalPoint = false + POINT_SIGN + case MINUS_LETTER => MINUS_SIGN + case other => other + } + // If the comma is at the beginning or end of number format, then DecimalFormat will be + // invalid. For example, "##,###," or ",###,###" for DecimalFormat is invalid, so we must use + // "##,###" or "###,###". 
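+    // For example, a parse format of "9G999," is mapped above to "#,###,"; stripping the
+    // trailing ',' below leaves "#,###", which is a pattern DecimalFormat accepts.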
+ normalizedFormat.stripPrefix(COMMA_SIGN_STRING).stripSuffix(COMMA_SIGN_STRING) + } + + private def isSign(c: Char): Boolean = { + SIGN_SET.contains(c) + } + + private def transform(format: String): String = { + if (format.contains(MINUS_SIGN)) { + // For example: '#.######' represents a positive number, + // but '#.######;#.######-' represents a negative number. + val positiveFormatString = format.replaceAll("-", "") + s"$positiveFormatString;$format" + } else { + format + } + } + + def check(): TypeCheckResult = { + def invalidSignPosition(c: Char): Boolean = { + val signIndex = normalizedNumberFormat.indexOf(c) + signIndex > 0 && signIndex < normalizedNumberFormat.length - 1 + } + + def multipleSignInNumberFormatError(message: String): String = { + s"At most one $message is allowed in the number format: '$originNumberFormat'" + } + + def nonFistOrLastCharInNumberFormatError(message: String): String = { + s"$message must be the first or last char in the number format: '$originNumberFormat'" + } + + if (normalizedNumberFormat.length == 0) { + TypeCheckResult.TypeCheckFailure("Number format cannot be empty") + } else if (normalizedNumberFormat.count(_ == POINT_SIGN) > 1) { + TypeCheckResult.TypeCheckFailure( + multipleSignInNumberFormatError(s"'$POINT_LETTER' or '$POINT_SIGN'")) + } else if (normalizedNumberFormat.count(_ == MINUS_SIGN) > 1) { + TypeCheckResult.TypeCheckFailure( + multipleSignInNumberFormatError(s"'$MINUS_LETTER' or '$MINUS_SIGN'")) + } else if (normalizedNumberFormat.count(_ == DOLLAR_SIGN) > 1) { + TypeCheckResult.TypeCheckFailure(multipleSignInNumberFormatError(s"'$DOLLAR_SIGN'")) + } else if (invalidSignPosition(MINUS_SIGN)) { + TypeCheckResult.TypeCheckFailure( + nonFistOrLastCharInNumberFormatError(s"'$MINUS_LETTER' or '$MINUS_SIGN'")) + } else if (invalidSignPosition(DOLLAR_SIGN)) { + TypeCheckResult.TypeCheckFailure( + nonFistOrLastCharInNumberFormatError(s"'$DOLLAR_SIGN'")) + } else { + TypeCheckResult.TypeCheckSuccess + } + } + + /** + * Convert string to numeric based on the given number format. + * The format can consist of the following characters: + * '0' or '9': digit position + * '.' or 'D': decimal point (only allowed once) + * ',' or 'G': group (thousands) separator + * '-' or 'S': sign anchored to number (only allowed once) + * '$': value with a leading dollar sign (only allowed once) + * + * @param input the string need to converted + * @return decimal obtained from string parsing + */ + def parse(input: UTF8String): Decimal = { + val inputStr = input.toString.trim + val inputSplits = inputStr.split(POINT_SIGN) + assert(inputSplits.length <= 2) + if (inputSplits.length == 1) { + if (inputStr.filterNot(isSign).length > precision - scale) { + throw QueryExecutionErrors.invalidNumberFormatError(input, originNumberFormat) + } + } else if (inputSplits(0).filterNot(isSign).length > precision - scale || + inputSplits(1).filterNot(isSign).length > scale) { + throw QueryExecutionErrors.invalidNumberFormatError(input, originNumberFormat) + } + + try { + val number = numberDecimalFormat.parse(inputStr, new ParsePosition(0)) + assert(number.isInstanceOf[BigDecimal]) + Decimal(number.asInstanceOf[BigDecimal]) + } catch { + case _: IllegalArgumentException => + throw QueryExecutionErrors.invalidNumberFormatError(input, originNumberFormat) + } + } + + /** + * Convert numeric to string based on the given number format. 
+ * The format can consist of the following characters: + * '9': digit position (can be dropped if insignificant) + * '0': digit position (will not be dropped, even if insignificant) + * '.' or 'D': decimal point (only allowed once) + * ',' or 'G': group (thousands) separator + * '-' or 'S': sign anchored to number (only allowed once) + * '$': value with a leading dollar sign (only allowed once) + * + * @param input the decimal to format + * @param numberFormat the format string + * @return The string after formatting input decimal + */ + def format(input: Decimal): String = { + val bigDecimal = input.toJavaBigDecimal + val decimalPlainStr = bigDecimal.toPlainString + if (decimalPlainStr.length > transformedFormat.length) { + transformedFormat.replaceAll("0", POUND_SIGN_STRING) + } else { + var resultStr = numberDecimalFormat.format(bigDecimal) + // Since we trimmed the comma at the beginning or end of number format in function + // `normalize`, we restore the comma to the result here. + // For example, if the specified number format is "99,999," or ",999,999", function + // `normalize` normalize them to "##,###" or "###,###". + // new DecimalFormat("##,###").parse(12454) and new DecimalFormat("###,###").parse(124546) + // will return "12,454" and "124,546" respectively. So we add ',' at the end and head of + // the result, then the final output are "12,454," or ",124,546". + if (originNumberFormat.last == COMMA_SIGN || originNumberFormat.last == COMMA_LETTER) { + resultStr = resultStr + COMMA_SIGN + } + if (originNumberFormat.charAt(0) == COMMA_SIGN || + originNumberFormat.charAt(0) == COMMA_LETTER) { + resultStr = COMMA_SIGN + resultStr + } + + resultStr + } + } +} + +// Visible for testing +class TestNumberFormatter(originNumberFormat: String, isParse: Boolean = true) + extends NumberFormatter(originNumberFormat, isParse) { + def checkWithException(): Unit = { + check() match { + case TypeCheckResult.TypeCheckFailure(message) => + throw new AnalysisException(message) + case _ => + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberUtils.scala deleted file mode 100644 index 6efde2aa657b9..0000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberUtils.scala +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.catalyst.util - -import java.math.BigDecimal -import java.text.{DecimalFormat, NumberFormat, ParsePosition} -import java.util.Locale - -import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} -import org.apache.spark.sql.types.Decimal -import org.apache.spark.unsafe.types.UTF8String - -object NumberUtils { - - private val pointSign = '.' - private val letterPointSign = 'D' - private val commaSign = ',' - private val letterCommaSign = 'G' - private val minusSign = '-' - private val letterMinusSign = 'S' - private val dollarSign = '$' - - private val commaSignStr = commaSign.toString - - private def normalize(format: String): String = { - var notFindDecimalPoint = true - val normalizedFormat = format.toUpperCase(Locale.ROOT).map { - case '9' if notFindDecimalPoint => '#' - case '9' if !notFindDecimalPoint => '0' - case `letterPointSign` => - notFindDecimalPoint = false - pointSign - case `letterCommaSign` => commaSign - case `letterMinusSign` => minusSign - case `pointSign` => - notFindDecimalPoint = false - pointSign - case other => other - } - // If the comma is at the beginning or end of number format, then DecimalFormat will be invalid. - // For example, "##,###," or ",###,###" for DecimalFormat is invalid, so we must use "##,###" - // or "###,###". - normalizedFormat.stripPrefix(commaSignStr).stripSuffix(commaSignStr) - } - - private def isSign(c: Char): Boolean = { - Set(pointSign, commaSign, minusSign, dollarSign).contains(c) - } - - private def transform(format: String): String = { - if (format.contains(minusSign)) { - val positiveFormatString = format.replaceAll("-", "") - s"$positiveFormatString;$format" - } else { - format - } - } - - private def check(normalizedFormat: String, numberFormat: String) = { - def invalidSignPosition(format: String, c: Char): Boolean = { - val signIndex = format.indexOf(c) - signIndex > 0 && signIndex < format.length - 1 - } - - if (normalizedFormat.count(_ == pointSign) > 1) { - throw QueryCompilationErrors.multipleSignInNumberFormatError( - s"'$letterPointSign' or '$pointSign'", numberFormat) - } else if (normalizedFormat.count(_ == minusSign) > 1) { - throw QueryCompilationErrors.multipleSignInNumberFormatError( - s"'$letterMinusSign' or '$minusSign'", numberFormat) - } else if (normalizedFormat.count(_ == dollarSign) > 1) { - throw QueryCompilationErrors.multipleSignInNumberFormatError(s"'$dollarSign'", numberFormat) - } else if (invalidSignPosition(normalizedFormat, minusSign)) { - throw QueryCompilationErrors.nonFistOrLastCharInNumberFormatError( - s"'$letterMinusSign' or '$minusSign'", numberFormat) - } else if (invalidSignPosition(normalizedFormat, dollarSign)) { - throw QueryCompilationErrors.nonFistOrLastCharInNumberFormatError( - s"'$dollarSign'", numberFormat) - } - } - - /** - * Convert string to numeric based on the given number format. - * The format can consist of the following characters: - * '9': digit position (can be dropped if insignificant) - * '0': digit position (will not be dropped, even if insignificant) - * '.': decimal point (only allowed once) - * ',': group (thousands) separator - * 'S': sign anchored to number (uses locale) - * 'D': decimal point (uses locale) - * 'G': group separator (uses locale) - * '$': specifies that the input value has a leading $ (Dollar) sign. 
- * - * @param input the string need to converted - * @param numberFormat the given number format - * @return decimal obtained from string parsing - */ - def parse(input: UTF8String, numberFormat: String): Decimal = { - val normalizedFormat = normalize(numberFormat) - check(normalizedFormat, numberFormat) - - val precision = normalizedFormat.filterNot(isSign).length - val formatSplits = normalizedFormat.split(pointSign) - val scale = if (formatSplits.length == 1) { - 0 - } else { - formatSplits(1).filterNot(isSign).length - } - val transformedFormat = transform(normalizedFormat) - val numberFormatInstance = NumberFormat.getInstance() - val numberDecimalFormat = numberFormatInstance.asInstanceOf[DecimalFormat] - numberDecimalFormat.setParseBigDecimal(true) - numberDecimalFormat.applyPattern(transformedFormat) - val inputStr = input.toString.trim - val inputSplits = inputStr.split(pointSign) - if (inputSplits.length == 1) { - if (inputStr.filterNot(isSign).length > precision - scale) { - throw QueryExecutionErrors.invalidNumberFormatError(numberFormat) - } - } else if (inputSplits(0).filterNot(isSign).length > precision - scale || - inputSplits(1).filterNot(isSign).length > scale) { - throw QueryExecutionErrors.invalidNumberFormatError(numberFormat) - } - val number = numberDecimalFormat.parse(inputStr, new ParsePosition(0)) - Decimal(number.asInstanceOf[BigDecimal]) - } - - /** - * Convert numeric to string based on the given number format. - * The format can consist of the following characters: - * '9': digit position (can be dropped if insignificant) - * '0': digit position (will not be dropped, even if insignificant) - * '.': decimal point (only allowed once) - * ',': group (thousands) separator - * 'S': sign anchored to number (uses locale) - * 'D': decimal point (uses locale) - * 'G': group separator (uses locale) - * '$': specifies that the input value has a leading $ (Dollar) sign. - * - * @param input the decimal to format - * @param numberFormat the format string - * @return The string after formatting input decimal - */ - def format(input: Decimal, numberFormat: String): String = { - val normalizedFormat = normalize(numberFormat) - check(normalizedFormat, numberFormat) - - val transformedFormat = transform(normalizedFormat) - val bigDecimal = input.toJavaBigDecimal - val decimalPlainStr = bigDecimal.toPlainString - if (decimalPlainStr.length > transformedFormat.length) { - transformedFormat.replaceAll("0", "#") - } else { - val decimalFormat = new DecimalFormat(transformedFormat) - var resultStr = decimalFormat.format(bigDecimal) - // Since we trimmed the comma at the beginning or end of number format in function - // `normalize`, we restore the comma to the result here. - // For example, if the specified number format is "99,999," or ",999,999", function - // `normalize` normalize them to "##,###" or "###,###". - // new DecimalFormat("##,###").parse(12454) and new DecimalFormat("###,###").parse(124546) - // will return "12,454" and "124,546" respectively. So we add ',' at the end and head of - // the result, then the final output are "12,454," or ",124,546". 
- if (numberFormat.last == commaSign || numberFormat.last == letterCommaSign) { - resultStr = resultStr + commaSign - } - if (numberFormat.charAt(0) == commaSign || numberFormat.charAt(0) == letterCommaSign) { - resultStr = commaSign + resultStr - } - - resultStr - } - } - -} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index fcbcb5491587e..14f8053233d45 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -2380,12 +2380,4 @@ object QueryCompilationErrors { def tableNotSupportTimeTravelError(tableName: Identifier): UnsupportedOperationException = { new UnsupportedOperationException(s"Table $tableName does not support time travel.") } - - def multipleSignInNumberFormatError(message: String, numberFormat: String): Throwable = { - new AnalysisException(s"Multiple $message in '$numberFormat'") - } - - def nonFistOrLastCharInNumberFormatError(message: String, numberFormat: String): Throwable = { - new AnalysisException(s"$message must be the first or last char in '$numberFormat'") - } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index ede4c393b1308..975d748e1827f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1935,9 +1935,8 @@ object QueryExecutionErrors { s" to at least $numWrittenParts.") } - def invalidNumberFormatError(format: String): Throwable = { + def invalidNumberFormatError(input: UTF8String, format: String): Throwable = { new IllegalArgumentException( - s"Format '$format' used for parsing string to number or " + - "formatting number to string is invalid") + s"The input string '$input' does not match the given number format: '$format'") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 443a94b2ee08c..b54d0a6ef7e3b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection import org.apache.spark.sql.internal.SQLConf @@ -888,6 +889,172 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { Literal.create(null, IntegerType), Literal.create(null, IntegerType)), null) } + test("ToNumber") { + ToNumber(Literal("454"), Literal("")).checkInputDataTypes() match { + case TypeCheckResult.TypeCheckFailure(msg) => + assert(msg.contains("Number format cannot be empty")) + } + ToNumber(Literal("454"), NonFoldableLiteral.create("999", StringType)) + .checkInputDataTypes() match { + case TypeCheckResult.TypeCheckFailure(msg) => + assert(msg.contains("Format expression must be foldable")) + } + + // 
Test '0' and '9' + + Seq("454", "054", "54", "450").foreach { input => + val invalidFormat1 = 0.until(input.length - 1).map(_ => '0').mkString + val invalidFormat2 = 0.until(input.length - 2).map(_ => '0').mkString + val invalidFormat3 = 0.until(input.length - 1).map(_ => '9').mkString + val invalidFormat4 = 0.until(input.length - 2).map(_ => '9').mkString + Seq(invalidFormat1, invalidFormat2, invalidFormat3, invalidFormat4) + .filter(_.nonEmpty).foreach { format => + checkExceptionInExpression[IllegalArgumentException]( + ToNumber(Literal(input), Literal(format)), + s"The input string '$input' does not match the given number format: '$format'") + } + + val format1 = 0.until(input.length).map(_ => '0').mkString + val format2 = 0.until(input.length).map(_ => '9').mkString + val format3 = 0.until(input.length).map(i => i % 2 * 9).mkString + val format4 = 0.until(input.length + 1).map(_ => '0').mkString + val format5 = 0.until(input.length + 1).map(_ => '9').mkString + val format6 = 0.until(input.length + 1).map(i => i % 2 * 9).mkString + Seq(format1, format2, format3, format4, format5, format6).foreach { format => + checkEvaluation(ToNumber(Literal(input), Literal(format)), Decimal(input)) + } + } + + // Test '.' and 'D' + checkExceptionInExpression[IllegalArgumentException]( + ToNumber(Literal("454.2"), Literal("999")), + "The input string '454.2' does not match the given number format: '999'") + Seq("999.9", "000.0", "99.99", "00.00", "0000.0", "9999.9", "00.000", "99.999") + .foreach { format => + checkExceptionInExpression[IllegalArgumentException]( + ToNumber(Literal("454.23"), Literal(format)), + s"The input string '454.23' does not match the given number format: '$format'") + val format2 = format.replace('.', 'D') + checkExceptionInExpression[IllegalArgumentException]( + ToNumber(Literal("454.23"), Literal(format2)), + s"The input string '454.23' does not match the given number format: '$format2'") + } + + Seq( + ("454.2", "000.0") -> Decimal(454.2), + ("454.23", "000.00") -> Decimal(454.23), + ("454.2", "000.00") -> Decimal(454.2), + ("454.0", "000.0") -> Decimal(454), + ("454.00", "000.00") -> Decimal(454), + (".4542", ".0000") -> Decimal(0.4542), + ("4542.", "0000.") -> Decimal(4542) + ).foreach { case ((str, format), expected) => + checkEvaluation(ToNumber(Literal(str), Literal(format)), expected) + val format2 = format.replace('.', 'D') + checkEvaluation(ToNumber(Literal(str), Literal(format2)), expected) + val format3 = format.replace('0', '9') + checkEvaluation(ToNumber(Literal(str), Literal(format3)), expected) + val format4 = format3.replace('.', 'D') + checkEvaluation(ToNumber(Literal(str), Literal(format4)), expected) + } + + Seq("999.9.9", "999D9D9", "999.9D9", "999D9.9").foreach { str => + ToNumber(Literal("454.3.2"), Literal(str)).checkInputDataTypes() match { + case TypeCheckResult.TypeCheckFailure(msg) => + assert(msg.contains(s"At most one 'D' or '.' 
is allowed in the number format: '$str'")) + } + } + + // Test ',' and 'G' + checkExceptionInExpression[IllegalArgumentException]( + ToNumber(Literal("123,456"), Literal("9G9")), + "The input string '123,456' does not match the given number format: '9G9'") + checkExceptionInExpression[IllegalArgumentException]( + ToNumber(Literal("123,456,789"), Literal("999,999")), + "The input string '123,456,789' does not match the given number format: '999,999'") + + Seq( + ("12,454", "99,999") -> Decimal(12454), + ("12,454", "99,999,999") -> Decimal(12454), + ("12,454,367", "99,999,999") -> Decimal(12454367), + ("12,454,", "99,999,") -> Decimal(12454), + (",454,367", ",999,999") -> Decimal(454367), + (",454,367", "999,999") -> Decimal(454367) + ).foreach { case ((str, format), expected) => + checkEvaluation(ToNumber(Literal(str), Literal(format)), expected) + val format2 = format.replace(',', 'G') + checkEvaluation(ToNumber(Literal(str), Literal(format2)), expected) + val format3 = format.replace('9', '0') + checkEvaluation(ToNumber(Literal(str), Literal(format3)), expected) + val format4 = format3.replace(',', 'G') + checkEvaluation(ToNumber(Literal(str), Literal(format4)), expected) + val format5 = s"${format}9" + checkEvaluation(ToNumber(Literal(str), Literal(format5)), expected) + val format6 = s"${format}0" + checkEvaluation(ToNumber(Literal(str), Literal(format6)), expected) + val format7 = s"9${format}9" + checkEvaluation(ToNumber(Literal(str), Literal(format7)), expected) + val format8 = s"0${format}0" + checkEvaluation(ToNumber(Literal(str), Literal(format8)), expected) + val format9 = s"${format3}9" + checkEvaluation(ToNumber(Literal(str), Literal(format9)), expected) + val format10 = s"${format3}0" + checkEvaluation(ToNumber(Literal(str), Literal(format10)), expected) + val format11 = s"9${format3}9" + checkEvaluation(ToNumber(Literal(str), Literal(format11)), expected) + val format12 = s"0${format3}0" + checkEvaluation(ToNumber(Literal(str), Literal(format12)), expected) + } + + // Test '$' + Seq( + ("$78.12", "$99.99") -> Decimal(78.12), + ("$78.12", "$00.00") -> Decimal(78.12), + ("78.12$", "99.99$") -> Decimal(78.12), + ("78.12$", "00.00$") -> Decimal(78.12) + ).foreach { case ((str, format), expected) => + checkEvaluation(ToNumber(Literal(str), Literal(format)), expected) + } + + ToNumber(Literal("$78$.12"), Literal("$99$.99")).checkInputDataTypes() match { + case TypeCheckResult.TypeCheckFailure(msg) => + assert(msg.contains("At most one '$' is allowed in the number format: '$99$.99'")) + } + ToNumber(Literal("78$.12"), Literal("99$.99")).checkInputDataTypes() match { + case TypeCheckResult.TypeCheckFailure(msg) => + assert(msg.contains("'$' must be the first or last char in the number format: '99$.99'")) + } + + // Test '-' and 'S' + Seq( + ("454-", "999-") -> Decimal(-454), + ("-454", "-999") -> Decimal(-454), + ("12,454.8-", "99G999D9-") -> Decimal(-12454.8), + ("00,454.8-", "99G999.9-") -> Decimal(-454.8) + ).foreach { case ((str, format), expected) => + checkEvaluation(ToNumber(Literal(str), Literal(format)), expected) + val format2 = format.replace('9', '0') + checkEvaluation(ToNumber(Literal(str), Literal(format2)), expected) + val format3 = format.replace('-', 'S') + checkEvaluation(ToNumber(Literal(str), Literal(format3)), expected) + val format4 = format2.replace('-', 'S') + checkEvaluation(ToNumber(Literal(str), Literal(format4)), expected) + } + + ToNumber(Literal("454.3--"), Literal("999D9SS")).checkInputDataTypes() match { + case TypeCheckResult.TypeCheckFailure(msg) => 
+ assert(msg.contains("At most one 'S' or '-' is allowed in the number format: '999D9SS'")) + } + + Seq("9S99", "9-99").foreach { str => + ToNumber(Literal("-454"), Literal(str)).checkInputDataTypes() match { + case TypeCheckResult.TypeCheckFailure(msg) => + assert(msg.contains( + s"'S' or '-' must be the first or last char in the number format: '$str'")) + } + } + } + test("find in set") { checkEvaluation( FindInSet(Literal.create(null, StringType), Literal.create(null, StringType)), null) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberFormatterSuite.scala similarity index 65% rename from sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberUtilsSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberFormatterSuite.scala index 66a17dceed745..81264f4e85080 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberFormatterSuite.scala @@ -19,43 +19,37 @@ package org.apache.spark.sql.catalyst.util import org.apache.spark.SparkFunSuite import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.util.NumberUtils.{format, parse} import org.apache.spark.sql.types.Decimal import org.apache.spark.unsafe.types.UTF8String -class NumberUtilsSuite extends SparkFunSuite { +class NumberFormatterSuite extends SparkFunSuite { - private def failParseWithInvalidInput( - input: UTF8String, numberFormat: String, errorMsg: String): Unit = { - val e = intercept[IllegalArgumentException](parse(input, numberFormat)) + private def invalidNumberFormat(numberFormat: String, errorMsg: String): Unit = { + val testNumberFormatter = new TestNumberFormatter(numberFormat) + val e = intercept[AnalysisException](testNumberFormatter.checkWithException()) assert(e.getMessage.contains(errorMsg)) } - private def failParseWithAnalysisException( + private def failParseWithInvalidInput( input: UTF8String, numberFormat: String, errorMsg: String): Unit = { - val e = intercept[AnalysisException](parse(input, numberFormat)) - assert(e.getMessage.contains(errorMsg)) - } - - private def failFormatWithAnalysisException( - input: Decimal, numberFormat: String, errorMsg: String): Unit = { - val e = intercept[AnalysisException](format(input, numberFormat)) + val testNumberFormatter = new TestNumberFormatter(numberFormat) + val e = intercept[IllegalArgumentException](testNumberFormatter.parse(input)) assert(e.getMessage.contains(errorMsg)) } test("parse") { - failParseWithInvalidInput(UTF8String.fromString("454"), "", - "Format '' used for parsing string to number or formatting number to string is invalid") + invalidNumberFormat("", "Number format cannot be empty") // Test '9' and '0' failParseWithInvalidInput(UTF8String.fromString("454"), "9", - "Format '9' used for parsing string to number or formatting number to string is invalid") + "The input string '454' does not match the given number format: '9'") failParseWithInvalidInput(UTF8String.fromString("454"), "99", - "Format '99' used for parsing string to number or formatting number to string is invalid") + "The input string '454' does not match the given number format: '99'") Seq( ("454", "999") -> Decimal(454), ("054", "999") -> Decimal(54), + ("54", "999") -> Decimal(54), ("404", "999") -> Decimal(404), ("450", "999") -> Decimal(450), ("454", "9999") -> Decimal(454), @@ -63,17 
+57,20 @@ class NumberUtilsSuite extends SparkFunSuite { ("404", "9999") -> Decimal(404), ("450", "9999") -> Decimal(450) ).foreach { case ((str, format), expected) => - assert(parse(UTF8String.fromString(str), format) === expected) + val builder = new TestNumberFormatter(format) + builder.check() + assert(builder.parse(UTF8String.fromString(str)) === expected) } failParseWithInvalidInput(UTF8String.fromString("454"), "0", - "Format '0' used for parsing string to number or formatting number to string is invalid") + "The input string '454' does not match the given number format: '0'") failParseWithInvalidInput(UTF8String.fromString("454"), "00", - "Format '00' used for parsing string to number or formatting number to string is invalid") + "The input string '454' does not match the given number format: '00'") Seq( ("454", "000") -> Decimal(454), ("054", "000") -> Decimal(54), + ("54", "000") -> Decimal(54), ("404", "000") -> Decimal(404), ("450", "000") -> Decimal(450), ("454", "0000") -> Decimal(454), @@ -81,14 +78,16 @@ class NumberUtilsSuite extends SparkFunSuite { ("404", "0000") -> Decimal(404), ("450", "0000") -> Decimal(450) ).foreach { case ((str, format), expected) => - assert(parse(UTF8String.fromString(str), format) === expected) + val builder = new TestNumberFormatter(format) + builder.check() + assert(builder.parse(UTF8String.fromString(str)) === expected) } // Test '.' and 'D' failParseWithInvalidInput(UTF8String.fromString("454.2"), "999", - "Format '999' used for parsing string to number or formatting number to string is invalid") + "The input string '454.2' does not match the given number format: '999'") failParseWithInvalidInput(UTF8String.fromString("454.23"), "999.9", - "Format '999.9' used for parsing string to number or formatting number to string is invalid") + "The input string '454.23' does not match the given number format: '999.9'") Seq( ("454.2", "999.9") -> Decimal(454.2), @@ -116,17 +115,19 @@ class NumberUtilsSuite extends SparkFunSuite { ("4542.", "9999D") -> Decimal(4542), ("4542.", "0000D") -> Decimal(4542) ).foreach { case ((str, format), expected) => - assert(parse(UTF8String.fromString(str), format) === expected) + val builder = new TestNumberFormatter(format) + builder.check() + assert(builder.parse(UTF8String.fromString(str)) === expected) } - failParseWithAnalysisException(UTF8String.fromString("454.3.2"), "999.9.9", - "Multiple 'D' or '.' in '999.9.9'") - failParseWithAnalysisException(UTF8String.fromString("454.3.2"), "999D9D9", - "Multiple 'D' or '.' in '999D9D9'") - failParseWithAnalysisException(UTF8String.fromString("454.3.2"), "999.9D9", - "Multiple 'D' or '.' in '999.9D9'") - failParseWithAnalysisException(UTF8String.fromString("454.3.2"), "999D9.9", - "Multiple 'D' or '.' in '999D9.9'") + invalidNumberFormat( + "999.9.9", "At most one 'D' or '.' is allowed in the number format: '999.9.9'") + invalidNumberFormat( + "999D9D9", "At most one 'D' or '.' is allowed in the number format: '999D9D9'") + invalidNumberFormat( + "999.9D9", "At most one 'D' or '.' is allowed in the number format: '999.9D9'") + invalidNumberFormat( + "999D9.9", "At most one 'D' or '.' 
is allowed in the number format: '999D9.9'") // Test ',' and 'G' Seq( @@ -145,9 +146,15 @@ class NumberUtilsSuite extends SparkFunSuite { (",454,367", ",999,999") -> Decimal(454367), (",454,367", ",000,000") -> Decimal(454367), (",454,367", "G999G999") -> Decimal(454367), - (",454,367", "G000G000") -> Decimal(454367) + (",454,367", "G000G000") -> Decimal(454367), + (",454,367", "999,999") -> Decimal(454367), + (",454,367", "000,000") -> Decimal(454367), + (",454,367", "999G999") -> Decimal(454367), + (",454,367", "000G000") -> Decimal(454367) ).foreach { case ((str, format), expected) => - assert(parse(UTF8String.fromString(str), format) === expected) + val builder = new TestNumberFormatter(format) + builder.check() + assert(builder.parse(UTF8String.fromString(str)) === expected) } // Test '$' @@ -157,13 +164,14 @@ class NumberUtilsSuite extends SparkFunSuite { ("78.12$", "99.99$") -> Decimal(78.12), ("78.12$", "00.00$") -> Decimal(78.12) ).foreach { case ((str, format), expected) => - assert(parse(UTF8String.fromString(str), format) === expected) + val builder = new TestNumberFormatter(format) + builder.check() + assert(builder.parse(UTF8String.fromString(str)) === expected) } - failParseWithAnalysisException(UTF8String.fromString("78$.12"), "99$.99", - "'$' must be the first or last char in '99$.99'") - failParseWithAnalysisException(UTF8String.fromString("$78.12$"), "$99.99$", - "Multiple '$' in '$99.99$'") + invalidNumberFormat( + "99$.99", "'$' must be the first or last char in the number format: '99$.99'") + invalidNumberFormat("$99.99$", "At most one '$' is allowed in the number format: '$99.99$'") // Test '-' and 'S' Seq( @@ -178,19 +186,20 @@ class NumberUtilsSuite extends SparkFunSuite { ("12,454.8-", "99G999D9S") -> Decimal(-12454.8), ("00,454.8-", "99G999.9S") -> Decimal(-454.8) ).foreach { case ((str, format), expected) => - assert(parse(UTF8String.fromString(str), format) === expected) + val builder = new TestNumberFormatter(format) + builder.check() + assert(builder.parse(UTF8String.fromString(str)) === expected) } - failParseWithAnalysisException(UTF8String.fromString("4-54"), "9S99", - "'S' or '-' must be the first or last char in '9S99'") - failParseWithAnalysisException(UTF8String.fromString("4-54"), "9-99", - "'S' or '-' must be the first or last char in '9-99'") - failParseWithAnalysisException(UTF8String.fromString("454.3--"), "999D9SS", - "Multiple 'S' or '-' in '999D9SS'") + invalidNumberFormat( + "9S99", "'S' or '-' must be the first or last char in the number format: '9S99'") + invalidNumberFormat( + "9-99", "'S' or '-' must be the first or last char in the number format: '9-99'") + invalidNumberFormat( + "999D9SS", "At most one 'S' or '-' is allowed in the number format: '999D9SS'") } test("format") { - assert(format(Decimal(454), "") === "") // Test '9' and '0' Seq( @@ -214,8 +223,10 @@ class NumberUtilsSuite extends SparkFunSuite { (Decimal(54), "0000") -> "0054", (Decimal(404), "0000") -> "0404", (Decimal(450), "0000") -> "0450" - ).foreach { case ((decimal, str), expected) => - assert(format(decimal, str) === expected) + ).foreach { case ((decimal, format), expected) => + val builder = new TestNumberFormatter(format, false) + builder.check() + assert(builder.format(decimal) === expected) } // Test '.' and 'D' @@ -240,19 +251,12 @@ class NumberUtilsSuite extends SparkFunSuite { (Decimal(4542), "0000.") -> "4542.", (Decimal(4542), "9999D") -> "4542.", (Decimal(4542), "0000D") -> "4542." 
- ).foreach { case ((decimal, str), expected) => - assert(format(decimal, str) === expected) + ).foreach { case ((decimal, format), expected) => + val builder = new TestNumberFormatter(format, false) + builder.check() + assert(builder.format(decimal) === expected) } - failFormatWithAnalysisException(Decimal(454.32), "999.9.9", - "Multiple 'D' or '.' in '999.9.9'") - failFormatWithAnalysisException(Decimal(454.32), "999D9D9", - "Multiple 'D' or '.' in '999D9D9'") - failFormatWithAnalysisException(Decimal(454.32), "999.9D9", - "Multiple 'D' or '.' in '999.9D9'") - failFormatWithAnalysisException(Decimal(454.32), "999D9.9", - "Multiple 'D' or '.' in '999D9.9'") - // Test ',' and 'G' Seq( (Decimal(12454), "99,999") -> "12,454", @@ -271,8 +275,10 @@ class NumberUtilsSuite extends SparkFunSuite { (Decimal(454367), ",000,000") -> ",454,367", (Decimal(454367), "G999G999") -> ",454,367", (Decimal(454367), "G000G000") -> ",454,367" - ).foreach { case ((decimal, str), expected) => - assert(format(decimal, str) === expected) + ).foreach { case ((decimal, format), expected) => + val builder = new TestNumberFormatter(format, false) + builder.check() + assert(builder.format(decimal) === expected) } // Test '$' @@ -281,15 +287,12 @@ class NumberUtilsSuite extends SparkFunSuite { (Decimal(78.12), "$00.00") -> "$78.12", (Decimal(78.12), "99.99$") -> "78.12$", (Decimal(78.12), "00.00$") -> "78.12$" - ).foreach { case ((decimal, str), expected) => - assert(format(decimal, str) === expected) + ).foreach { case ((decimal, format), expected) => + val builder = new TestNumberFormatter(format, false) + builder.check() + assert(builder.format(decimal) === expected) } - failFormatWithAnalysisException(Decimal(78.12), "99$.99", - "'$' must be the first or last char in '99$.99'") - failFormatWithAnalysisException(Decimal(78.12), "$99.99$", - "Multiple '$' in '$99.99$'") - // Test '-' and 'S' Seq( (Decimal(-454), "999-") -> "454-", @@ -302,16 +305,11 @@ class NumberUtilsSuite extends SparkFunSuite { (Decimal(-454), "S000") -> "-454", (Decimal(-12454.8), "99G999D9S") -> "12,454.8-", (Decimal(-454.8), "99G999.9S") -> "454.8-" - ).foreach { case ((decimal, str), expected) => - assert(format(decimal, str) === expected) + ).foreach { case ((decimal, format), expected) => + val builder = new TestNumberFormatter(format, false) + builder.check() + assert(builder.format(decimal) === expected) } - - failFormatWithAnalysisException(Decimal(-454), "9S99", - "'S' or '-' must be the first or last char in '9S99'") - failFormatWithAnalysisException(Decimal(-454), "9-99", - "'S' or '-' must be the first or last char in '9-99'") - failFormatWithAnalysisException(Decimal(-454.3), "999D9SS", - "Multiple 'S' or '-' in '999D9SS'") } } diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 07e1d00ca545d..b742a05fdfb75 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 375 + - Number of queries: 376 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -300,6 +300,7 @@ | org.apache.spark.sql.catalyst.expressions.Tanh | tanh | SELECT tanh(0) | struct | | org.apache.spark.sql.catalyst.expressions.TimeWindow | window | SELECT a, window.start, 
window.end, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:06:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, window(b, '5 minutes') ORDER BY a, start | struct | | org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct | +| org.apache.spark.sql.catalyst.expressions.ToNumber | to_number | SELECT to_number('454', '999') | struct | | org.apache.spark.sql.catalyst.expressions.ToRadians | radians | SELECT radians(180) | struct | | org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp | to_utc_timestamp | SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') | struct | | org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp | to_unix_timestamp | SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql index 53f2aa41ae3fa..14a89d526b512 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql @@ -895,22 +895,22 @@ DROP TABLE width_bucket_test; -- TO_NUMBER() -- -- SET lc_numeric = 'C'; --- SELECT '' AS to_number_1, to_number('-34,338,492', '99G999G999'); --- SELECT '' AS to_number_2, to_number('-34,338,492.654,878', '99G999G999D999G999'); +SELECT '' AS to_number_1, to_number('-34,338,492', '99G999G999'); +SELECT '' AS to_number_2, to_number('-34,338,492.654,878', '99G999G999D999G999'); -- SELECT '' AS to_number_3, to_number('<564646.654564>', '999999.999999PR'); --- SELECT '' AS to_number_4, to_number('0.00001-', '9.999999S'); +SELECT '' AS to_number_4, to_number('0.00001-', '9.999999S'); -- SELECT '' AS to_number_5, to_number('5.01-', 'FM9.999999S'); -- SELECT '' AS to_number_5, to_number('5.01-', 'FM9.999999MI'); -- SELECT '' AS to_number_7, to_number('5 4 4 4 4 8 . 7 8', '9 9 9 9 9 9 . 9 9'); -- SELECT '' AS to_number_8, to_number('.01', 'FM9.99'); --- SELECT '' AS to_number_9, to_number('.0', '99999999.99999999'); --- SELECT '' AS to_number_10, to_number('0', '99.99'); +SELECT '' AS to_number_9, to_number('.0', '99999999.99999999'); +SELECT '' AS to_number_10, to_number('0', '99.99'); -- SELECT '' AS to_number_11, to_number('.-01', 'S99.99'); --- SELECT '' AS to_number_12, to_number('.01-', '99.99S'); +SELECT '' AS to_number_12, to_number('.01-', '99.99S'); -- SELECT '' AS to_number_13, to_number(' . 0 1-', ' 9 9 . 
9 9 S'); --- SELECT '' AS to_number_14, to_number('34,50','999,99'); --- SELECT '' AS to_number_15, to_number('123,000','999G'); --- SELECT '' AS to_number_16, to_number('123456','999G999'); +SELECT '' AS to_number_14, to_number('34,50','999,99'); +SELECT '' AS to_number_15, to_number('123,000','999G'); +SELECT '' AS to_number_16, to_number('123456','999G999'); -- SELECT '' AS to_number_17, to_number('$1234.56','L9,999.99'); -- SELECT '' AS to_number_18, to_number('$1234.56','L99,999.99'); -- SELECT '' AS to_number_19, to_number('$1,234.56','L99,999.99'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 4b5f1204b15e9..94924a91991b9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -124,4 +124,14 @@ SELECT endswith('Spark SQL', 'QL'); SELECT endswith('Spark SQL', 'Spa'); SELECT endswith(null, 'Spark'); SELECT endswith('Spark', null); -SELECT endswith(null, null); \ No newline at end of file +SELECT endswith(null, null); + +-- to_number +select to_number('454', '000'); +select to_number('454.2', '000.0'); +select to_number('12,454', '00,000'); +select to_number('$78.12', '$00.00'); +select to_number('-454', '-000'); +select to_number('-454', 'S000'); +select to_number('12,454.8-', '00,000.9-'); +select to_number('00,454.8-', '00,000.9-'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 6fb9a6d5a47ab..99927c262c5ac 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 94 +-- Number of queries: 102 -- !query @@ -760,3 +760,67 @@ SELECT endswith(null, null) struct -- !query output NULL + + +-- !query +select to_number('454', '000') +-- !query schema +struct +-- !query output +454 + + +-- !query +select to_number('454.2', '000.0') +-- !query schema +struct +-- !query output +454.2 + + +-- !query +select to_number('12,454', '00,000') +-- !query schema +struct +-- !query output +12454 + + +-- !query +select to_number('$78.12', '$00.00') +-- !query schema +struct +-- !query output +78.12 + + +-- !query +select to_number('-454', '-000') +-- !query schema +struct +-- !query output +-454 + + +-- !query +select to_number('-454', 'S000') +-- !query schema +struct +-- !query output +-454 + + +-- !query +select to_number('12,454.8-', '00,000.9-') +-- !query schema +struct +-- !query output +-12454.8 + + +-- !query +select to_number('00,454.8-', '00,000.9-') +-- !query schema +struct +-- !query output +-454.8 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out index bc13bb893b118..41fc9908d0c2b 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 592 +-- Number of queries: 601 -- !query @@ -4594,6 +4594,80 @@ struct<> +-- !query +SELECT '' AS to_number_1, to_number('-34,338,492', '99G999G999') +-- !query schema +struct +-- !query output + -34338492 + + +-- !query +SELECT '' AS 
to_number_2, to_number('-34,338,492.654,878', '99G999G999D999G999') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +The input string '-34,338,492.654,878' does not match the given number format: '99G999G999D999G999' + + +-- !query +SELECT '' AS to_number_4, to_number('0.00001-', '9.999999S') +-- !query schema +struct +-- !query output + -0.000010 + + +-- !query +SELECT '' AS to_number_9, to_number('.0', '99999999.99999999') +-- !query schema +struct +-- !query output + 0.00000000 + + +-- !query +SELECT '' AS to_number_10, to_number('0', '99.99') +-- !query schema +struct +-- !query output + 0.00 + + +-- !query +SELECT '' AS to_number_12, to_number('.01-', '99.99S') +-- !query schema +struct +-- !query output + -0.01 + + +-- !query +SELECT '' AS to_number_14, to_number('34,50','999,99') +-- !query schema +struct +-- !query output + 3450 + + +-- !query +SELECT '' AS to_number_15, to_number('123,000','999G') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +The input string '123,000' does not match the given number format: '999G' + + +-- !query +SELECT '' AS to_number_16, to_number('123456','999G999') +-- !query schema +struct +-- !query output + 123456 + + -- !query CREATE TABLE num_input_test (n1 decimal(38, 18)) USING parquet -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 2aa2e80a1244e..6baac6148885f 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 94 +-- Number of queries: 102 -- !query @@ -756,3 +756,67 @@ SELECT endswith(null, null) struct -- !query output NULL + + +-- !query +select to_number('454', '000') +-- !query schema +struct +-- !query output +454 + + +-- !query +select to_number('454.2', '000.0') +-- !query schema +struct +-- !query output +454.2 + + +-- !query +select to_number('12,454', '00,000') +-- !query schema +struct +-- !query output +12454 + + +-- !query +select to_number('$78.12', '$00.00') +-- !query schema +struct +-- !query output +78.12 + + +-- !query +select to_number('-454', '-000') +-- !query schema +struct +-- !query output +-454 + + +-- !query +select to_number('-454', 'S000') +-- !query schema +struct +-- !query output +-454 + + +-- !query +select to_number('12,454.8-', '00,000.9-') +-- !query schema +struct +-- !query output +-12454.8 + + +-- !query +select to_number('00,454.8-', '00,000.9-') +-- !query schema +struct +-- !query output +-454.8 From 851eb280424777e0855310878609e764c3774977 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 20 Jan 2022 21:39:34 +0800 Subject: [PATCH 061/513] [SPARK-37963][SQL] Need to update Partition URI after renaming table in InMemoryCatalog ### What changes were proposed in this pull request? After renaming a partitioned table, select from the new table from InMemoryCatalog will get an empty result. The following checkAnswer will fail as the result is empty. ``` sql(s"create table foo(i int, j int) using PARQUET partitioned by (j)") sql("insert into table foo partition(j=2) values (1)") sql(s"alter table foo rename to bar") checkAnswer(spark.table("bar"), Row(1, 2)) ``` To fix the bug, we need to update Partition URI after renaming a table in InMemoryCatalog ### Why are the changes needed? 
Bug fix ### Does this PR introduce _any_ user-facing change? No, InMemoryCatalog is used internally and HMS doesn't have this bug. ### How was this patch tested? Unit test Closes #35251 from gengliangwang/fixAlterRename. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../sql/catalyst/catalog/InMemoryCatalog.scala | 13 +++++++++++-- .../command/AlterTableRenameSuiteBase.scala | 10 ++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index e3896c598eac9..5ca96f097b2f3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -52,7 +52,7 @@ class InMemoryCatalog( import CatalogTypes.TablePartitionSpec private class TableDesc(var table: CatalogTable) { - val partitions = new mutable.HashMap[TablePartitionSpec, CatalogTablePartition] + var partitions = new mutable.HashMap[TablePartitionSpec, CatalogTablePartition] } private class DatabaseDesc(var db: CatalogDatabase) { @@ -298,8 +298,17 @@ class InMemoryCatalog( oldName, newName, oldDir, e) } oldDesc.table = oldDesc.table.withNewStorage(locationUri = Some(newDir.toUri)) - } + val newPartitions = oldDesc.partitions.map { case (spec, partition) => + val storage = partition.storage + val newLocationUri = storage.locationUri.map { uri => + new Path(uri.toString.replace(oldDir.toString, newDir.toString)).toUri + } + val newPartition = partition.copy(storage = storage.copy(locationUri = newLocationUri)) + (spec, newPartition) + } + oldDesc.partitions = newPartitions + } catalog(db).tables.put(newName, oldDesc) catalog(db).tables.remove(oldName) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala index 6370939cef6a2..1803ec046930b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala @@ -126,4 +126,14 @@ trait AlterTableRenameSuiteBase extends QueryTest with DDLCommandTestUtils { spark.sessionState.catalogManager.reset() } } + + test("SPARK-37963: preserve partition info") { + withNamespaceAndTable("ns", "dst_tbl") { dst => + val src = dst.replace("dst", "src") + sql(s"CREATE TABLE $src (i int, j int) $defaultUsing partitioned by (j)") + sql(s"insert into table $src partition(j=2) values (1)") + sql(s"ALTER TABLE $src RENAME TO ns.dst_tbl") + checkAnswer(spark.table(dst), Row(1, 2)) + } + } } From 35e264adee9c48c7542f403cbea963e9bb8db60c Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 20 Jan 2022 10:57:38 -0800 Subject: [PATCH 062/513] [SPARK-37533][FOLLOWUP] Remove useless code in Analyzer and TryEvalSuite ### What changes were proposed in this pull request? Remove useless code in Analyzer and TryEvalSuite ### Why are the changes needed? Code clean up and avoid confusion. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #35260 from gengliangwang/removeDeadCode. 
Authored-by: Gengliang Wang Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/Analyzer.scala | 3 --- .../catalyst/expressions/TryEvalSuite.scala | 24 ------------------- 2 files changed, 27 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a6c6036520d53..103a445097554 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1879,9 +1879,6 @@ class Analyzer(override val catalogManager: CatalogManager) }} } - // Group by alias is not allowed in ANSI mode. - private def allowGroupByAlias: Boolean = conf.groupByAliases && !conf.ansiEnabled - override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUpWithPruning( // mayResolveAttrByAggregateExprs requires the TreePattern UNRESOLVED_ATTRIBUTE. _.containsAllPatterns(AGGREGATE, UNRESOLVED_ATTRIBUTE), ruleId) {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryEvalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryEvalSuite.scala index 4633b63cab7f0..928077523d7e3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryEvalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryEvalSuite.scala
@@ -45,28 +45,4 @@ class TryEvalSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(input, expected) } } - - test("try_element_at: array") { - val left = Literal(Array(1, 2, 3)) - Seq( - (0, null), - (1, 1), - (4, null) - ).foreach { case (index, expected) => - val input = TryEval(ElementAt(left, Literal(index), failOnError = false)) - checkEvaluation(input, expected) - } - } - - test("try_element_at: map") { - val left = Literal.create(Map(1 -> 1)) - Seq( - (0, null), - (1, 1), - (4, null) - ).foreach { case (index, expected) => - val input = TryEval(ElementAt(left, Literal(index), failOnError = false)) - checkEvaluation(input, expected) - } - } - } }
From 821bfa51f3014e30b302d17f1423a48b46384541 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 20 Jan 2022 11:02:02 -0800 Subject: [PATCH 063/513] [SPARK-37968][BUILD][CORE] Upgrade commons-collections 3.x to commons-collections4
### What changes were proposed in this pull request? `Apache commons-collections` 3.x is a Java 1.3 compatible version, and it does not use Java 5 generics. `Apache commons-collections4` 4.4 is an upgraded version of `commons-collections` and is built with Java 8. This PR upgrades the dependency and fixes the code that would otherwise fail to compile after the upgrade.
### Why are the changes needed? Dependency upgrade; the release notes are as follows: - https://commons.apache.org/proper/commons-collections/changes-report.html#a4.4
### Does this PR introduce _any_ user-facing change? No.
### How was this patch tested? Pass GA
Closes #35257 from LuciferYang/commons-collections4.
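The one compile fix this upgrade requires is the `ReferenceMap` construction in `BroadcastManager` (see the diff below): commons-collections4 replaces the `AbstractReferenceMap.HARD`/`WEAK` integer constants with the nested `ReferenceStrength` enum. A minimal standalone sketch of the new API, assuming commons-collections4 4.4 is on the classpath (the object and variable names here are illustrative, not part of the patch):

```scala
import java.util.Collections

import org.apache.commons.collections4.map.AbstractReferenceMap.ReferenceStrength
import org.apache.commons.collections4.map.ReferenceMap

object ReferenceMapSketch {
  def main(args: Array[String]): Unit = {
    // Hard keys, weak values: an entry may be dropped by the GC once its value is no
    // longer strongly reachable anywhere else, so the map acts as a memory-sensitive cache.
    val cachedValues = Collections.synchronizedMap(
      new ReferenceMap[AnyRef, AnyRef](ReferenceStrength.HARD, ReferenceStrength.WEAK))

    cachedValues.put("broadcast_0", new Array[Byte](8))
    println(s"cached entries: ${cachedValues.size()}")
  }
}
```

This hard-key/weak-value combination is the same one `BroadcastManager.cachedValues` is built with in the diff below; only the spelling of the strength arguments changes between 3.x and 4.x.
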
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- core/pom.xml | 4 ++-- .../scala/org/apache/spark/broadcast/BroadcastManager.scala | 5 +++-- dev/deps/spark-deps-hadoop-2-hive-2.3 | 1 + dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 6 +++--- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index 8c9bbd3b8d277..ac429fc4309f4 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -193,8 +193,8 @@ commons-io - commons-collections - commons-collections + org.apache.commons + commons-collections4 com.google.code.findbugs diff --git a/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala b/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala index 989a1941d1791..b6f59c36081f5 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala @@ -22,7 +22,8 @@ import java.util.concurrent.atomic.AtomicLong import scala.reflect.ClassTag -import org.apache.commons.collections.map.{AbstractReferenceMap, ReferenceMap} +import org.apache.commons.collections4.map.AbstractReferenceMap.ReferenceStrength +import org.apache.commons.collections4.map.ReferenceMap import org.apache.spark.SparkConf import org.apache.spark.api.python.PythonBroadcast @@ -55,7 +56,7 @@ private[spark] class BroadcastManager( private[broadcast] val cachedValues = Collections.synchronizedMap( - new ReferenceMap(AbstractReferenceMap.HARD, AbstractReferenceMap.WEAK) + new ReferenceMap(ReferenceStrength.HARD, ReferenceStrength.WEAK) .asInstanceOf[java.util.Map[Any, Any]] ) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 09b01a3524e22..adefbe107442e 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -38,6 +38,7 @@ commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar commons-cli/1.5.0//commons-cli-1.5.0.jar commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar +commons-collections4/4.4//commons-collections4-4.4.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.21//commons-compress-1.21.jar commons-configuration/1.6//commons-configuration-1.6.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index b7cc91f9e0dfd..a57b7dc5216a5 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -39,7 +39,7 @@ chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar commons-cli/1.5.0//commons-cli-1.5.0.jar commons-codec/1.15//commons-codec-1.15.jar -commons-collections/3.2.2//commons-collections-3.2.2.jar +commons-collections4/4.4//commons-collections4-4.4.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.21//commons-compress-1.21.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar diff --git a/pom.xml b/pom.xml index 4f53da0f94a4a..62f3d1b479799 100644 --- a/pom.xml +++ b/pom.xml @@ -159,7 +159,7 @@ 4.4.14 3.4.1 - 3.2.2 + 4.4 2.12.15 2.12 2.0.2 @@ -612,8 +612,8 @@ ${commons.math3.version} - commons-collections - commons-collections + org.apache.commons + commons-collections4 ${commons.collections.version} From 8909ced98acf492f871ca5eba2aee0fda28bb46f Mon Sep 17 00:00:00 2001 From: dch nguyen Date: Thu, 20 Jan 2022 21:23:09 +0100 Subject: [PATCH 064/513] [SPARK-37083][PYTHON] Inline type hints for python/pyspark/accumulators.py ### What changes were proposed in this pull request? 
Inline type hints for python/pyspark/accumulators.py ### Why are the changes needed? We can take advantage of static type checking within the functions by inlining the type hints. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #34363 from dchvn/SPARK-37083. Authored-by: dch nguyen Signed-off-by: zero323 --- python/pyspark/accumulators.py | 84 ++++++++++++++++++++------------- python/pyspark/accumulators.pyi | 71 ---------------------------- python/pyspark/context.py | 8 ++-- 3 files changed, 56 insertions(+), 107 deletions(-) delete mode 100644 python/pyspark/accumulators.pyi diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py index d3dc2e91c4fad..fe775a37ed8e9 100644 --- a/python/pyspark/accumulators.py +++ b/python/pyspark/accumulators.py @@ -20,20 +20,30 @@ import struct import socketserver as SocketServer import threading +from typing import Callable, Dict, Generic, Tuple, Type, TYPE_CHECKING, TypeVar, Union + from pyspark.serializers import read_int, CPickleSerializer +if TYPE_CHECKING: + from pyspark._typing import SupportsIAdd # noqa: F401 + import socketserver.BaseRequestHandler # type: ignore[import] + __all__ = ["Accumulator", "AccumulatorParam"] +T = TypeVar("T") +U = TypeVar("U", bound="SupportsIAdd") pickleSer = CPickleSerializer() # Holds accumulators registered on the current machine, keyed by ID. This is then used to send # the local accumulator updates back to the driver program at the end of a task. -_accumulatorRegistry = {} +_accumulatorRegistry: Dict[int, "Accumulator"] = {} -def _deserialize_accumulator(aid, zero_value, accum_param): +def _deserialize_accumulator( + aid: int, zero_value: T, accum_param: "AccumulatorParam[T]" +) -> "Accumulator[T]": from pyspark.accumulators import _accumulatorRegistry # If this certain accumulator was deserialized, don't overwrite it. @@ -46,7 +56,7 @@ def _deserialize_accumulator(aid, zero_value, accum_param): return accum -class Accumulator: +class Accumulator(Generic[T]): """ A shared variable that can be accumulated, i.e., has a commutative and associative "add" @@ -106,7 +116,7 @@ class Accumulator: TypeError: ... 
""" - def __init__(self, aid, value, accum_param): + def __init__(self, aid: int, value: T, accum_param: "AccumulatorParam[T]"): """Create a new Accumulator with a given initial value and AccumulatorParam object""" from pyspark.accumulators import _accumulatorRegistry @@ -116,42 +126,47 @@ def __init__(self, aid, value, accum_param): self._deserialized = False _accumulatorRegistry[aid] = self - def __reduce__(self): + def __reduce__( + self, + ) -> Tuple[ + Callable[[int, T, "AccumulatorParam[T]"], "Accumulator[T]"], + Tuple[int, T, "AccumulatorParam[T]"], + ]: """Custom serialization; saves the zero value from our AccumulatorParam""" param = self.accum_param return (_deserialize_accumulator, (self.aid, param.zero(self._value), param)) @property - def value(self): + def value(self) -> T: """Get the accumulator's value; only usable in driver program""" if self._deserialized: raise RuntimeError("Accumulator.value cannot be accessed inside tasks") return self._value @value.setter - def value(self, value): + def value(self, value: T) -> None: """Sets the accumulator's value; only usable in driver program""" if self._deserialized: raise RuntimeError("Accumulator.value cannot be accessed inside tasks") self._value = value - def add(self, term): + def add(self, term: T) -> None: """Adds a term to this accumulator's value""" self._value = self.accum_param.addInPlace(self._value, term) - def __iadd__(self, term): + def __iadd__(self, term: T) -> "Accumulator[T]": """The += operator; adds a term to this accumulator's value""" self.add(term) return self - def __str__(self): + def __str__(self) -> str: return str(self._value) - def __repr__(self): + def __repr__(self) -> str: return "Accumulator" % (self.aid, self._value) -class AccumulatorParam: +class AccumulatorParam(Generic[T]): """ Helper object that defines how to accumulate values of a given type. @@ -178,14 +193,14 @@ class AccumulatorParam: [7.0, 8.0, 9.0] """ - def zero(self, value): + def zero(self, value: T) -> T: """ Provide a "zero value" for the type, compatible in dimensions with the provided `value` (e.g., a zero vector) """ raise NotImplementedError - def addInPlace(self, value1, value2): + def addInPlace(self, value1: T, value2: T) -> T: """ Add two values of the accumulator's data type, returning a new value; for efficiency, can also update `value1` in place and return it. @@ -193,7 +208,7 @@ def addInPlace(self, value1, value2): raise NotImplementedError -class AddingAccumulatorParam(AccumulatorParam): +class AddingAccumulatorParam(AccumulatorParam[U]): """ An AccumulatorParam that uses the + operators to add values. Designed for simple types @@ -201,21 +216,21 @@ class AddingAccumulatorParam(AccumulatorParam): as a parameter. 
""" - def __init__(self, zero_value): + def __init__(self, zero_value: U): self.zero_value = zero_value - def zero(self, value): + def zero(self, value: U) -> U: return self.zero_value - def addInPlace(self, value1, value2): - value1 += value2 + def addInPlace(self, value1: U, value2: U) -> U: + value1 += value2 # type: ignore[operator] return value1 # Singleton accumulator params for some standard types -INT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0) -FLOAT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0) -COMPLEX_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0j) +INT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0) # type: ignore[type-var] +FLOAT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0) # type: ignore[type-var] +COMPLEX_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0j) # type: ignore[type-var] class _UpdateRequestHandler(SocketServer.StreamRequestHandler): @@ -225,20 +240,20 @@ class _UpdateRequestHandler(SocketServer.StreamRequestHandler): server is shutdown. """ - def handle(self): + def handle(self) -> None: from pyspark.accumulators import _accumulatorRegistry - auth_token = self.server.auth_token + auth_token = self.server.auth_token # type: ignore[attr-defined] - def poll(func): - while not self.server.server_shutdown: + def poll(func: Callable[[], bool]) -> None: + while not self.server.server_shutdown: # type: ignore[attr-defined] # Poll every 1 second for new data -- don't block in case of shutdown. r, _, _ = select.select([self.rfile], [], [], 1) if self.rfile in r: if func(): break - def accum_updates(): + def accum_updates() -> bool: num_updates = read_int(self.rfile) for _ in range(num_updates): (aid, update) = pickleSer._read_with_length(self.rfile) @@ -247,8 +262,8 @@ def accum_updates(): self.wfile.write(struct.pack("!b", 1)) return False - def authenticate_and_accum_updates(): - received_token = self.rfile.read(len(auth_token)) + def authenticate_and_accum_updates() -> bool: + received_token: Union[bytes, str] = self.rfile.read(len(auth_token)) if isinstance(received_token, bytes): received_token = received_token.decode("utf-8") if received_token == auth_token: @@ -267,7 +282,12 @@ def authenticate_and_accum_updates(): class AccumulatorServer(SocketServer.TCPServer): - def __init__(self, server_address, RequestHandlerClass, auth_token): + def __init__( + self, + server_address: Tuple[str, int], + RequestHandlerClass: Type["socketserver.BaseRequestHandler"], + auth_token: str, + ): SocketServer.TCPServer.__init__(self, server_address, RequestHandlerClass) self.auth_token = auth_token @@ -277,13 +297,13 @@ def __init__(self, server_address, RequestHandlerClass, auth_token): """ server_shutdown = False - def shutdown(self): + def shutdown(self) -> None: self.server_shutdown = True SocketServer.TCPServer.shutdown(self) self.server_close() -def _start_update_server(auth_token): +def _start_update_server(auth_token: str) -> AccumulatorServer: """Start a TCP server to receive accumulator updates in a daemon thread, and returns it""" server = AccumulatorServer(("localhost", 0), _UpdateRequestHandler, auth_token) thread = threading.Thread(target=server.serve_forever) diff --git a/python/pyspark/accumulators.pyi b/python/pyspark/accumulators.pyi deleted file mode 100644 index 315979218cee6..0000000000000 --- a/python/pyspark/accumulators.pyi +++ /dev/null @@ -1,71 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Callable, Dict, Generic, Tuple, Type, TypeVar - -import socketserver.BaseRequestHandler # type: ignore - -from pyspark._typing import SupportsIAdd - -T = TypeVar("T") -U = TypeVar("U", bound=SupportsIAdd) - -import socketserver as SocketServer - -_accumulatorRegistry: Dict[int, Accumulator] - -class Accumulator(Generic[T]): - aid: int - accum_param: AccumulatorParam[T] - def __init__(self, aid: int, value: T, accum_param: AccumulatorParam[T]) -> None: ... - def __reduce__( - self, - ) -> Tuple[ - Callable[[int, int, AccumulatorParam[T]], Accumulator[T]], - Tuple[int, int, AccumulatorParam[T]], - ]: ... - @property - def value(self) -> T: ... - @value.setter - def value(self, value: T) -> None: ... - def add(self, term: T) -> None: ... - def __iadd__(self, term: T) -> Accumulator[T]: ... - -class AccumulatorParam(Generic[T]): - def zero(self, value: T) -> T: ... - def addInPlace(self, value1: T, value2: T) -> T: ... - -class AddingAccumulatorParam(AccumulatorParam[U]): - zero_value: U - def __init__(self, zero_value: U) -> None: ... - def zero(self, value: U) -> U: ... - def addInPlace(self, value1: U, value2: U) -> U: ... - -class _UpdateRequestHandler(SocketServer.StreamRequestHandler): - def handle(self) -> None: ... - -class AccumulatorServer(SocketServer.TCPServer): - auth_token: str - def __init__( - self, - server_address: Tuple[str, int], - RequestHandlerClass: Type[socketserver.BaseRequestHandler], - auth_token: str, - ) -> None: ... - server_shutdown: bool - def shutdown(self) -> None: ... 
diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 1002716ae2453..3db9630898af7 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -570,7 +570,7 @@ def stop(self) -> None: self._jsc = None if getattr(self, "_accumulatorServer", None): self._accumulatorServer.shutdown() - self._accumulatorServer = None + self._accumulatorServer = None # type: ignore[assignment] with SparkContext._lock: SparkContext._active_spark_context = None # type: ignore[assignment] @@ -1213,11 +1213,11 @@ def accumulator( """ if accum_param is None: if isinstance(value, int): - accum_param = accumulators.INT_ACCUMULATOR_PARAM # type: ignore[attr-defined] + accum_param = cast("AccumulatorParam[T]", accumulators.INT_ACCUMULATOR_PARAM) elif isinstance(value, float): - accum_param = accumulators.FLOAT_ACCUMULATOR_PARAM # type: ignore[attr-defined] + accum_param = cast("AccumulatorParam[T]", accumulators.FLOAT_ACCUMULATOR_PARAM) elif isinstance(value, complex): - accum_param = accumulators.COMPLEX_ACCUMULATOR_PARAM # type: ignore[attr-defined] + accum_param = cast("AccumulatorParam[T]", accumulators.COMPLEX_ACCUMULATOR_PARAM) else: raise TypeError("No default accumulator param for type %s" % type(value)) SparkContext._next_accum_id += 1 From 9f95f46bd92d793c9d3f296e0a581795d3d11218 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 20 Jan 2022 14:27:34 -0800 Subject: [PATCH 065/513] [SPARK-37806][K8S][FOLLOWUP] Use sc instead of sparkContext ### What changes were proposed in this pull request? This PR is a follow-up of https://github.com/apache/spark/pull/35096. ### Why are the changes needed? To avoid NPE. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #35263 from dongjoon-hyun/SPARK-37806-2. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala index f6054a8dbc5ee..2a4d96596f0c2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala @@ -61,7 +61,7 @@ class ExecutorRollDriverPlugin extends DriverPlugin with Logging { } else if (!sc.conf.get(DECOMMISSION_ENABLED)) { logWarning(s"Disabled because ${DECOMMISSION_ENABLED.key} is false.") } else { - minTasks = sparkContext.conf.get(MINIMUM_TASKS_PER_EXECUTOR_BEFORE_ROLLING) + minTasks = sc.conf.get(MINIMUM_TASKS_PER_EXECUTOR_BEFORE_ROLLING) // Scheduler is not created yet sparkContext = sc From ef0055418ee065de924bc1e9b8c5b31587068dea Mon Sep 17 00:00:00 2001 From: dch nguyen Date: Fri, 21 Jan 2022 11:09:04 +0900 Subject: [PATCH 066/513] [SPARK-37903][PYTHON][FOLLOW-UP] Raise ValueError with no return function ### What changes were proposed in this pull request? Raise ValueError with no return function ### Why are the changes needed? This PR is a follow-up of https://github.com/apache/spark/pull/35200 Currently, with the function with no return, `infer_return_type` will return `ScalarType[DoubleType]` as default. We should raise exception for this case. ### Does this PR introduce _any_ user-facing change? 
Yes, Before this PR ```python >>> from pyspark.pandas.typedef.typehints import infer_return_type >>> def f(): ... pass ... >>> infer_return_type(f) ScalarType[DoubleType] ``` after this PR user will take an exception when infer return type of function with no return. ```python-traceback >>> from pyspark.pandas.typedef.typehints import infer_return_type >>> def f(): ... pass ... >>> infer_return_type(f) Traceback (most recent call last): File "", line 1, in File "/u02/spark/python/pyspark/pandas/typedef/typehints.py", line 563, in infer_return_type raise ValueError("A return value is required for the input function") ValueError: A return value is required for the input function ``` ### How was this patch tested? unit test Closes #35236 from dchvn/SPARK-37903-FU. Authored-by: dch nguyen Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/tests/test_typedef.py | 21 +++++++++++++++++++++ python/pyspark/pandas/typedef/typehints.py | 3 +++ 2 files changed, 24 insertions(+) diff --git a/python/pyspark/pandas/tests/test_typedef.py b/python/pyspark/pandas/tests/test_typedef.py index f292f0f320325..1bc5c8cfdd051 100644 --- a/python/pyspark/pandas/tests/test_typedef.py +++ b/python/pyspark/pandas/tests/test_typedef.py @@ -56,6 +56,27 @@ class TypeHintTests(unittest.TestCase): + def test_infer_schema_with_no_return(self): + def try_infer_return_type(): + def f(): + pass + + infer_return_type(f) + + self.assertRaisesRegex( + ValueError, "A return value is required for the input function", try_infer_return_type + ) + + def try_infer_return_type(): + def f() -> None: + pass + + infer_return_type(f) + + self.assertRaisesRegex( + TypeError, "Type was not understood", try_infer_return_type + ) + def test_infer_schema_from_pandas_instances(self): def func() -> pd.Series[int]: pass diff --git a/python/pyspark/pandas/typedef/typehints.py b/python/pyspark/pandas/typedef/typehints.py index 9b42daffcd351..695ed31af6f42 100644 --- a/python/pyspark/pandas/typedef/typehints.py +++ b/python/pyspark/pandas/typedef/typehints.py @@ -559,6 +559,9 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp tpe = get_type_hints(f).get("return", None) + if tpe is None: + raise ValueError("A return value is required for the input function") + if hasattr(tpe, "__origin__") and issubclass(tpe.__origin__, SeriesType): tpe = tpe.__args__[0] if issubclass(tpe, NameTypeHolder): From 73390ab52e8ff6c2d41b425a3edd0062ecdee96e Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Fri, 21 Jan 2022 15:40:51 +0900 Subject: [PATCH 067/513] [SPARK-37037][SQL][FOLLOWUP] Remove unused field in UTF8String ### What changes were proposed in this pull request? Remove `IS_LITTLE_ENDIAN` field in `UTF8String`. ### Why are the changes needed? After SPARK-36992 and SPARK-37037, the usage of `IS_LITTLE_ENDIAN` has been moved to `ByteArray`. The original `IS_LITTLE_ENDIAN` in `UTF8String` is unused. ### Does this PR introduce _any_ user-facing change? no, just a code cleanup ### How was this patch tested? Pass CI is enough Closes #35264 from ulysses-you/SPARK-37037-followup. 
Authored-by: ulysses-you Signed-off-by: Hyukjin Kwon --- .../main/java/org/apache/spark/unsafe/types/UTF8String.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index c47b90d4be6af..04c69f89b1e34 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -20,7 +20,6 @@ import javax.annotation.Nonnull; import java.io.*; import java.nio.ByteBuffer; -import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Map; @@ -96,9 +95,6 @@ public final class UTF8String implements Comparable, Externalizable, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // 0xF5..0xFF - disallowed in UTF-8 }; - private static final boolean IS_LITTLE_ENDIAN = - ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN; - private static final UTF8String COMMA_UTF8 = UTF8String.fromString(","); public static final UTF8String EMPTY_UTF8 = UTF8String.fromString(""); From f934145d320447e68ea4eaebe54b4cf420521531 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 21 Jan 2022 15:18:15 +0800 Subject: [PATCH 068/513] [SPARK-37967][SQL] Literal.create support ObjectType ### What changes were proposed in this pull request? Currently , ConstantFolding use `Literal.create()` to represent the result of constant. If the data type is ObjectType, such as `ObjectType(classOf[UTF8String])`, current `Literal.create` will first convert `UTF8String` to `String`, then failed to create ObjectType literal. this pr to fix this issue. ### Why are the changes needed? Fix bug ### Does this PR introduce _any_ user-facing change? User can create ObjectType literal use `Literal.create()` more safety. ### How was this patch tested? Added UT Closes #35255 from AngersZhuuuu/SPARK-37967. 
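As a hedged illustration of the new behavior (essentially the test case added below), a value whose Catalyst type is an `ObjectType` is now wrapped as-is instead of being routed through the Catalyst converters:

```scala
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.types.ObjectType
import org.apache.spark.unsafe.types.UTF8String

// Before this change the UTF8String was first converted to a java.lang.String,
// which made the subsequent ObjectType literal creation fail.
val lit = Literal.create(
  UTF8String.fromString("Spark SQL"), ObjectType(classOf[UTF8String]))
```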
Authored-by: Angerszhuuuu Signed-off-by: Wenchen Fan --- .../apache/spark/sql/catalyst/expressions/literals.scala | 1 + .../sql/catalyst/expressions/LiteralExpressionSuite.scala | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index cc207e51f85c4..af10a18e4d16d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -158,6 +158,7 @@ object Literal { Literal(CatalystTypeConverters.createToCatalystConverter(dataType)(v), dataType) case _: DayTimeIntervalType if v.isInstanceOf[Duration] => Literal(CatalystTypeConverters.createToCatalystConverter(dataType)(v), dataType) + case _: ObjectType => Literal(v, dataType) case _ => Literal(CatalystTypeConverters.convertToCatalyst(v), dataType) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index 4081e138d2b62..b1934a06dc1bf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.types.DayTimeIntervalType._ import org.apache.spark.sql.types.YearMonthIntervalType._ -import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -465,4 +465,10 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Literal.create(duration, dt), result) } } + + test("SPARK-37967: Literal.create support ObjectType") { + checkEvaluation( + Literal.create(UTF8String.fromString("Spark SQL"), ObjectType(classOf[UTF8String])), + UTF8String.fromString("Spark SQL")) + } } From ec5d2a76a55a4094d7bb48788a917145d81d47cf Mon Sep 17 00:00:00 2001 From: dch nguyen Date: Fri, 21 Jan 2022 15:23:05 +0800 Subject: [PATCH 069/513] [SPARK-37929][SQL] Support cascade mode for `dropNamespace` API ### What changes were proposed in this pull request? This PR adds a new API `dropNamespace(String[] ns, boolean cascade)` to replace the existing one: Add a boolean parameter `cascade` that supports deleting all the Namespaces and Tables under the namespace. Also include changing the implementations and tests that are relevant to this API. ### Why are the changes needed? According to [#cmt](https://github.com/apache/spark/pull/35202#discussion_r784463563), the current `dropNamespace` API doesn't support cascade mode. So this PR replaces that to support cascading. If cascade is set True, delete all namespaces and tables under the namespace. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing test. Closes #35246 from dchvn/change_dropnamespace_api. 
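For illustration only (not part of the patch), a sketch of how a caller might use the new two-argument signature, assuming `catalog` is some `SupportsNamespaces` implementation; the helper name is hypothetical:

```scala
import org.apache.spark.sql.catalyst.analysis.NonEmptyNamespaceException
import org.apache.spark.sql.connector.catalog.SupportsNamespaces

def dropQuietly(catalog: SupportsNamespaces, ns: Array[String]): Boolean = {
  try {
    // cascade = false refuses to drop a namespace that still contains tables or
    // child namespaces and throws NonEmptyNamespaceException instead.
    catalog.dropNamespace(ns, false)
  } catch {
    // cascade = true drops the namespace together with everything underneath it.
    case _: NonEmptyNamespaceException => catalog.dropNamespace(ns, true)
  }
}
```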
Authored-by: dch nguyen Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/V2JDBCNamespaceTest.scala | 2 +- .../catalog/DelegatingCatalogExtension.java | 6 ++- .../connector/catalog/SupportsNamespaces.java | 10 ++++- .../catalyst/analysis/NonEmptyException.scala | 36 ++++++++++++++++ .../sql/connector/catalog/CatalogSuite.scala | 6 +-- .../catalog/InMemoryTableCatalog.scala | 14 +++++-- .../datasources/v2/DropNamespaceExec.scala | 15 +++---- .../datasources/v2/V2SessionCatalog.scala | 9 ++-- .../v2/jdbc/JDBCTableCatalog.scala | 4 +- .../v2/V2SessionCatalogSuite.scala | 42 +++++++++---------- 10 files changed, 95 insertions(+), 49 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NonEmptyException.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala index 284b05c1cc120..0c6b2701c92b0 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala @@ -52,7 +52,7 @@ private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession with DockerInte .exists(_.contains("catalog comment")) assert(createCommentWarning === false) - catalog.dropNamespace(Array("foo")) + catalog.dropNamespace(Array("foo"), cascade = false) assert(catalog.namespaceExists(Array("foo")) === false) assert(catalog.listNamespaces() === builtinNamespaces) val msg = intercept[AnalysisException] { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java index 48a859a4159fb..865ac553199aa 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java @@ -155,8 +155,10 @@ public void alterNamespace( } @Override - public boolean dropNamespace(String[] namespace) throws NoSuchNamespaceException { - return asNamespaceCatalog().dropNamespace(namespace); + public boolean dropNamespace( + String[] namespace, + boolean cascade) throws NoSuchNamespaceException, NonEmptyNamespaceException { + return asNamespaceCatalog().dropNamespace(namespace, cascade); } @Override diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsNamespaces.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsNamespaces.java index f70746b612e92..c1a4960068d24 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsNamespaces.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsNamespaces.java @@ -20,6 +20,7 @@ import org.apache.spark.annotation.Evolving; import org.apache.spark.sql.catalyst.analysis.NamespaceAlreadyExistsException; import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.apache.spark.sql.catalyst.analysis.NonEmptyNamespaceException; import java.util.Map; @@ -136,15 +137,20 @@ void alterNamespace( NamespaceChange... changes) throws NoSuchNamespaceException; /** - * Drop a namespace from the catalog, recursively dropping all objects within the namespace. 
+ * Drop a namespace from the catalog with cascade mode, recursively dropping all objects + * within the namespace if cascade is true. * <p>
    * If the catalog implementation does not support this operation, it may throw * {@link UnsupportedOperationException}. * * @param namespace a multi-part namespace + * @param cascade When true, deletes all objects under the namespace * @return true if the namespace was dropped * @throws NoSuchNamespaceException If the namespace does not exist (optional) + * @throws NonEmptyNamespaceException If the namespace is non-empty and cascade is false * @throws UnsupportedOperationException If drop is not a supported operation */ - boolean dropNamespace(String[] namespace) throws NoSuchNamespaceException; + boolean dropNamespace( + String[] namespace, + boolean cascade) throws NoSuchNamespaceException, NonEmptyNamespaceException; } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NonEmptyException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NonEmptyException.scala new file mode 100644 index 0000000000000..f3ff28f74fcc3 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NonEmptyException.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + +/** + * Thrown by a catalog when an item already exists. The analyzer will rethrow the exception + * as an [[org.apache.spark.sql.AnalysisException]] with the correct position information. 
+ */ +case class NonEmptyNamespaceException( + override val message: String, + override val cause: Option[Throwable] = None) + extends AnalysisException(message, cause = cause) { + + def this(namespace: Array[String]) = { + this(s"Namespace '${namespace.quoted}' is non empty.") + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogSuite.scala index 0cca1cc9bebf2..d00bc31e07f19 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogSuite.scala @@ -820,7 +820,7 @@ class CatalogSuite extends SparkFunSuite { assert(catalog.namespaceExists(testNs) === false) - val ret = catalog.dropNamespace(testNs) + val ret = catalog.dropNamespace(testNs, cascade = false) assert(ret === false) } @@ -833,7 +833,7 @@ class CatalogSuite extends SparkFunSuite { assert(catalog.namespaceExists(testNs) === true) assert(catalog.loadNamespaceMetadata(testNs).asScala === Map("property" -> "value")) - val ret = catalog.dropNamespace(testNs) + val ret = catalog.dropNamespace(testNs, cascade = false) assert(ret === true) assert(catalog.namespaceExists(testNs) === false) @@ -845,7 +845,7 @@ class CatalogSuite extends SparkFunSuite { catalog.createNamespace(testNs, Map("property" -> "value").asJava) catalog.createTable(testIdent, schema, Array.empty, emptyProps) - assert(catalog.dropNamespace(testNs)) + assert(catalog.dropNamespace(testNs, cascade = true)) assert(!catalog.namespaceExists(testNs)) intercept[NoSuchNamespaceException](catalog.listTables(testNs)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableCatalog.scala index d8e6bc4149d98..428aec703674d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableCatalog.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableCatalog.scala @@ -22,7 +22,7 @@ import java.util.concurrent.ConcurrentHashMap import scala.collection.JavaConverters._ -import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NonEmptyNamespaceException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.connector.distributions.{Distribution, Distributions} import org.apache.spark.sql.connector.expressions.{SortOrder, Transform} import org.apache.spark.sql.types.StructType @@ -213,10 +213,16 @@ class InMemoryTableCatalog extends BasicInMemoryTableCatalog with SupportsNamesp namespaces.put(namespace.toList, CatalogV2Util.applyNamespaceChanges(metadata, changes)) } - override def dropNamespace(namespace: Array[String]): Boolean = { - listNamespaces(namespace).foreach(dropNamespace) + override def dropNamespace(namespace: Array[String], cascade: Boolean): Boolean = { try { - listTables(namespace).foreach(dropTable) + if (!cascade) { + if (listTables(namespace).nonEmpty || listNamespaces(namespace).nonEmpty) { + throw new NonEmptyNamespaceException(namespace) + } + } else { + listNamespaces(namespace).foreach(namespace => dropNamespace(namespace, cascade)) + listTables(namespace).foreach(dropTable) + } } catch { case _: 
NoSuchNamespaceException => } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala index 9a9d8e1d4d57d..5d302055e7d91 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.NonEmptyNamespaceException import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.connector.catalog.CatalogPlugin import org.apache.spark.sql.errors.QueryCompilationErrors @@ -37,17 +38,11 @@ case class DropNamespaceExec( val nsCatalog = catalog.asNamespaceCatalog val ns = namespace.toArray if (nsCatalog.namespaceExists(ns)) { - // The default behavior of `SupportsNamespace.dropNamespace()` is cascading, - // so make sure the namespace to drop is empty. - if (!cascade) { - if (catalog.asTableCatalog.listTables(ns).nonEmpty - || nsCatalog.listNamespaces(ns).nonEmpty) { + try { + nsCatalog.dropNamespace(ns, cascade) + } catch { + case _: NonEmptyNamespaceException => throw QueryCompilationErrors.cannotDropNonemptyNamespaceError(namespace) - } - } - - if (!nsCatalog.dropNamespace(ns)) { - throw QueryCompilationErrors.cannotDropNonemptyNamespaceError(namespace) } } else if (!ifExists) { throw QueryCompilationErrors.noSuchNamespaceError(ns) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala index 3ea7d0f578b3f..d9cfe0aa04dc8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala @@ -286,12 +286,11 @@ class V2SessionCatalog(catalog: SessionCatalog) } } - override def dropNamespace(namespace: Array[String]): Boolean = namespace match { + override def dropNamespace( + namespace: Array[String], + cascade: Boolean): Boolean = namespace match { case Array(db) if catalog.databaseExists(db) => - if (catalog.listTables(db).nonEmpty) { - throw QueryExecutionErrors.namespaceNotEmptyError(namespace) - } - catalog.dropDatabase(db, ignoreIfNotExists = false, cascade = false) + catalog.dropDatabase(db, ignoreIfNotExists = false, cascade) true case Array(_) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index 566706486d3f0..1658f0dce7fbe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -278,7 +278,9 @@ class JDBCTableCatalog extends TableCatalog with SupportsNamespaces with Logging } } - override def dropNamespace(namespace: Array[String]): Boolean = namespace match { + override def dropNamespace( + namespace: Array[String], + cascade: Boolean): Boolean = namespace match { case Array(db) if namespaceExists(namespace) => if (listTables(Array(db)).nonEmpty) { throw QueryExecutionErrors.namespaceNotEmptyError(namespace) diff 
--git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala index 86f4dc467638f..646eccb4cdd7a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala @@ -67,10 +67,10 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { override protected def afterAll(): Unit = { val catalog = newCatalog() - catalog.dropNamespace(Array("db")) - catalog.dropNamespace(Array("db2")) - catalog.dropNamespace(Array("ns")) - catalog.dropNamespace(Array("ns2")) + catalog.dropNamespace(Array("db"), cascade = true) + catalog.dropNamespace(Array("db2"), cascade = true) + catalog.dropNamespace(Array("ns"), cascade = true) + catalog.dropNamespace(Array("ns2"), cascade = true) super.afterAll() } @@ -811,7 +811,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { assert(catalog.listNamespaces(Array()) === Array(testNs, defaultNs)) assert(catalog.listNamespaces(testNs) === Array()) - catalog.dropNamespace(testNs) + catalog.dropNamespace(testNs, cascade = false) } test("listNamespaces: fail if missing namespace") { @@ -849,7 +849,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { assert(catalog.namespaceExists(testNs) === true) checkMetadata(metadata.asScala, Map("property" -> "value")) - catalog.dropNamespace(testNs) + catalog.dropNamespace(testNs, cascade = false) } test("loadNamespaceMetadata: empty metadata") { @@ -864,7 +864,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { assert(catalog.namespaceExists(testNs) === true) checkMetadata(metadata.asScala, emptyProps.asScala) - catalog.dropNamespace(testNs) + catalog.dropNamespace(testNs, cascade = false) } test("createNamespace: basic behavior") { @@ -884,7 +884,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { checkMetadata(metadata, Map("property" -> "value")) assert(expectedPath === metadata("location")) - catalog.dropNamespace(testNs) + catalog.dropNamespace(testNs, cascade = false) } test("createNamespace: initialize location") { @@ -900,7 +900,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { checkMetadata(metadata, Map.empty) assert(expectedPath === metadata("location")) - catalog.dropNamespace(testNs) + catalog.dropNamespace(testNs, cascade = false) } test("createNamespace: relative location") { @@ -917,7 +917,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { checkMetadata(metadata, Map.empty) assert(expectedPath === metadata("location")) - catalog.dropNamespace(testNs) + catalog.dropNamespace(testNs, cascade = false) } test("createNamespace: fail if namespace already exists") { @@ -933,7 +933,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { assert(catalog.namespaceExists(testNs) === true) checkMetadata(catalog.loadNamespaceMetadata(testNs).asScala, Map("property" -> "value")) - catalog.dropNamespace(testNs) + catalog.dropNamespace(testNs, cascade = false) } test("createNamespace: fail nested namespace") { @@ -948,7 +948,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { assert(exc.getMessage.contains("Invalid namespace name: db.nested")) - catalog.dropNamespace(Array("db")) + catalog.dropNamespace(Array("db"), cascade = false) 
} test("createTable: fail if namespace does not exist") { @@ -969,7 +969,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { assert(catalog.namespaceExists(testNs) === false) - val ret = catalog.dropNamespace(testNs) + val ret = catalog.dropNamespace(testNs, cascade = false) assert(ret === false) } @@ -981,7 +981,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { assert(catalog.namespaceExists(testNs) === true) - val ret = catalog.dropNamespace(testNs) + val ret = catalog.dropNamespace(testNs, cascade = false) assert(ret === true) assert(catalog.namespaceExists(testNs) === false) @@ -993,8 +993,8 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { catalog.createNamespace(testNs, Map("property" -> "value").asJava) catalog.createTable(testIdent, schema, Array.empty, emptyProps) - val exc = intercept[IllegalStateException] { - catalog.dropNamespace(testNs) + val exc = intercept[AnalysisException] { + catalog.dropNamespace(testNs, cascade = false) } assert(exc.getMessage.contains(testNs.quoted)) @@ -1002,7 +1002,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { checkMetadata(catalog.loadNamespaceMetadata(testNs).asScala, Map("property" -> "value")) catalog.dropTable(testIdent) - catalog.dropNamespace(testNs) + catalog.dropNamespace(testNs, cascade = false) } test("alterNamespace: basic behavior") { @@ -1027,7 +1027,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { catalog.loadNamespaceMetadata(testNs).asScala, Map("property" -> "value")) - catalog.dropNamespace(testNs) + catalog.dropNamespace(testNs, cascade = false) } test("alterNamespace: update namespace location") { @@ -1050,7 +1050,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { catalog.alterNamespace(testNs, NamespaceChange.setProperty("location", "relativeP")) assert(newRelativePath === spark.catalog.getDatabase(testNs(0)).locationUri) - catalog.dropNamespace(testNs) + catalog.dropNamespace(testNs, cascade = false) } test("alterNamespace: update namespace comment") { @@ -1065,7 +1065,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { assert(newComment === spark.catalog.getDatabase(testNs(0)).description) - catalog.dropNamespace(testNs) + catalog.dropNamespace(testNs, cascade = false) } test("alterNamespace: fail if namespace doesn't exist") { @@ -1092,6 +1092,6 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { assert(exc.getMessage.contains(s"Cannot remove reserved property: $p")) } - catalog.dropNamespace(testNs) + catalog.dropNamespace(testNs, cascade = false) } } From 2b7442e77f49e02a66285652b63b465eafe20f6b Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 21 Jan 2022 15:28:11 +0800 Subject: [PATCH 070/513] [SPARK-27442][SQL] Remove check field name when reading/writing data in parquet ### What changes were proposed in this pull request? Spark should remove check field name when reading/writing parquet files. ### Why are the changes needed? Support spark reading existing parquet files with special chars in column names. ### Does this PR introduce _any_ user-facing change? Such as parquet, user can use spark to read existing files with special chars in column names. And then can use back quote to wrap special column name such as `max(t)` or use `max(t)` as `max_t`, then user can use `max_t`. ### How was this patch tested? Added UT Closes #35229 from AngersZhuuuu/SPARK-27442. 
Authored-by: Angerszhuuuu Signed-off-by: Wenchen Fan --- .../parquet/ParquetFileFormat.scala | 4 --- .../parquet/ParquetSchemaConverter.scala | 18 ---------- .../datasources/v2/parquet/ParquetWrite.scala | 1 - .../spark/sql/FileBasedDataSourceSuite.scala | 22 ------------ .../org/apache/spark/sql/SQLQuerySuite.scala | 19 ++++++++++ .../sql/hive/HiveParquetSourceSuite.scala | 17 --------- .../sql/hive/execution/HiveDDLSuite.scala | 34 +++++------------- .../sql/hive/execution/SQLQuerySuite.scala | 35 ------------------- .../ParquetHadoopFsRelationSuite.scala | 15 -------- 9 files changed, 27 insertions(+), 138 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 4515387bdaa90..b0a168c9a85c7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -407,10 +407,6 @@ class ParquetFileFormat case _ => false } - - override def supportFieldName(name: String): Boolean = { - !name.matches(".*[ ,;{}()\n\t=].*") - } } object ParquetFileFormat extends Logging { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala index 352e5f01172f2..cb5d646f85e9e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala @@ -467,7 +467,6 @@ class SparkToParquetSchemaConverter( } private def convertField(field: StructField, repetition: Type.Repetition): Type = { - ParquetSchemaConverter.checkFieldName(field.name) field.dataType match { // =================== @@ -698,23 +697,6 @@ private[sql] object ParquetSchemaConverter { val EMPTY_MESSAGE: MessageType = Types.buildMessage().named(ParquetSchemaConverter.SPARK_PARQUET_SCHEMA_NAME) - def checkFieldName(name: String): Unit = { - // ,;{}()\n\t= and space are special characters in Parquet schema - if (name.matches(".*[ ,;{}()\n\t=].*")) { - throw QueryCompilationErrors.columnNameContainsInvalidCharactersError(name) - } - } - - def checkFieldNames(schema: StructType): Unit = { - schema.foreach { field => - checkFieldName(field.name) - field.dataType match { - case s: StructType => checkFieldNames(s) - case _ => - } - } - } - def checkConversionRequirement(f: => Boolean, message: String): Unit = { if (!f) { throw new AnalysisException(message) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala index b2b6d313e1bcd..0316d91f40732 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala @@ -72,7 +72,6 @@ case class ParquetWrite( ParquetOutputFormat.setWriteSupportClass(job, classOf[ParquetWriteSupport]) - ParquetSchemaConverter.checkFieldNames(dataSchema) // This metadata is useful for keeping UDTs like Vector/Matrix. 
ParquetWriteSupport.setSchema(dataSchema, conf) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 518090877e633..39b08bd560bb1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -991,28 +991,6 @@ class FileBasedDataSourceSuite extends QueryTest checkAnswer(df, Row("v1", "v2")) } } - - test("SPARK-36271: V1 insert should check schema field name too") { - withView("v") { - spark.range(1).createTempView("v") - withTempDir { dir => - val e = intercept[AnalysisException] { - sql("SELECT ID, IF(ID=1,1,0) FROM v").write.mode(SaveMode.Overwrite) - .format("parquet").save(dir.getCanonicalPath) - }.getMessage - assert(e.contains("Column name \"(IF((ID = 1), 1, 0))\" contains invalid character(s).")) - } - - withTempDir { dir => - val e = intercept[AnalysisException] { - sql("SELECT NAMED_STRUCT('(IF((ID = 1), 1, 0))', IF(ID=1,ID,0)) AS col1 FROM v") - .write.mode(SaveMode.Overwrite) - .format("parquet").save(dir.getCanonicalPath) - }.getMessage - assert(e.contains("Column name \"(IF((ID = 1), 1, 0))\" contains invalid character(s).")) - } - } - } } object TestingUDT { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index d7f18ee801d72..523a8e242e7e8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4243,6 +4243,25 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer(df3, df4) } } + + test("SPARK-27442: Spark support read/write parquet file with invalid char in field name") { + withTempDir { dir => + Seq((1, 2, 3, 4, 5, 6, 7, 8, 9, 10), (2, 4, 6, 8, 10, 12, 14, 16, 18, 20)) + .toDF("max(t)", "max(t", "=", "\n", ";", "a b", "{", ".", "a.b", "a") + .repartition(1) + .write.mode(SaveMode.Overwrite).parquet(dir.getAbsolutePath) + val df = spark.read.parquet(dir.getAbsolutePath) + checkAnswer(df, + Row(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) :: + Row(2, 4, 6, 8, 10, 12, 14, 16, 18, 20) :: Nil) + assert(df.schema.names.sameElements( + Array("max(t)", "max(t", "=", "\n", ";", "a b", "{", ".", "a.b", "a"))) + checkAnswer(df.select("`max(t)`", "`a b`", "`{`", "`.`", "`a.b`"), + Row(1, 6, 7, 8, 9) :: Row(2, 12, 14, 16, 18) :: Nil) + checkAnswer(df.where("`a.b` > 10"), + Row(2, 4, 6, 8, 10, 12, 14, 16, 18, 20) :: Nil) + } + } } case class Foo(bar: Option[String]) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala index 7690e1e9e1465..5778b259c7d5a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala @@ -207,23 +207,6 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest with ParquetTest { } } - test("Aggregation attribute names can't contain special chars \" ,;{}()\\n\\t=\"") { - withTempDir { tempDir => - val filePath = new File(tempDir, "testParquet").getCanonicalPath - val filePath2 = new File(tempDir, "testParquet2").getCanonicalPath - - val df = Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str") - val df2 = df.as("x").join(df.as("y"), $"x.str" === 
$"y.str").groupBy("y.str").max("y.int") - intercept[Throwable](df2.write.parquet(filePath)) - - val df3 = df2.toDF("str", "max_int") - df3.write.parquet(filePath2) - val df4 = read.parquet(filePath2) - checkAnswer(df4, Row("1", 1) :: Row("2", 2) :: Row("3", 3) :: Nil) - assert(df4.columns === Array("str", "max_int")) - } - } - test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { Seq("true", "false").foreach { parquetConversion => withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 014feb33df5ea..85e3d0b53ba7d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2930,12 +2930,12 @@ class HiveDDLSuite withView("v") { spark.range(1).createTempView("v") withTempPath { path => - val e = intercept[AnalysisException] { + val e = intercept[SparkException] { spark.sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' " + s"STORED AS PARQUET SELECT ID, if(1=1, 1, 0), abs(id), '^-' FROM v") - }.getMessage - assert(e.contains("Column name \"(IF((1 = 1), 1, 0))\" contains invalid character(s). " + - "Please use alias to rename it.")) + }.getCause.getCause.getMessage + assert(e.contains( + "field ended by ';': expected ';' but got 'IF' at line 2: optional int32 (IF")) } } } @@ -2944,7 +2944,7 @@ class HiveDDLSuite withView("v") { spark.range(1).createTempView("v") withTempPath { path => - val e = intercept[AnalysisException] { + val e = intercept[SparkException] { spark.sql( s""" |INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' @@ -2953,27 +2953,9 @@ class HiveDDLSuite |NAMED_STRUCT('ID', ID, 'IF(ID=1,ID,0)', IF(ID=1,ID,0), 'B', ABS(ID)) AS col1 |FROM v """.stripMargin) - }.getMessage - assert(e.contains("Column name \"IF(ID=1,ID,0)\" contains" + - " invalid character(s). Please use alias to rename it.")) - } - } - } - - test("SPARK-36312: ParquetWriteSupport should check inner field") { - withView("v") { - spark.range(1).createTempView("v") - withTempPath { path => - val e = intercept[AnalysisException] { - spark.sql( - """ - |SELECT - |NAMED_STRUCT('ID', ID, 'IF(ID=1,ID,0)', IF(ID=1,ID,0), 'B', ABS(ID)) AS col1 - |FROM v - |""".stripMargin).write.mode(SaveMode.Overwrite).parquet(path.toString) - }.getMessage - assert(e.contains("Column name \"IF(ID=1,ID,0)\" contains" + - " invalid character(s). 
Please use alias to rename it.")) + }.getCause.getCause.getMessage + assert(e.contains("expected at the position 19 of " + + "'struct' but '(' is found.")) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 1829f38fe5775..e690d026053d6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2212,41 +2212,6 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } } - test("SPARK-21912 Parquet table should not create invalid column names") { - Seq(" ", ",", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name => - val source = "PARQUET" - withTable("t21912") { - val m = intercept[AnalysisException] { - sql(s"CREATE TABLE t21912(`col$name` INT) USING $source") - }.getMessage - assert(m.contains(s"contains invalid character(s)")) - - val m1 = intercept[AnalysisException] { - sql(s"CREATE TABLE t21912 STORED AS $source AS SELECT 1 `col$name`") - }.getMessage - assert(m1.contains(s"contains invalid character(s)")) - - val m2 = intercept[AnalysisException] { - sql(s"CREATE TABLE t21912 USING $source AS SELECT 1 `col$name`") - }.getMessage - assert(m2.contains(s"contains invalid character(s)")) - - withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "false") { - val m3 = intercept[AnalysisException] { - sql(s"CREATE TABLE t21912(`col$name` INT) USING hive OPTIONS (fileFormat '$source')") - }.getMessage - assert(m3.contains(s"contains invalid character(s)")) - } - - sql(s"CREATE TABLE t21912(`col` INT) USING $source") - val m4 = intercept[AnalysisException] { - sql(s"ALTER TABLE t21912 ADD COLUMNS(`col$name` INT)") - }.getMessage - assert(m4.contains(s"contains invalid character(s)")) - } - } - } - test("SPARK-32889: ORC table column name supports special characters") { // " " "," is not allowed. Seq("$", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala index 2e6b86206a631..18e8401ee3d2b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala @@ -107,21 +107,6 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest { } } - test("SPARK-8079: Avoid NPE thrown from BaseWriterContainer.abortJob") { - withTempPath { dir => - intercept[AnalysisException] { - // Parquet doesn't allow field names with spaces. Here we are intentionally making an - // exception thrown from the `ParquetRelation2.prepareForWriteJob()` method to trigger - // the bug. Please refer to spark-8079 for more details. - spark.range(1, 10) - .withColumnRenamed("id", "a b") - .write - .format("parquet") - .save(dir.getCanonicalPath) - } - } - } - test("SPARK-8604: Parquet data source should write summary file while doing appending") { withSQLConf( ParquetOutputFormat.JOB_SUMMARY_LEVEL -> "ALL", From b01c5ab2b847c4ecc5f359fd33c08f301051fd01 Mon Sep 17 00:00:00 2001 From: Jerry Peng Date: Fri, 21 Jan 2022 01:00:23 -0800 Subject: [PATCH 071/513] [SPARK-36649][SQL] Support `Trigger.AvailableNow` on Kafka data source ### What changes were proposed in this pull request? 
To add support to using the AvailableNow trigger for the KafkaSource ### Why are the changes needed? To allow users to use the KafkaSource the AvailableNow trigger. The AvailableNow trigger allows the all the data currently available in the source to be processed in multiple micro batches. ### Does this PR introduce _any_ user-facing change? Yes, allows users to using the KafkaSource to use the AvailableNow trigger ### How was this patch tested? Added a test for this. Closes #35238 from jerrypeng/SPARK-36649. Authored-by: Jerry Peng Signed-off-by: Yuanjian Li --- .../sql/kafka010/KafkaMicroBatchStream.scala | 20 +++++++-- .../spark/sql/kafka010/KafkaSource.scala | 20 +++++++-- .../kafka010/KafkaMicroBatchSourceSuite.scala | 41 ++++++++++++++++++- 3 files changed, 74 insertions(+), 7 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala index 3b73896d631c6..829ee15c13a3d 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala @@ -57,7 +57,7 @@ private[kafka010] class KafkaMicroBatchStream( metadataPath: String, startingOffsets: KafkaOffsetRangeLimit, failOnDataLoss: Boolean) - extends SupportsAdmissionControl with ReportsSourceMetrics with MicroBatchStream with Logging { + extends SupportsTriggerAvailableNow with ReportsSourceMetrics with MicroBatchStream with Logging { private[kafka010] val pollTimeoutMs = options.getLong( KafkaSourceProvider.CONSUMER_POLL_TIMEOUT, @@ -81,6 +81,8 @@ private[kafka010] class KafkaMicroBatchStream( private var latestPartitionOffsets: PartitionOffsetMap = _ + private var allDataForTriggerAvailableNow: PartitionOffsetMap = _ + /** * Lazily initialize `initialPartitionOffsets` to make sure that `KafkaConsumer.poll` is only * called in StreamExecutionThread. Otherwise, interrupting a thread while running @@ -98,7 +100,8 @@ private[kafka010] class KafkaMicroBatchStream( } else if (minOffsetPerTrigger.isDefined) { ReadLimit.minRows(minOffsetPerTrigger.get, maxTriggerDelayMs) } else { - maxOffsetsPerTrigger.map(ReadLimit.maxRows).getOrElse(super.getDefaultReadLimit) + // TODO (SPARK-37973) Directly call super.getDefaultReadLimit when scala issue 12523 is fixed + maxOffsetsPerTrigger.map(ReadLimit.maxRows).getOrElse(ReadLimit.allAvailable()) } } @@ -113,7 +116,13 @@ private[kafka010] class KafkaMicroBatchStream( override def latestOffset(start: Offset, readLimit: ReadLimit): Offset = { val startPartitionOffsets = start.asInstanceOf[KafkaSourceOffset].partitionToOffsets - latestPartitionOffsets = kafkaOffsetReader.fetchLatestOffsets(Some(startPartitionOffsets)) + + // Use the pre-fetched list of partition offsets when Trigger.AvailableNow is enabled. + latestPartitionOffsets = if (allDataForTriggerAvailableNow != null) { + allDataForTriggerAvailableNow + } else { + kafkaOffsetReader.fetchLatestOffsets(Some(startPartitionOffsets)) + } val limits: Seq[ReadLimit] = readLimit match { case rows: CompositeReadLimit => rows.getReadLimits @@ -298,6 +307,11 @@ private[kafka010] class KafkaMicroBatchStream( logWarning(message + s". 
$INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_FALSE") } } + + override def prepareForTriggerAvailableNow(): Unit = { + allDataForTriggerAvailableNow = kafkaOffsetReader.fetchLatestOffsets( + Some(getOrCreateInitialPartitionOffsets())) + } } object KafkaMicroBatchStream extends Logging { diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index 87cef02d0d8f2..09db0a7e82dfe 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -77,7 +77,7 @@ private[kafka010] class KafkaSource( metadataPath: String, startingOffsets: KafkaOffsetRangeLimit, failOnDataLoss: Boolean) - extends SupportsAdmissionControl with Source with Logging { + extends SupportsTriggerAvailableNow with Source with Logging { private val sc = sqlContext.sparkContext @@ -99,6 +99,8 @@ private[kafka010] class KafkaSource( private var lastTriggerMillis = 0L + private var allDataForTriggerAvailableNow: PartitionOffsetMap = _ + /** * Lazily initialize `initialPartitionOffsets` to make sure that `KafkaConsumer.poll` is only * called in StreamExecutionThread. Otherwise, interrupting a thread while running @@ -130,7 +132,8 @@ private[kafka010] class KafkaSource( } else if (minOffsetPerTrigger.isDefined) { ReadLimit.minRows(minOffsetPerTrigger.get, maxTriggerDelayMs) } else { - maxOffsetsPerTrigger.map(ReadLimit.maxRows).getOrElse(super.getDefaultReadLimit) + // TODO (SPARK-37973) Directly call super.getDefaultReadLimit when scala issue 12523 is fixed + maxOffsetsPerTrigger.map(ReadLimit.maxRows).getOrElse(ReadLimit.allAvailable()) } } @@ -159,7 +162,14 @@ private[kafka010] class KafkaSource( // Make sure initialPartitionOffsets is initialized initialPartitionOffsets val currentOffsets = currentPartitionOffsets.orElse(Some(initialPartitionOffsets)) - val latest = kafkaReader.fetchLatestOffsets(currentOffsets) + + // Use the pre-fetched list of partition offsets when Trigger.AvailableNow is enabled. + val latest = if (allDataForTriggerAvailableNow != null) { + allDataForTriggerAvailableNow + } else { + kafkaReader.fetchLatestOffsets(currentOffsets) + } + latestPartitionOffsets = Some(latest) val limits: Seq[ReadLimit] = limit match { @@ -331,6 +341,10 @@ private[kafka010] class KafkaSource( logWarning(message + s". $INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_FALSE") } } + + override def prepareForTriggerAvailableNow(): Unit = { + allDataForTriggerAvailableNow = kafkaReader.fetchLatestOffsets(Some(initialPartitionOffsets)) + } } /** Companion object for the [[KafkaSource]]. 
*/ diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index f61696f6485e6..61be7dd6cd8ef 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -44,7 +44,7 @@ import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution import org.apache.spark.sql.functions.{count, window} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.kafka010.KafkaSourceProvider._ -import org.apache.spark.sql.streaming.{StreamTest, Trigger} +import org.apache.spark.sql.streaming.{StreamingQuery, StreamTest, Trigger} import org.apache.spark.sql.streaming.util.StreamManualClock import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -195,6 +195,45 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { true } + test("Trigger.AvailableNow") { + val topic = newTopic() + testUtils.createTopic(topic, partitions = 5) + + testUtils.sendMessages(topic, (0 until 15).map { case x => + s"foo-$x" + }.toArray, Some(0)) + + val reader = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .option("kafka.metadata.max.age.ms", "1") + .option("maxOffsetsPerTrigger", 5) + .option("subscribe", topic) + .option("startingOffsets", "earliest") + .load() + + var index: Int = 0 + def startTriggerAvailableNowQuery(): StreamingQuery = { + reader.writeStream + .foreachBatch((_: Dataset[Row], _: Long) => { + index += 1 + }) + .trigger(Trigger.AvailableNow) + .start() + } + + val query = startTriggerAvailableNowQuery() + try { + assert(query.awaitTermination(streamingTimeout.toMillis)) + } finally { + query.stop() + } + + // should have 3 batches now i.e. 15 / 5 = 3 + assert(index == 3) + } + test("(de)serialization of initial offsets") { val topic = newTopic() testUtils.createTopic(topic, partitions = 5) From 1aa665239876b32ccf81c9d170e17368c6b44c61 Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 21 Jan 2022 12:00:16 +0100 Subject: [PATCH 072/513] [SPARK-37972][PYTHON][MLLIB] Address typing incompatibilities with numpy==1.22.x ### What changes were proposed in this pull request? This PR: - Updates `Vector.norm` annotation to match numpy counterpart. - Adds cast for numpy `dot` arguments. ### Why are the changes needed? To resolve typing incompatibilities between `pyspark.mllib.linalg` and numpy 1.22. 
``` python/pyspark/mllib/linalg/__init__.py:412: error: Argument 2 to "norm" has incompatible type "Union[float, str]"; expected "Union[None, float, Literal['fro'], Literal['nuc']]" [arg-type] python/pyspark/mllib/linalg/__init__.py:457: error: No overload variant of "dot" matches argument types "ndarray[Any, Any]", "Iterable[float]" [call-overload] python/pyspark/mllib/linalg/__init__.py:457: note: Possible overload variant: python/pyspark/mllib/linalg/__init__.py:457: note: def dot(a: Union[_SupportsArray[dtype[Any]], _NestedSequence[_SupportsArray[dtype[Any]]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], b: Union[_SupportsArray[dtype[Any]], _NestedSequence[_SupportsArray[dtype[Any]]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], out: None = ...) -> Any python/pyspark/mllib/linalg/__init__.py:457: note: <1 more non-matching overload not shown> python/pyspark/mllib/linalg/__init__.py:707: error: Argument 2 to "norm" has incompatible type "Union[float, str]"; expected "Union[None, float, Literal['fro'], Literal['nuc']]" [arg-type] ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? `dev/lint-python`. Closes #35261 from zero323/SPARK-37972. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/mllib/_typing.pyi | 2 ++ python/pyspark/mllib/linalg/__init__.py | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/python/pyspark/mllib/_typing.pyi b/python/pyspark/mllib/_typing.pyi index 51a98cb0b016b..6a1a0f53a5950 100644 --- a/python/pyspark/mllib/_typing.pyi +++ b/python/pyspark/mllib/_typing.pyi @@ -17,6 +17,7 @@ # under the License. from typing import List, Tuple, TypeVar, Union +from typing_extensions import Literal from pyspark.mllib.linalg import Vector from numpy import ndarray # noqa: F401 from py4j.java_gateway import JavaObject @@ -24,3 +25,4 @@ from py4j.java_gateway import JavaObject VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...]] C = TypeVar("C", bound=type) JavaObjectOrPickleDump = Union[JavaObject, bytearray, bytes] +NormType = Union[None, float, Literal["fro"], Literal["nuc"]] diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index bbe87280b74e3..30fa84cf8a0f4 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -61,8 +61,9 @@ ) if TYPE_CHECKING: - from pyspark.mllib._typing import VectorLike + from pyspark.mllib._typing import VectorLike, NormType from scipy.sparse import spmatrix + from numpy.typing import ArrayLike QT = TypeVar("QT") @@ -397,7 +398,7 @@ def numNonzeros(self) -> int: """ return np.count_nonzero(self.array) - def norm(self, p: Union[float, str]) -> np.float64: + def norm(self, p: "NormType") -> np.float64: """ Calculates the norm of a DenseVector. @@ -454,7 +455,7 @@ def dot(self, other: Iterable[float]) -> np.float64: elif isinstance(other, Vector): return np.dot(self.toArray(), other.toArray()) else: - return np.dot(self.toArray(), other) + return np.dot(self.toArray(), cast("ArrayLike", other)) def squared_distance(self, other: Iterable[float]) -> np.float64: """ @@ -692,7 +693,7 @@ def numNonzeros(self) -> int: """ return np.count_nonzero(self.values) - def norm(self, p: Union[float, str]) -> np.float64: + def norm(self, p: "NormType") -> np.float64: """ Calculates the norm of a SparseVector. 
From 8ff48b36cfa1fb10930c69a42887316b81e0b4a0 Mon Sep 17 00:00:00 2001 From: PengLei Date: Fri, 21 Jan 2022 20:20:09 +0800 Subject: [PATCH 073/513] [SPARK-37950][SQL] Take EXTERNAL as a reserved table property ### What changes were proposed in this pull request? Take `external` as a reserved table property. and do not allow use `external` for end-user when `spark.sql.legacy.notReserveProperties` == `false`. ### Why are the changes needed? [#disscuss](https://github.com/apache/spark/pull/35204#issuecomment-1014752053). keep it consistent with other properties like `location` `owner` `provider` and so on. ### Does this PR introduce _any_ user-facing change? Yes. end-user could not use `external` as property key when create table with tblproperties and alter table set tblproperties. ### How was this patch tested? existed testcase. Closes #35268 from Peng-Lei/SPARK-37950. Authored-by: PengLei Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 ++ .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 4 ++++ .../apache/spark/sql/connector/catalog/CatalogV2Util.scala | 3 ++- .../sql/execution/datasources/v2/ShowCreateTableExec.scala | 4 +--- .../sql/execution/datasources/v2/V2SessionCatalogSuite.scala | 3 +-- .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 2 +- 6 files changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 5edf83935dcd2..01c828a0f69bf 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -56,6 +56,8 @@ license: | - Since Spark 3.3, DESCRIBE FUNCTION fails if the function does not exist. In Spark 3.2 or earlier, DESCRIBE FUNCTION can still run and print "Function: func_name not found". + - Since Spark 3.3, the table property `external` becomes reserved. Certain commands will fail if you specify the `external` property, such as `CREATE TABLE ... TBLPROPERTIES` and `ALTER TABLE ... SET TBLPROPERTIES`. In Spark 3.2 and earlier, the table property `external` is silently ignored. You can set `spark.sql.legacy.notReserveProperties` to `true` to restore the old behavior. + ## Upgrading from Spark SQL 3.1 to 3.2 - Since Spark 3.2, ADD FILE/JAR/ARCHIVE commands require each path to be enclosed by `"` or `'` if the path contains whitespaces. 
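As an aside for readers, here is a minimal PySpark sketch of the behavior change described in the migration note above. The table name is made up for illustration, and the exact exception type surfaced to Python may vary, so the sketch catches a generic exception.

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Since Spark 3.3 the 'external' key is reserved, so naming it directly in
# TBLPROPERTIES is expected to be rejected while the statement is parsed.
try:
    spark.sql(
        "CREATE TABLE demo_tbl (id INT) USING parquet "
        "TBLPROPERTIES ('external' = 'true')"
    )
except Exception as error:  # surfaced as a parse/analysis error
    print("reserved property rejected:", error)

# The legacy flag from the migration note restores the old behavior, where
# the property was silently ignored instead of raising an error.
spark.conf.set("spark.sql.legacy.notReserveProperties", "true")
spark.sql(
    "CREATE TABLE demo_tbl (id INT) USING parquet "
    "TBLPROPERTIES ('external' = 'true')"
)
```

The parser error added in the diff below points users at `CREATE EXTERNAL TABLE` as the supported way to mark a table as external.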
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 6a509db73718c..35fe084e1a4ed 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3203,6 +3203,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg throw QueryParsingErrors.cannotCleanReservedTablePropertyError( PROP_OWNER, ctx, "it will be set to the current user") case (PROP_OWNER, _) => false + case (PROP_EXTERNAL, _) if !legacyOn => + throw QueryParsingErrors.cannotCleanReservedTablePropertyError( + PROP_EXTERNAL, ctx, "please use CREATE EXTERNAL TABLE") + case (PROP_EXTERNAL, _) => false case _ => true } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index 597b3c3884c62..4092674046eca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -47,7 +47,8 @@ private[sql] object CatalogV2Util { Seq(TableCatalog.PROP_COMMENT, TableCatalog.PROP_LOCATION, TableCatalog.PROP_PROVIDER, - TableCatalog.PROP_OWNER) + TableCatalog.PROP_OWNER, + TableCatalog.PROP_EXTERNAL) /** * The list of reserved namespace properties, which can not be removed or changed directly by diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala index 8b3ad95216486..abe8d67adb856 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala @@ -127,9 +127,7 @@ case class ShowCreateTableExec( val showProps = table.properties.asScala .filterKeys(key => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(key) && !key.startsWith(TableCatalog.OPTION_PREFIX) - && !tableOptions.contains(key) - && !key.equals(TableCatalog.PROP_EXTERNAL) - ) + && !tableOptions.contains(key)) if (showProps.nonEmpty) { val props = showProps.toSeq.sortBy(_._1).map { case (key, value) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala index 646eccb4cdd7a..1aa8e3736cfe2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala @@ -785,8 +785,7 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { private def filterV2TableProperties( properties: util.Map[String, String]): Map[String, String] = { properties.asScala.filter(kv => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(kv._1)) - .filter(!_._1.startsWith(TableCatalog.OPTION_PREFIX)) - .filter(_._1 != TableCatalog.PROP_EXTERNAL).toMap + .filter(!_._1.startsWith(TableCatalog.OPTION_PREFIX)).toMap } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 85e3d0b53ba7d..e5bd1aa1194ce 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -844,7 +844,7 @@ class HiveDDLSuite assert( catalog.getTableMetadata(TableIdentifier(tabName)).tableType == CatalogTableType.MANAGED) // The table property is case sensitive. Thus, external is allowed - sql(s"ALTER TABLE $tabName SET TBLPROPERTIES ('external' = 'TRUE')") + sql(s"ALTER TABLE $tabName SET TBLPROPERTIES ('External' = 'TRUE')") // The table type is not changed to external assert( catalog.getTableMetadata(TableIdentifier(tabName)).tableType == CatalogTableType.MANAGED) From 50758ab1a3d6a5f73a2419149a1420d103930f77 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 21 Jan 2022 20:30:41 +0800 Subject: [PATCH 074/513] [SPARK-37907][SQL] InvokeLike support ConstantFolding ### What changes were proposed in this pull request? Currently, `InvokeLike` not implement `foldable`, can't be optimized by ConstantFolding, this pr support this ### Why are the changes needed? Make StaticInvoke support ConstantFolding ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UT Closes #35207 from AngersZhuuuu/SPARK-37907. Authored-by: Angerszhuuuu Signed-off-by: Wenchen Fan --- .../expressions/objects/objects.scala | 1 + .../optimizer/ConstantFoldingSuite.scala | 40 +++++++++++++++++++ .../spark/sql/CharVarcharTestSuite.scala | 4 +- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 6d251b6d1007d..68a55f7f11696 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -50,6 +50,7 @@ trait InvokeLike extends Expression with NonSQLExpression with ImplicitCastInput def propagateNull: Boolean + override def foldable: Boolean = children.forall(_.foldable) && deterministic protected lazy val needNullCheck: Boolean = needNullCheckForIndex.contains(true) protected lazy val needNullCheckForIndex: Array[Boolean] = arguments.map(a => a.nullable && (propagateNull || diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index ae644c1110740..6f4f70423357b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -21,10 +21,13 @@ import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, Unresol import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, NewInstance, StaticInvoke} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ 
+import org.apache.spark.unsafe.types.ByteArray class ConstantFoldingSuite extends PlanTest { @@ -299,4 +302,41 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("SPARK-37907: InvokeLike support ConstantFolding") { + val originalQuery = + testRelation + .select( + StaticInvoke( + classOf[ByteArray], + BinaryType, + "lpad", + Seq(Literal("Spark".getBytes), Literal(7), Literal("W".getBytes)), + Seq(BinaryType, IntegerType, BinaryType), + returnNullable = false).as("c1"), + Invoke( + Literal.create("a", StringType), + "substring", + StringType, + Seq(Literal(0), Literal(1))).as("c2"), + NewInstance( + cls = classOf[GenericArrayData], + arguments = Literal.fromObject(List(1, 2, 3)) :: Nil, + inputTypes = Nil, + propagateNull = false, + dataType = ArrayType(IntegerType), + outerPointer = None).as("c3")) + + val optimized = Optimize.execute(originalQuery.analyze) + + val correctAnswer = + testRelation + .select( + Literal("WWSpark".getBytes()).as("c1"), + Literal.create("a", StringType).as("c2"), + Literal.create(new GenericArrayData(List(1, 2, 3)), ArrayType(IntegerType)).as("c3")) + .analyze + + comparePlans(optimized, correctAnswer) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index 10eacdb08c424..6ade7a7c99e37 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -332,8 +332,8 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { sql(s"CREATE TABLE t(c STRUCT) USING $format") sql("INSERT INTO t SELECT struct(null)") checkAnswer(spark.table("t"), Row(Row(null))) - val e = intercept[SparkException](sql("INSERT INTO t SELECT struct('123456')")) - assert(e.getCause.getMessage.contains(s"Exceeds char/varchar type length limitation: 5")) + val e = intercept[RuntimeException](sql("INSERT INTO t SELECT struct('123456')")) + assert(e.getMessage.contains(s"Exceeds char/varchar type length limitation: 5")) } } From b4f0b18daf1c83222fb585ee4d4d006c6900a063 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 23 Jan 2022 10:04:06 +0900 Subject: [PATCH 075/513] [MINOR][PYTHON] Replace `@abstractproperty` with `@property` + `@abstractmethod` ### What changes were proposed in this pull request? This PR replaces deprecated `abstractproperty` with recommended `property` and `abstractmethod` combination. ### Why are the changes needed? `abstractproperty` [has been deprecated in Python 3.3](https://docs.python.org/3/library/abc.html?highlight=abstractproperty#abc.abstractproperty) ![Python docs snap](https://user-images.githubusercontent.com/1554276/150641161-6ed03943-35d0-4d23-b598-4f942c2419f9.png) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35281 from zero323/ABSTRACT-PROPERTY. Authored-by: zero323 Signed-off-by: Hyukjin Kwon --- python/pyspark/ml/base.py | 5 +++-- python/pyspark/ml/classification.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/pyspark/ml/base.py b/python/pyspark/ml/base.py index 9d2a1917d9f0e..d984209685167 100644 --- a/python/pyspark/ml/base.py +++ b/python/pyspark/ml/base.py @@ -15,7 +15,7 @@ # limitations under the License. 
# -from abc import ABCMeta, abstractmethod, abstractproperty +from abc import ABCMeta, abstractmethod import copy import threading @@ -360,7 +360,8 @@ def setPredictionCol(self, value): """ return self._set(predictionCol=value) - @abstractproperty + @property + @abstractmethod @since("2.1.0") def numFeatures(self): """ diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index e6ce3e0b9ae89..058740e820542 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -20,7 +20,7 @@ import sys import uuid import warnings -from abc import ABCMeta, abstractmethod, abstractproperty +from abc import ABCMeta, abstractmethod from multiprocessing.pool import ThreadPool from pyspark import keyword_only, since, SparkContext, inheritable_thread_target @@ -155,7 +155,8 @@ def setRawPredictionCol(self, value): """ return self._set(rawPredictionCol=value) - @abstractproperty + @property + @abstractmethod @since("2.1.0") def numClasses(self): """ From 1ff40d61cee754d3ba60ee45f839dba76a9955d3 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Sun, 23 Jan 2022 11:32:41 +0900 Subject: [PATCH 076/513] [SPARK-37886][PYTHON][TESTS] Use ComparisonTestBase as base class in OpsTestBase ### What changes were proposed in this pull request? - Rename TestCasesUtils to OpsTestBase - Make OpsTestCase inherited from `ComparisonTestBase`(`PandasOnSparkTestCase` with `pdf` and `psdf`) - Make `*OpsTest` inherited from `OpsTestBase` ### Why are the changes needed? All data type ops related tests case are using `PandasOnSparkTestCase, TestCasesUtils` as basic classes, we'd better just let `TestCasesUtils` inherited from `PandasOnSparkTestCase` instead of multiple inheritance. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #35203 from Yikun/opstest_refactor. 
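To make the shape of this refactor concrete, below is a runnable, simplified model using stand-in classes; the real bases are `PandasOnSparkTestCase` and `ComparisonTestBase` in `pyspark.testing.pandasutils`, and the class bodies here are invented for illustration only.

```
import unittest


class PandasOnSparkTestCase(unittest.TestCase):
    """Stand-in for the real Spark-backed test base."""


class ComparisonTestBase(PandasOnSparkTestCase):
    """Stand-in: the real class provides the pdf/psdf comparison fixtures."""

    @property
    def pdf(self):
        return {"a": [1, 2, 3]}


# Before the change, every suite combined two parents:
#     class BinaryOpsTest(PandasOnSparkTestCase, TestCasesUtils): ...
# After it, the shared helpers inherit the test base once...
class OpsTestBase(ComparisonTestBase):
    @property
    def df_cols(self):
        return list(self.pdf.keys())


# ...and each concrete *OpsTest needs a single parent.
class BinaryOpsTest(OpsTestBase):
    def test_fixtures_are_inherited(self):
        self.assertEqual(self.df_cols, ["a"])


if __name__ == "__main__":
    unittest.main()
```

Collapsing the multiple inheritance means fixtures such as `psdf` are defined once in the shared base rather than re-declared in the utility mixin, which is what the removal of the `psdf` property from `testing_utils.py` in the diff below reflects.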
Authored-by: Yikun Jiang Signed-off-by: Hyukjin Kwon --- .../pandas/tests/data_type_ops/test_binary_ops.py | 5 ++--- .../pandas/tests/data_type_ops/test_boolean_ops.py | 7 +++---- .../pandas/tests/data_type_ops/test_categorical_ops.py | 5 ++--- .../pandas/tests/data_type_ops/test_complex_ops.py | 5 ++--- .../pandas/tests/data_type_ops/test_date_ops.py | 5 ++--- .../pandas/tests/data_type_ops/test_datetime_ops.py | 5 ++--- .../pandas/tests/data_type_ops/test_null_ops.py | 5 ++--- .../pyspark/pandas/tests/data_type_ops/test_num_ops.py | 9 ++++----- .../pandas/tests/data_type_ops/test_string_ops.py | 7 +++---- .../pandas/tests/data_type_ops/test_timedelta_ops.py | 5 ++--- .../pyspark/pandas/tests/data_type_ops/test_udt_ops.py | 5 ++--- .../pandas/tests/data_type_ops/testing_utils.py | 10 ++++------ 12 files changed, 30 insertions(+), 43 deletions(-) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py index 5dc7f8096855b..35fcb3705a310 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py @@ -19,11 +19,10 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class BinaryOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class BinaryOpsTest(OpsTestBase): @property def pser(self): return pd.Series([b"1", b"2", b"3"]) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py index b83b610d0cc21..02bb048ee5bc8 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py @@ -25,15 +25,14 @@ from pyspark import pandas as ps from pyspark.pandas import option_context -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import ( extension_float_dtypes_available, extension_object_dtypes_available, ) -from pyspark.testing.pandasutils import PandasOnSparkTestCase -class BooleanOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class BooleanOpsTest(OpsTestBase): @property def bool_pdf(self): return pd.DataFrame({"this": [True, False, True], "that": [False, True, True]}) @@ -381,7 +380,7 @@ def test_ge(self): @unittest.skipIf( not extension_object_dtypes_available, "pandas extension object dtypes are not available" ) -class BooleanExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class BooleanExtensionOpsTest(OpsTestBase): @property def boolean_pdf(self): return pd.DataFrame( diff --git a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py index e07af724f6905..b84c35bb104f9 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py @@ -23,11 +23,10 @@ from pyspark import pandas as ps from pyspark.pandas.config import option_context -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import 
OpsTestBase -class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class CategoricalOpsTest(OpsTestBase): @property def pdf(self): return pd.DataFrame( diff --git a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py index 91a92badf8cd6..cc9a0bf4a7430 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py @@ -21,11 +21,10 @@ import pandas as pd from pyspark import pandas as ps -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class ComplexOpsTest(OpsTestBase): @property def pser(self): return pd.Series([[1, 2, 3]]) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py index 8c196d2a715bb..f0585c3f5a14f 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py @@ -21,11 +21,10 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class DateOpsTest(OpsTestBase): @property def pser(self): return pd.Series( diff --git a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py index 5eba4855f93ae..f29f9d375e47f 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py @@ -21,11 +21,10 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class DatetimeOpsTest(OpsTestBase): @property def pser(self): return pd.Series(pd.date_range("1994-1-31 10:30:15", periods=3, freq="D")) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py index c2b6be29038bd..009d4d0aba019 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py @@ -19,11 +19,10 @@ from pandas.api.types import CategoricalDtype import pyspark.pandas as ps -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class NullOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class NullOpsTest(OpsTestBase): @property def pser(self): return pd.Series([None, None, None]) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py index 785eb250a72b3..0c2c94eab8ef1 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +++ 
b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py @@ -25,17 +25,16 @@ from pyspark import pandas as ps from pyspark.pandas.config import option_context -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import ( extension_dtypes_available, extension_float_dtypes_available, extension_object_dtypes_available, ) from pyspark.sql.types import DecimalType, IntegralType -from pyspark.testing.pandasutils import PandasOnSparkTestCase -class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class NumOpsTest(OpsTestBase): """Unit tests for arithmetic operations of numeric data types. A few test cases are disabled because pandas-on-Spark returns float64 whereas pandas @@ -450,7 +449,7 @@ def test_ge(self): @unittest.skipIf(not extension_dtypes_available, "pandas extension dtypes are not available") -class IntegralExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class IntegralExtensionOpsTest(OpsTestBase): @property def intergral_extension_psers(self): return [pd.Series([1, 2, 3, None], dtype=dtype) for dtype in self.integral_extension_dtypes] @@ -590,7 +589,7 @@ def test_rxor(self): @unittest.skipIf( not extension_float_dtypes_available, "pandas extension float dtypes are not available" ) -class FractionalExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class FractionalExtensionOpsTest(OpsTestBase): @property def fractional_extension_psers(self): return [ diff --git a/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py index f7c45cc429837..572ea7688cb7f 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py @@ -23,15 +23,14 @@ from pyspark import pandas as ps from pyspark.pandas.config import option_context -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import extension_object_dtypes_available -from pyspark.testing.pandasutils import PandasOnSparkTestCase if extension_object_dtypes_available: from pandas import StringDtype -class StringOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class StringOpsTest(OpsTestBase): @property def bool_pdf(self): return pd.DataFrame({"this": ["x", "y", "z"], "that": ["z", "y", "x"]}) @@ -237,7 +236,7 @@ def test_ge(self): @unittest.skipIf( not extension_object_dtypes_available, "pandas extension object dtypes are not available" ) -class StringExtensionOpsTest(StringOpsTest, PandasOnSparkTestCase, TestCasesUtils): +class StringExtensionOpsTest(StringOpsTest): @property def pser(self): return pd.Series(["x", "y", "z", None], dtype="string") diff --git a/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py index 40882b8f24a90..16788c06c7c92 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py @@ -21,11 +21,10 @@ from pandas.api.types import CategoricalDtype import pyspark.pandas as ps -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class TimedeltaOpsTest(PandasOnSparkTestCase, 
TestCasesUtils): +class TimedeltaOpsTest(OpsTestBase): @property def pser(self): return pd.Series([timedelta(1), timedelta(microseconds=2), timedelta(weeks=3)]) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py index 70175c4a97d2b..a71691c036cfe 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py @@ -19,11 +19,10 @@ import pyspark.pandas as ps from pyspark.ml.linalg import SparseVector -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class UDTOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class UDTOpsTest(OpsTestBase): @property def pser(self): sparse_values = {0: 0.1, 1: 1.1} diff --git a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py index 9f57ad4832da2..222b945265264 100644 --- a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +++ b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py @@ -31,6 +31,8 @@ extension_object_dtypes_available, ) +from pyspark.testing.pandasutils import ComparisonTestBase + if extension_dtypes_available: from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype @@ -41,8 +43,8 @@ from pandas import BooleanDtype, StringDtype -class TestCasesUtils: - """A utility holding common test cases for arithmetic operations of different data types.""" +class OpsTestBase(ComparisonTestBase): + """The test base for arithmetic operations of different data types.""" @property def numeric_pdf(self): @@ -110,10 +112,6 @@ def non_numeric_df_cols(self): def pdf(self): return pd.concat([self.numeric_pdf, self.non_numeric_pdf], axis=1) - @property - def psdf(self): - return ps.from_pandas(self.pdf) - @property def df_cols(self): return self.pdf.columns From ddc77fb906cb3ce1567d277c2d0850104c89ac25 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Sun, 23 Jan 2022 11:51:27 +0800 Subject: [PATCH 077/513] [SPARK-37986][SQL] Support TimestampNTZ in radix sort ### What changes were proposed in this pull request? Make `TimestampNTZ` data type support radix sort in SQL ### Why are the changes needed? Better performance when sort by one TimestampNTZ column only ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test case in SortSuite Closes #35279 from gengliangwang/NTZRadixSort. 
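For context, a small PySpark sketch of the query shape this change targets: a sort keyed on a single TIMESTAMP_NTZ column. Producing the column by casting from a regular timestamp is just one convenient construction and is an assumption of the sketch, not part of the change.

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Build a column of wall-clock (time-zone-less) timestamps by casting,
# assuming TIMESTAMP -> TIMESTAMP_NTZ casts are available in this version.
df = spark.sql("""
    SELECT id,
           CAST(timestamp_seconds(id) AS TIMESTAMP_NTZ) AS event_time
    FROM range(100000)
""")

# Sorting on the lone TIMESTAMP_NTZ key is the pattern that can now take the
# radix-sort path instead of the comparison-based sort.
rows = df.orderBy("event_time").collect()
print(rows[0])
```

Radix sort applies when the whole sort key fits in the 8-byte sort prefix, which is why the `canSortFullyWithPrefix` and prefix-comparator changes below are what enable it for a single TIMESTAMP_NTZ key (stored internally as a long of microseconds).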
Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../apache/spark/sql/catalyst/expressions/SortOrder.scala | 8 +++++--- .../org/apache/spark/sql/execution/SortPrefixUtils.scala | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 8e6f07611bfe8..974d4b5f86889 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -132,7 +132,8 @@ object SortOrder { case class SortPrefix(child: SortOrder) extends UnaryExpression { val nullValue = child.child.dataType match { - case BooleanType | DateType | TimestampType | _: IntegralType | _: AnsiIntervalType => + case BooleanType | DateType | TimestampType | TimestampNTZType | + _: IntegralType | _: AnsiIntervalType => if (nullAsSmallest) Long.MinValue else Long.MaxValue case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS => if (nullAsSmallest) Long.MinValue else Long.MaxValue @@ -154,7 +155,8 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression { private lazy val calcPrefix: Any => Long = child.child.dataType match { case BooleanType => (raw) => if (raw.asInstanceOf[Boolean]) 1 else 0 - case DateType | TimestampType | _: IntegralType | _: AnsiIntervalType => (raw) => + case DateType | TimestampType | TimestampNTZType | + _: IntegralType | _: AnsiIntervalType => (raw) => raw.asInstanceOf[java.lang.Number].longValue() case FloatType | DoubleType => (raw) => { val dVal = raw.asInstanceOf[java.lang.Number].doubleValue() @@ -198,7 +200,7 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression { s"$input ? 
1L : 0L" case _: IntegralType => s"(long) $input" - case DateType | TimestampType | _: AnsiIntervalType => + case DateType | TimestampType | TimestampNTZType | _: AnsiIntervalType => s"(long) $input" case FloatType | DoubleType => s"$DoublePrefixCmp.computePrefix((double)$input)" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala index a1b093f88f862..4b561b813067e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala @@ -43,7 +43,7 @@ object SortPrefixUtils { case StringType => stringPrefixComparator(sortOrder) case BinaryType => binaryPrefixComparator(sortOrder) case BooleanType | ByteType | ShortType | IntegerType | LongType | DateType | TimestampType | - _: AnsiIntervalType => + TimestampNTZType | _: AnsiIntervalType => longPrefixComparator(sortOrder) case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS => longPrefixComparator(sortOrder) @@ -123,7 +123,7 @@ object SortPrefixUtils { def canSortFullyWithPrefix(sortOrder: SortOrder): Boolean = { sortOrder.dataType match { case BooleanType | ByteType | ShortType | IntegerType | LongType | DateType | - TimestampType | FloatType | DoubleType | _: AnsiIntervalType => + TimestampType | TimestampNTZType | FloatType | DoubleType | _: AnsiIntervalType => true case dt: DecimalType if dt.precision <= Decimal.MAX_LONG_DIGITS => true From 3eca309fa8b70f65b18ceeac75c4e613c18368af Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 24 Jan 2022 08:52:07 +0900 Subject: [PATCH 078/513] [SPARK-37985][SQL] Fix flaky test for SPARK-37578 ### What changes were proposed in this pull request? In SQL test `SPARK-37578: Update output metrics from Datasource v2` Execution's metric value is not null seem only can promise the execution is not still running. But can't promise bytesWrittenListener have received all TaskEnd event, cause test failed. ### Why are the changes needed? Fix flaky test ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existed UT Closes #35277 from AngersZhuuuu/SPARK-37985. Authored-by: Angerszhuuuu Signed-off-by: Hyukjin Kwon --- .../spark/sql/execution/ui/SQLAppStatusListenerSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala index 8eaeefccc5ec3..ad744696f5472 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala @@ -933,6 +933,7 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils statusStore.executionsList().last.metricValues != null) } + spark.sparkContext.listenerBus.waitUntilEmpty() assert(bytesWritten.sum == 246) assert(recordsWritten.sum == 20) } finally { From 2b2c4056476bfde565d0d59776116654ea09beaf Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 24 Jan 2022 08:54:51 +0900 Subject: [PATCH 079/513] [SPARK-37408][PYTHON] Inline type hints for pyspark.ml.image ### What changes were proposed in this pull request? Migration of `pyspark.ml.image` annotations from stub files to inline type hints. ### Why are the changes needed? As a part of ongoing migration process. 
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35280 from zero323/SPARK-37408. Authored-by: zero323 Signed-off-by: Hyukjin Kwon --- python/pyspark/ml/image.py | 44 +++++++++++++++++++++---------------- python/pyspark/ml/image.pyi | 40 --------------------------------- 2 files changed, 25 insertions(+), 59 deletions(-) delete mode 100644 python/pyspark/ml/image.pyi diff --git a/python/pyspark/ml/image.py b/python/pyspark/ml/image.py index 7188ef3d10963..6dc97ac246ab3 100644 --- a/python/pyspark/ml/image.py +++ b/python/pyspark/ml/image.py @@ -25,12 +25,13 @@ """ import sys +from typing import Any, Dict, List, NoReturn, Optional, cast import numpy as np from distutils.version import LooseVersion from pyspark import SparkContext -from pyspark.sql.types import Row, _create_row, _parse_datatype_json_string +from pyspark.sql.types import Row, StructType, _create_row, _parse_datatype_json_string from pyspark.sql import SparkSession __all__ = ["ImageSchema"] @@ -43,15 +44,15 @@ class _ImageSchema: APIs of this class. """ - def __init__(self): - self._imageSchema = None - self._ocvTypes = None - self._columnSchema = None - self._imageFields = None - self._undefinedImageType = None + def __init__(self) -> None: + self._imageSchema: Optional[StructType] = None + self._ocvTypes: Optional[Dict[str, int]] = None + self._columnSchema: Optional[StructType] = None + self._imageFields: Optional[List[str]] = None + self._undefinedImageType: Optional[str] = None @property - def imageSchema(self): + def imageSchema(self) -> StructType: """ Returns the image schema. @@ -66,12 +67,13 @@ def imageSchema(self): if self._imageSchema is None: ctx = SparkContext._active_spark_context + assert ctx is not None and ctx._jvm is not None jschema = ctx._jvm.org.apache.spark.ml.image.ImageSchema.imageSchema() - self._imageSchema = _parse_datatype_json_string(jschema.json()) + self._imageSchema = cast(StructType, _parse_datatype_json_string(jschema.json())) return self._imageSchema @property - def ocvTypes(self): + def ocvTypes(self) -> Dict[str, int]: """ Returns the OpenCV type mapping supported. @@ -85,11 +87,12 @@ def ocvTypes(self): if self._ocvTypes is None: ctx = SparkContext._active_spark_context + assert ctx is not None and ctx._jvm is not None self._ocvTypes = dict(ctx._jvm.org.apache.spark.ml.image.ImageSchema.javaOcvTypes()) return self._ocvTypes @property - def columnSchema(self): + def columnSchema(self) -> StructType: """ Returns the schema for the image column. @@ -104,12 +107,13 @@ def columnSchema(self): if self._columnSchema is None: ctx = SparkContext._active_spark_context + assert ctx is not None and ctx._jvm is not None jschema = ctx._jvm.org.apache.spark.ml.image.ImageSchema.columnSchema() - self._columnSchema = _parse_datatype_json_string(jschema.json()) + self._columnSchema = cast(StructType, _parse_datatype_json_string(jschema.json())) return self._columnSchema @property - def imageFields(self): + def imageFields(self) -> List[str]: """ Returns field names of image columns. @@ -123,11 +127,12 @@ def imageFields(self): if self._imageFields is None: ctx = SparkContext._active_spark_context + assert ctx is not None and ctx._jvm is not None self._imageFields = list(ctx._jvm.org.apache.spark.ml.image.ImageSchema.imageFields()) return self._imageFields @property - def undefinedImageType(self): + def undefinedImageType(self) -> str: """ Returns the name of undefined image type for the invalid image. 
@@ -136,12 +141,13 @@ def undefinedImageType(self): if self._undefinedImageType is None: ctx = SparkContext._active_spark_context + assert ctx is not None and ctx._jvm is not None self._undefinedImageType = ( ctx._jvm.org.apache.spark.ml.image.ImageSchema.undefinedImageType() ) return self._undefinedImageType - def toNDArray(self, image): + def toNDArray(self, image: Row) -> np.ndarray: """ Converts an image to an array with metadata. @@ -181,7 +187,7 @@ def toNDArray(self, image): strides=(width * nChannels, nChannels, 1), ) - def toImage(self, array, origin=""): + def toImage(self, array: np.ndarray, origin: str = "") -> Row: """ Converts an array with metadata to a two-dimensional image. @@ -238,14 +244,14 @@ def toImage(self, array, origin=""): # Monkey patch to disallow instantiation of this class. -def _disallow_instance(_): +def _disallow_instance(_: Any) -> NoReturn: raise RuntimeError("Creating instance of _ImageSchema class is disallowed.") -_ImageSchema.__init__ = _disallow_instance +_ImageSchema.__init__ = _disallow_instance # type: ignore[assignment] -def _test(): +def _test() -> None: import doctest import pyspark.ml.image diff --git a/python/pyspark/ml/image.pyi b/python/pyspark/ml/image.pyi deleted file mode 100644 index 206490aaa82d5..0000000000000 --- a/python/pyspark/ml/image.pyi +++ /dev/null @@ -1,40 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Dict, List - -from pyspark.sql.types import Row, StructType - -from numpy import ndarray - -class _ImageSchema: - def __init__(self) -> None: ... - @property - def imageSchema(self) -> StructType: ... - @property - def ocvTypes(self) -> Dict[str, int]: ... - @property - def columnSchema(self) -> StructType: ... - @property - def imageFields(self) -> List[str]: ... - @property - def undefinedImageType(self) -> str: ... - def toNDArray(self, image: Row) -> ndarray: ... - def toImage(self, array: ndarray, origin: str = ...) -> Row: ... - -ImageSchema: _ImageSchema From 60a7c3b67e9566f205eb2a65d94daac76c867268 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Mon, 24 Jan 2022 08:55:30 +0900 Subject: [PATCH 080/513] [SPARK-37188][PYTHON] Respect user input layout kwargs in plot.hist ### What changes were proposed in this pull request? ``` s = ps.Series([1, 3, 2]) plt = s.plot.hist(title="Title") ``` Before this patch: ![image](https://user-images.githubusercontent.com/1736354/150648238-0fc08e19-f4e3-43dc-9378-fe5810bc4df3.png) After this patch (with title): ![image](https://user-images.githubusercontent.com/1736354/150648232-39daf7a6-1527-4cb0-ae45-ec7fd6704a1f.png) ### Why are the changes needed? pyspark.pandas histogram accepts the title option but does not add a title to the plot ### Does this PR introduce _any_ user-facing change? 
Yes ### How was this patch tested? - UT passed - Manual test Closes #35282 from Yikun/SPARK-37188-hist. Authored-by: Yikun Jiang Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/plot/plotly.py | 7 ++++++- python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py | 7 +++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/python/pyspark/pandas/plot/plotly.py b/python/pyspark/pandas/plot/plotly.py index dfcc13931d4bb..ebf23416344d4 100644 --- a/python/pyspark/pandas/plot/plotly.py +++ b/python/pyspark/pandas/plot/plotly.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import inspect from typing import TYPE_CHECKING, Union import pandas as pd @@ -109,7 +110,11 @@ def plot_histogram(data: Union["ps.DataFrame", "ps.Series"], **kwargs): ) ) - fig = go.Figure(data=bars, layout=go.Layout(barmode="stack")) + layout_keys = inspect.signature(go.Layout).parameters.keys() + layout_kwargs = {k: v for k, v in kwargs.items() if k in layout_keys} + + fig = go.Figure(data=bars, layout=go.Layout(**layout_kwargs)) + fig["layout"]["barmode"] = "stack" fig["layout"]["xaxis"]["title"] = "value" fig["layout"]["yaxis"]["title"] = "count" return fig diff --git a/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py b/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py index 7be00d593ee36..2937ef1813f74 100644 --- a/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +++ b/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py @@ -186,6 +186,13 @@ def check_pie_plot(psdf): # ) # check_pie_plot(psdf1) + def test_hist_layout_kwargs(self): + s = ps.Series([1, 3, 2]) + plt = s.plot.hist(title="Title", foo="xxx") + self.assertEqual(plt.layout.barmode, "stack") + self.assertEqual(plt.layout.title.text, "Title") + self.assertFalse(hasattr(plt.layout, "foo")) + def test_hist_plot(self): def check_hist_plot(psdf): bins = np.array([1.0, 5.9, 10.8, 15.7, 20.6, 25.5, 30.4, 35.3, 40.2, 45.1, 50.0]) From 3922b9b74a115347424284cc56775db3e3c031e8 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 24 Jan 2022 09:11:49 +0900 Subject: [PATCH 081/513] [SPARK-37992][PYTHON] Restore Mypy version checks in dev/lint-python ### What changes were proposed in this pull request? This PR restores Mypy version checks in `dev/lint-python` Additionally, `MINIMUM_MYPY` is updated to match one used in GitHub workflow. ### Why are the changes needed? These were accidentally removed in SPARK-36997. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Run `dev/lint-python` with versions satisfying and violating constraints, to ensure correct behavior. Closes #35294 from zero323/SPARK-37992. Authored-by: zero323 Signed-off-by: Hyukjin Kwon --- dev/lint-python | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dev/lint-python b/dev/lint-python index c40198e87c2d6..35ba7a496839b 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -18,7 +18,7 @@ # define test binaries + versions FLAKE8_BUILD="flake8" MINIMUM_FLAKE8="3.9.0" -MINIMUM_MYPY="0.910" +MINIMUM_MYPY="0.920" MYPY_BUILD="mypy" PYTEST_BUILD="pytest" @@ -152,6 +152,15 @@ function mypy_test { return fi + _MYPY_VERSION=($($MYPY_BUILD --version)) + MYPY_VERSION="${_MYPY_VERSION[1]}" + EXPECTED_MYPY="$(satisfies_min_version $MYPY_VERSION $MINIMUM_MYPY)" + + if [[ "$EXPECTED_MYPY" == "False" ]]; then + echo "The minimum mypy version needs to be $MINIMUM_MYPY. Your current version is $MYPY_VERSION. Skipping for now." 
+ return + fi + mypy_annotation_test mypy_examples_test mypy_data_test From f8ff7863e792b833afb2ff603878f29d4a9888e6 Mon Sep 17 00:00:00 2001 From: weixiuli Date: Sun, 23 Jan 2022 20:23:20 -0600 Subject: [PATCH 082/513] [SPARK-37984][SHUFFLE] Avoid calculating all outstanding requests to improve performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Avoid calculating all outstanding requests to improve performance. ### Why are the changes needed? Follow the comment (https://github.com/apache/spark/pull/34711#pullrequestreview-835520984) , we can implement a "has outstanding requests" method in the response handler that doesn't even need to get a count,let's do this with PR. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Exist unittests. Closes #35276 from weixiuli/SPARK-37984. Authored-by: weixiuli Signed-off-by: Sean Owen --- .../spark/network/client/TransportResponseHandler.java | 10 ++++++++-- .../spark/network/server/TransportChannelHandler.java | 3 +-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java index 576c08858d6c3..261f20540a297 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java @@ -140,7 +140,7 @@ public void channelActive() { @Override public void channelInactive() { - if (numOutstandingRequests() > 0) { + if (hasOutstandingRequests()) { String remoteAddress = getRemoteAddress(channel); logger.error("Still have {} requests outstanding when connection from {} is closed", numOutstandingRequests(), remoteAddress); @@ -150,7 +150,7 @@ public void channelInactive() { @Override public void exceptionCaught(Throwable cause) { - if (numOutstandingRequests() > 0) { + if (hasOutstandingRequests()) { String remoteAddress = getRemoteAddress(channel); logger.error("Still have {} requests outstanding when connection from {} is closed", numOutstandingRequests(), remoteAddress); @@ -275,6 +275,12 @@ public int numOutstandingRequests() { (streamActive ? 1 : 0); } + /** Check if there are any outstanding requests (fetch requests + rpcs) */ + public Boolean hasOutstandingRequests() { + return streamActive || !outstandingFetches.isEmpty() || !outstandingRpcs.isEmpty() || + !streamCallbacks.isEmpty(); + } + /** Returns the time in nanoseconds of when the last request was sent out. 
*/ public long getTimeOfLastRequestNs() { return timeOfLastRequestNs.get(); diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java index 275e64ee50f26..d197032003e6e 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java @@ -161,8 +161,7 @@ public void userEventTriggered(ChannelHandlerContext ctx, Object evt) throws Exc boolean isActuallyOverdue = System.nanoTime() - responseHandler.getTimeOfLastRequestNs() > requestTimeoutNs; if (e.state() == IdleState.ALL_IDLE && isActuallyOverdue) { - boolean hasInFlightRequests = responseHandler.numOutstandingRequests() > 0; - if (hasInFlightRequests) { + if (responseHandler.hasOutstandingRequests()) { String address = getRemoteAddress(ctx.channel()); logger.error("Connection to {} has been quiet for {} ms while there are outstanding " + "requests. Assuming connection is dead; please adjust" + From 47276ab99020e0b903cc2620dcca74e5bbaf7982 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 24 Jan 2022 14:46:13 +0900 Subject: [PATCH 083/513] [SPARK-37990][SQL] Support TimestampNTZ in RowToColumnConverter ### What changes were proposed in this pull request? Support TimestampNTZ in RowToColumnConverter ### Why are the changes needed? Support converting `InternalRow` with TimestampNTZ type column to `WritableColumnVector`, as all the other data type does. ### Does this PR introduce _any_ user-facing change? No, the TimestampNTZ type is not released yet. ### How was this patch tested? Unit test Closes #35288 from gengliangwang/RowToColumnConverterNTZ. 
Authored-by: Gengliang Wang Signed-off-by: Hyukjin Kwon --- .../apache/spark/sql/execution/Columnar.scala | 2 +- .../vectorized/ColumnarBatchSuite.scala | 34 ++++++++++++++----- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala index 70a508e6b7ec9..1971b8b1baf09 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala @@ -264,7 +264,7 @@ private object RowToColumnConverter { case ShortType => ShortConverter case IntegerType | DateType | _: YearMonthIntervalType => IntConverter case FloatType => FloatConverter - case LongType | TimestampType | _: DayTimeIntervalType => LongConverter + case LongType | TimestampType | TimestampNTZType | _: DayTimeIntervalType => LongConverter case DoubleType => DoubleConverter case StringType => StringConverter case CalendarIntervalType => CalendarConverter diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala index 738f2281c9a65..0395798d9e7ab 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.vectorized import java.nio.ByteBuffer import java.nio.ByteOrder import java.nio.charset.StandardCharsets +import java.time.LocalDateTime import java.util import java.util.NoSuchElementException @@ -1591,10 +1592,21 @@ class ColumnarBatchSuite extends SparkFunSuite { )) :: StructField("int_to_int", MapType(IntegerType, IntegerType)) :: StructField("binary", BinaryType) :: + StructField("ts_ntz", TimestampNTZType) :: Nil) var mapBuilder = new ArrayBasedMapBuilder(IntegerType, IntegerType) mapBuilder.put(1, 10) mapBuilder.put(20, null) + + val tsString1 = "2015-01-01 23:50:59.123" + val ts1 = DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf(tsString1)) + val tsNTZ1 = + DateTimeUtils.localDateTimeToMicros(LocalDateTime.parse(tsString1.replace(" ", "T"))) + val tsString2 = "1880-01-05 12:45:21.321" + val ts2 = DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf(tsString2)) + val tsNTZ2 = + DateTimeUtils.localDateTimeToMicros(LocalDateTime.parse(tsString2.replace(" ", "T"))) + val row1 = new GenericInternalRow(Array[Any]( UTF8String.fromString("a string"), true, @@ -1606,12 +1618,13 @@ class ColumnarBatchSuite extends SparkFunSuite { 0.75D, Decimal("1234.23456"), DateTimeUtils.fromJavaDate(java.sql.Date.valueOf("2015-01-01")), - DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf("2015-01-01 23:50:59.123")), + ts1, new CalendarInterval(1, 0, 0), new GenericArrayData(Array(1, 2, 3, 4, null)), new GenericInternalRow(Array[Any](5.asInstanceOf[Any], 10)), mapBuilder.build(), - "Spark SQL".getBytes() + "Spark SQL".getBytes(), + tsNTZ1 )) mapBuilder = new ArrayBasedMapBuilder(IntegerType, IntegerType) @@ -1628,12 +1641,13 @@ class ColumnarBatchSuite extends SparkFunSuite { Double.PositiveInfinity, Decimal("0.01000"), DateTimeUtils.fromJavaDate(java.sql.Date.valueOf("1875-12-12")), - DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf("1880-01-05 12:45:21.321")), + ts2, new CalendarInterval(-10, -50, -100), new GenericArrayData(Array(5, 10, -100)), new 
GenericInternalRow(Array[Any](20.asInstanceOf[Any], null)), mapBuilder.build(), - "Parquet".getBytes() + "Parquet".getBytes(), + tsNTZ2 )) val row3 = new GenericInternalRow(Array[Any]( @@ -1652,6 +1666,7 @@ class ColumnarBatchSuite extends SparkFunSuite { null, null, null, + null, null )) @@ -1716,10 +1731,8 @@ class ColumnarBatchSuite extends SparkFunSuite { assert(columns(9).isNullAt(2)) assert(columns(10).dataType() == TimestampType) - assert(columns(10).getLong(0) == - DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf("2015-01-01 23:50:59.123"))) - assert(columns(10).getLong(1) == - DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf("1880-01-05 12:45:21.321"))) + assert(columns(10).getLong(0) == ts1) + assert(columns(10).getLong(1) == ts2) assert(columns(10).isNullAt(2)) assert(columns(11).dataType() == CalendarIntervalType) @@ -1777,6 +1790,11 @@ class ColumnarBatchSuite extends SparkFunSuite { assert(new String(columns(15).getBinary(0)) == "Spark SQL") assert(new String(columns(15).getBinary(1)) == "Parquet") assert(columns(15).isNullAt(2)) + + assert(columns(16).dataType() == TimestampNTZType) + assert(columns(16).getLong(0) == tsNTZ1) + assert(columns(16).getLong(1) == tsNTZ2) + assert(columns(16).isNullAt(2)) } finally { batch.close() } From 3b540ad822a53a8cb94159dc8aa3c66d34085e3e Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Mon, 24 Jan 2022 17:33:24 +0900 Subject: [PATCH 084/513] [SPARK-37987][SS] Fix flaky test StreamingAggregationSuite.changing schema of state when restarting query ### What changes were proposed in this pull request? This PR fixes a flaky test `StreamingAggregationSuite.changing schema of state when restarting query`, via adjusting the number of shuffle partition to 1. The flakiness was due to the optimization on schema verification - we only verify it in partition 0 since it is costly and redundant to verify the schema for all partitions. Other partitions are still possible to provide other errors which are considered as unexpected. ### Why are the changes needed? This PR fixes a flaky test. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Ran test suite 10 times locally. Closes #35298 from HeartSaVioR/SPARK-37987. Authored-by: Jungtaek Lim Signed-off-by: Jungtaek Lim --- .../sql/streaming/StreamingAggregationSuite.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index 77334ad64c3ce..8a7bb8b60c878 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -766,7 +766,11 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { } testQuietlyWithAllStateVersions("changing schema of state when restarting query", - (SQLConf.STATE_STORE_FORMAT_VALIDATION_ENABLED.key, "false")) { + (SQLConf.STATE_STORE_FORMAT_VALIDATION_ENABLED.key, "false"), + // Since we only do the check in partition 0 and other partitions still may fail with + // different errors, we change the number of shuffle partitions to 1 to make the test + // result to be deterministic. 
+ (SQLConf.SHUFFLE_PARTITIONS.key, "1")) { withTempDir { tempDir => val (inputData, aggregated) = prepareTestForChangingSchemaOfState(tempDir) @@ -790,7 +794,11 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { testQuietlyWithAllStateVersions("changing schema of state when restarting query -" + " schema check off", (SQLConf.STATE_SCHEMA_CHECK_ENABLED.key, "false"), - (SQLConf.STATE_STORE_FORMAT_VALIDATION_ENABLED.key, "false")) { + (SQLConf.STATE_STORE_FORMAT_VALIDATION_ENABLED.key, "false"), + // Since we only do the check in partition 0 and other partitions still may fail with + // different errors, we change the number of shuffle partitions to 1 to make the test + // result to be deterministic. + (SQLConf.SHUFFLE_PARTITIONS.key, "1")) { withTempDir { tempDir => val (inputData, aggregated) = prepareTestForChangingSchemaOfState(tempDir) From 5b91381b9bcea27b7be6d9cef852efbd6c23d98a Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 24 Jan 2022 16:47:13 +0800 Subject: [PATCH 085/513] [SPARK-37965][SQL] Remove check field name when reading/writing existing data in Orc ### What changes were proposed in this pull request? Remove `supportFieldName` check in DataSource ORCFormat. 1. org.apache.spark.sql.hive.orc.OrcFileFormat didn't add this check too 2. Tried a lot of wield column name, all can be reading and writing. ### Why are the changes needed? Remove unnecessary check ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UT Closes #35253 from AngersZhuuuu/SPARK-37965. Authored-by: Angerszhuuuu Signed-off-by: Wenchen Fan --- .../datasources/orc/OrcFileFormat.scala | 9 --------- .../org/apache/spark/sql/SQLQuerySuite.scala | 19 +++++++++++++++++++ .../sql/hive/execution/SQLQuerySuite.scala | 5 +++-- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala index ce851c58cc4fa..39a8763160530 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala @@ -228,13 +228,4 @@ class OrcFileFormat case _ => false } - - override def supportFieldName(name: String): Boolean = { - try { - TypeDescription.fromString(s"struct<`$name`:int>") - true - } catch { - case _: IllegalArgumentException => false - } - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 523a8e242e7e8..ffc3db31c90dc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4262,6 +4262,25 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark Row(2, 4, 6, 8, 10, 12, 14, 16, 18, 20) :: Nil) } } + + test("SPARK-37965: Spark support read/write orc file with invalid char in field name") { + withTempDir { dir => + Seq((1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), (2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22)) + .toDF("max(t)", "max(t", "=", "\n", ";", "a b", "{", ".", "a.b", "a", ",") + .repartition(1) + .write.mode(SaveMode.Overwrite).orc(dir.getAbsolutePath) + val df = spark.read.orc(dir.getAbsolutePath) + checkAnswer(df, + Row(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) :: + Row(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22) :: Nil) + 
assert(df.schema.names.sameElements( + Array("max(t)", "max(t", "=", "\n", ";", "a b", "{", ".", "a.b", "a", ","))) + checkAnswer(df.select("`max(t)`", "`a b`", "`{`", "`.`", "`a.b`"), + Row(1, 6, 7, 8, 9) :: Row(2, 12, 14, 16, 18) :: Nil) + checkAnswer(df.where("`a.b` > 10"), + Row(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22) :: Nil) + } + } } case class Foo(bar: Option[String]) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index e690d026053d6..d3f5d7613ace7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2213,8 +2213,9 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } test("SPARK-32889: ORC table column name supports special characters") { - // " " "," is not allowed. - Seq("$", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name => + // "," is not allowed since cannot create a table having a column whose name + // contains commas in Hive metastore. + Seq("$", ";", "{", "}", "(", ")", "\n", "\t", "=", " ", "a b").foreach { name => val source = "ORC" Seq(s"CREATE TABLE t32889(`$name` INT) USING $source", s"CREATE TABLE t32889 STORED AS $source AS SELECT 1 `$name`", From 4d5ea5e26e4ca625910b88f2dd02f94e55874ab8 Mon Sep 17 00:00:00 2001 From: dch nguyen Date: Mon, 24 Jan 2022 14:15:37 +0100 Subject: [PATCH 086/513] [SPARK-37153][PYTHON] Inline type hints for python/pyspark/profiler.py ### What changes were proposed in this pull request? Inline type hints for python/pyspark/profiler.py ### Why are the changes needed? We can take advantage of static type checking within the functions by inlining the type hints. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #34731 from dchvn/SPARK-37153. Authored-by: dch nguyen Signed-off-by: zero323 --- python/pyspark/profiler.py | 68 ++++++++++++++++++++++--------------- python/pyspark/profiler.pyi | 65 ----------------------------------- 2 files changed, 40 insertions(+), 93 deletions(-) delete mode 100644 python/pyspark/profiler.pyi diff --git a/python/pyspark/profiler.py b/python/pyspark/profiler.py index 6271bbc4814f0..45365cc1e79b0 100644 --- a/python/pyspark/profiler.py +++ b/python/pyspark/profiler.py @@ -15,6 +15,8 @@ # limitations under the License. # +from typing import Any, Callable, List, Optional, Type, TYPE_CHECKING, cast + import cProfile import pstats import os @@ -23,6 +25,9 @@ from pyspark.accumulators import AccumulatorParam +if TYPE_CHECKING: + from pyspark.context import SparkContext + class ProfilerCollector: """ @@ -31,21 +36,26 @@ class ProfilerCollector: the different stages/UDFs. 
""" - def __init__(self, profiler_cls, udf_profiler_cls, dump_path=None): - self.profiler_cls = profiler_cls - self.udf_profiler_cls = udf_profiler_cls - self.profile_dump_path = dump_path - self.profilers = [] - - def new_profiler(self, ctx): + def __init__( + self, + profiler_cls: Type["Profiler"], + udf_profiler_cls: Type["Profiler"], + dump_path: Optional[str] = None, + ): + self.profiler_cls: Type[Profiler] = profiler_cls + self.udf_profiler_cls: Type[Profiler] = udf_profiler_cls + self.profile_dump_path: Optional[str] = dump_path + self.profilers: List[List[Any]] = [] + + def new_profiler(self, ctx: "SparkContext") -> "Profiler": """Create a new profiler using class `profiler_cls`""" return self.profiler_cls(ctx) - def new_udf_profiler(self, ctx): + def new_udf_profiler(self, ctx: "SparkContext") -> "Profiler": """Create a new profiler using class `udf_profiler_cls`""" return self.udf_profiler_cls(ctx) - def add_profiler(self, id, profiler): + def add_profiler(self, id: int, profiler: "Profiler") -> None: """Add a profiler for RDD/UDF `id`""" if not self.profilers: if self.profile_dump_path: @@ -55,13 +65,13 @@ def add_profiler(self, id, profiler): self.profilers.append([id, profiler, False]) - def dump_profiles(self, path): + def dump_profiles(self, path: str) -> None: """Dump the profile stats into directory `path`""" for id, profiler, _ in self.profilers: profiler.dump(id, path) self.profilers = [] - def show_profiles(self): + def show_profiles(self) -> None: """Print the profile stats to stdout""" for i, (id, profiler, showed) in enumerate(self.profilers): if not showed and profiler: @@ -108,18 +118,18 @@ class Profiler: This API is a developer API. """ - def __init__(self, ctx): + def __init__(self, ctx: "SparkContext") -> None: pass - def profile(self, func, *args, **kwargs): + def profile(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any: """Do profiling on the function `func`""" raise NotImplementedError - def stats(self): + def stats(self) -> pstats.Stats: """Return the collected profiling stats (pstats.Stats)""" raise NotImplementedError - def show(self, id): + def show(self, id: int) -> None: """Print the profile stats to stdout, id is the RDD id""" stats = self.stats() if stats: @@ -128,7 +138,7 @@ def show(self, id): print("=" * 60) stats.sort_stats("time", "cumulative").print_stats() - def dump(self, id, path): + def dump(self, id: int, path: str) -> None: """Dump the profile into path, id is the RDD id""" if not os.path.exists(path): os.makedirs(path) @@ -138,15 +148,17 @@ def dump(self, id, path): stats.dump_stats(p) -class PStatsParam(AccumulatorParam): +class PStatsParam(AccumulatorParam[Optional[pstats.Stats]]): """PStatsParam is used to merge pstats.Stats""" @staticmethod - def zero(value): + def zero(value: Optional[pstats.Stats]) -> None: return None @staticmethod - def addInPlace(value1, value2): + def addInPlace( + value1: Optional[pstats.Stats], value2: Optional[pstats.Stats] + ) -> Optional[pstats.Stats]: if value1 is None: return value2 value1.add(value2) @@ -159,27 +171,27 @@ class BasicProfiler(Profiler): cProfile and Accumulator """ - def __init__(self, ctx): + def __init__(self, ctx: "SparkContext") -> None: Profiler.__init__(self, ctx) # Creates a new accumulator for combining the profiles of different # partitions of a stage - self._accumulator = ctx.accumulator(None, PStatsParam) + self._accumulator = ctx.accumulator(None, PStatsParam) # type: ignore[arg-type] - def profile(self, func, *args, **kwargs): + def profile(self, func: 
Callable[..., Any], *args: Any, **kwargs: Any) -> Any: """Runs and profiles the method to_profile passed in. A profile object is returned.""" pr = cProfile.Profile() ret = pr.runcall(func, *args, **kwargs) st = pstats.Stats(pr) - st.stream = None # make it picklable + st.stream = None # type: ignore[attr-defined] # make it picklable st.strip_dirs() # Adds a new profile to the existing accumulated value - self._accumulator.add(st) + self._accumulator.add(st) # type: ignore[arg-type] return ret - def stats(self): - return self._accumulator.value + def stats(self) -> pstats.Stats: + return cast(pstats.Stats, self._accumulator.value) class UDFBasicProfiler(BasicProfiler): @@ -187,7 +199,7 @@ class UDFBasicProfiler(BasicProfiler): UDFBasicProfiler is the profiler for Python/Pandas UDFs. """ - def show(self, id): + def show(self, id: int) -> None: """Print the profile stats to stdout, id is the PythonUDF id""" stats = self.stats() if stats: @@ -196,7 +208,7 @@ def show(self, id): print("=" * 60) stats.sort_stats("time", "cumulative").print_stats() - def dump(self, id, path): + def dump(self, id: int, path: str) -> None: """Dump the profile into path, id is the PythonUDF id""" if not os.path.exists(path): os.makedirs(path) diff --git a/python/pyspark/profiler.pyi b/python/pyspark/profiler.pyi deleted file mode 100644 index 85aa6a248036c..0000000000000 --- a/python/pyspark/profiler.pyi +++ /dev/null @@ -1,65 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any, Callable, List, Optional, Tuple, Type - -import pstats - -from pyspark.accumulators import AccumulatorParam -from pyspark.context import SparkContext - -class ProfilerCollector: - profiler_cls: Type[Profiler] - udf_profiler_cls: Type[Profiler] - profile_dump_path: Optional[str] - profilers: List[Tuple[int, Profiler, bool]] - def __init__( - self, - profiler_cls: Type[Profiler], - udf_profiler_cls: Type[Profiler], - dump_path: Optional[str] = ..., - ) -> None: ... - def new_profiler(self, ctx: SparkContext) -> Profiler: ... - def new_udf_profiler(self, ctx: SparkContext) -> Profiler: ... - def add_profiler(self, id: int, profiler: Profiler) -> None: ... - def dump_profiles(self, path: str) -> None: ... - def show_profiles(self) -> None: ... - -class Profiler: - def __init__(self, ctx: SparkContext) -> None: ... - def profile(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any: ... - def stats(self) -> pstats.Stats: ... - def show(self, id: int) -> None: ... - def dump(self, id: int, path: str) -> None: ... - -class PStatsParam(AccumulatorParam): - @staticmethod - def zero(value: pstats.Stats) -> None: ... - @staticmethod - def addInPlace( - value1: Optional[pstats.Stats], value2: Optional[pstats.Stats] - ) -> Optional[pstats.Stats]: ... 
- -class BasicProfiler(Profiler): - def __init__(self, ctx: SparkContext) -> None: ... - def profile(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any: ... - def stats(self) -> pstats.Stats: ... - -class UDFBasicProfiler(BasicProfiler): - def show(self, id: int) -> None: ... - def dump(self, id: int, path: str) -> None: ... From 8fef5bb616b2633041c1284de2dac1f611e50ce3 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 24 Jan 2022 16:54:34 +0300 Subject: [PATCH 087/513] [SPARK-37979][SQL] Switch to more generic error classes in AES functions ### What changes were proposed in this pull request? In the PR, I propose to switch from specific error classes in the AES functions: `aes_encrypt()/aes_decrypt()` to more generic: - AES_CRYPTO_ERROR -> INVALID_PARAMETER_VALUE - INVALID_AES_KEY_LENGTH -> INVALID_PARAMETER_VALUE - UNSUPPORTED_AES_MODE -> UNSUPPORTED_FEATURE The new error classes `INVALID_PARAMETER_VALUE` and `UNSUPPORTED_MODE` are made to be re-used from other functions but not only in the AES functions. ### Why are the changes needed? 1. To prevent unlimited inflation of the set of error classes and as a consequence of that inflation of `error-classes.json`. 2. To establish rules for other sub-tasks of SPARK-37935 ### Does this PR introduce _any_ user-facing change? Yes, but the AES functions `aes_encrypt()`/`aes_decrypt()` haven't released yet. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *SparkThrowableSuite" $ build/sbt "test:testOnly *QueryExecutionErrorsSuite" ``` Closes #35272 from MaxGekk/invalid-input-func. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 19 ++++---- .../sql/errors/QueryExecutionErrors.scala | 19 ++++++-- .../errors/QueryExecutionErrorsSuite.scala | 48 ++++++++++--------- 3 files changed, 50 insertions(+), 36 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 34ccc63ff072c..2e7831bdb415a 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1,7 +1,4 @@ { - "AES_CRYPTO_ERROR" : { - "message" : [ "AES crypto operation failed with: %s" ] - }, "AMBIGUOUS_FIELD_NAME" : { "message" : [ "Field name %s is ambiguous and has %s matching fields in the struct." ], "sqlState" : "42000" @@ -74,10 +71,6 @@ "INTERNAL_ERROR" : { "message" : [ "%s" ] }, - "INVALID_AES_KEY_LENGTH" : { - "message" : [ "The key length of aes_encrypt/aes_decrypt should be one of 16, 24 or 32 bytes, but got: %s" ], - "sqlState" : "42000" - }, "INVALID_ARRAY_INDEX" : { "message" : [ "Invalid index: %s, numElements: %s. If necessary set %s to false to bypass this error." ] }, @@ -99,6 +92,10 @@ "INVALID_JSON_SCHEMA_MAPTYPE" : { "message" : [ "Input schema %s can only contain StringType as a key type for a MapType." ] }, + "INVALID_PARAMETER_VALUE" : { + "message" : [ "The value of parameter(s) '%s' in %s is invalid: %s" ], + "sqlState" : "22023" + }, "MAP_KEY_DOES_NOT_EXIST" : { "message" : [ "Key %s does not exist. If necessary set %s to false to bypass this error." 
] }, @@ -144,10 +141,6 @@ "message" : [ "Unrecognized SQL type %s" ], "sqlState" : "42000" }, - "UNSUPPORTED_AES_MODE" : { - "message" : [ "The AES mode %s with the padding %s is not supported" ], - "sqlState" : "0A000" - }, "UNSUPPORTED_CHANGE_COLUMN" : { "message" : [ "Please add an implementation for a column change here" ], "sqlState" : "0A000" @@ -156,6 +149,10 @@ "message" : [ "Unsupported data type %s" ], "sqlState" : "0A000" }, + "UNSUPPORTED_FEATURE" : { + "message" : [ "The feature is not supported: %s" ], + "sqlState" : "0A000" + }, "UNSUPPORTED_LITERAL_TYPE" : { "message" : [ "Unsupported literal type %s %s" ], "sqlState" : "0A000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 975d748e1827f..384016216f668 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1905,15 +1905,28 @@ object QueryExecutionErrors { } def invalidAesKeyLengthError(actualLength: Int): RuntimeException = { - new SparkRuntimeException("INVALID_AES_KEY_LENGTH", Array(actualLength.toString)) + new SparkRuntimeException( + errorClass = "INVALID_PARAMETER_VALUE", + messageParameters = Array( + "key", + "the aes_encrypt/aes_decrypt function", + s"expects a binary value with 16, 24 or 32 bytes, but got ${actualLength.toString} bytes.")) } def aesModeUnsupportedError(mode: String, padding: String): RuntimeException = { - new SparkRuntimeException("UNSUPPORTED_AES_MODE", Array(mode, padding)) + new SparkRuntimeException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array( + s"AES-$mode with the padding $padding by the aes_encrypt/aes_decrypt function.")) } def aesCryptoError(detailMessage: String): RuntimeException = { - new SparkRuntimeException("AES_CRYPTO_ERROR", Array(detailMessage)) + new SparkRuntimeException( + errorClass = "INVALID_PARAMETER_VALUE", + messageParameters = Array( + "expr, key", + "the aes_encrypt/aes_decrypt function", + s"Detail message: $detailMessage")) } def hiveTableWithAnsiIntervalsError(tableName: String): Throwable = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 13f44a21499d2..5137614a366d1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -41,16 +41,17 @@ class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { (df1, df2) } - test("INVALID_AES_KEY_LENGTH: invalid key lengths in AES functions") { + test("INVALID_PARAMETER_VALUE: invalid key lengths in AES functions") { val (df1, df2) = getAesInputs() def checkInvalidKeyLength(df: => DataFrame): Unit = { val e = intercept[SparkException] { df.collect }.getCause.asInstanceOf[SparkRuntimeException] - assert(e.getErrorClass === "INVALID_AES_KEY_LENGTH") - assert(e.getSqlState === "42000") + assert(e.getErrorClass === "INVALID_PARAMETER_VALUE") + assert(e.getSqlState === "22023") assert(e.getMessage.contains( - "The key length of aes_encrypt/aes_decrypt should be one of 16, 24 or 32 bytes")) + "The value of parameter(s) 'key' in the aes_encrypt/aes_decrypt function is invalid: " + + "expects a binary value with 16, 24 or 32 bytes, but got")) } // 
Encryption failure - invalid key length @@ -71,7 +72,24 @@ class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { } } - test("UNSUPPORTED_AES_MODE: unsupported combinations of AES modes and padding") { + test("INVALID_PARAMETER_VALUE: AES decrypt failure - key mismatch") { + val (_, df2) = getAesInputs() + Seq( + ("value16", "1234567812345678"), + ("value24", "123456781234567812345678"), + ("value32", "12345678123456781234567812345678")).foreach { case (colName, key) => + val e = intercept[SparkException] { + df2.selectExpr(s"aes_decrypt(unbase64($colName), binary('$key'), 'ECB')").collect + }.getCause.asInstanceOf[SparkRuntimeException] + assert(e.getErrorClass === "INVALID_PARAMETER_VALUE") + assert(e.getSqlState === "22023") + assert(e.getMessage.contains( + "The value of parameter(s) 'expr, key' in the aes_encrypt/aes_decrypt function " + + "is invalid: Detail message:")) + } + } + + test("UNSUPPORTED_MODE: unsupported combinations of AES modes and padding") { val key16 = "abcdefghijklmnop" val key32 = "abcdefghijklmnop12345678ABCDEFGH" val (df1, df2) = getAesInputs() @@ -79,9 +97,10 @@ class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { val e = intercept[SparkException] { df.collect }.getCause.asInstanceOf[SparkRuntimeException] - assert(e.getErrorClass === "UNSUPPORTED_AES_MODE") + assert(e.getErrorClass === "UNSUPPORTED_FEATURE") assert(e.getSqlState === "0A000") - assert(e.getMessage.matches("""The AES mode \w+ with the padding \w+ is not supported""")) + assert(e.getMessage.matches("""The feature is not supported: AES-\w+ with the padding \w+""" + + " by the aes_encrypt/aes_decrypt function.")) } // Unsupported AES mode and padding in encrypt @@ -93,19 +112,4 @@ class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { checkUnsupportedMode(df2.selectExpr(s"aes_decrypt(value16, '$key16', 'GCM', 'PKCS')")) checkUnsupportedMode(df2.selectExpr(s"aes_decrypt(value32, '$key32', 'ECB', 'None')")) } - - test("AES_CRYPTO_ERROR: AES decrypt failure - key mismatch") { - val (_, df2) = getAesInputs() - Seq( - ("value16", "1234567812345678"), - ("value24", "123456781234567812345678"), - ("value32", "12345678123456781234567812345678")).foreach { case (colName, key) => - val e = intercept[SparkException] { - df2.selectExpr(s"aes_decrypt(unbase64($colName), binary('$key'), 'ECB')").collect - }.getCause.asInstanceOf[SparkRuntimeException] - assert(e.getErrorClass === "AES_CRYPTO_ERROR") - assert(e.getSqlState === null) - assert(e.getMessage.contains("AES crypto operation failed")) - } - } } From bcaab6261f8ae3a1e6f2401b96e294a076f9f719 Mon Sep 17 00:00:00 2001 From: tianhanhu Date: Mon, 24 Jan 2022 12:21:03 -0800 Subject: [PATCH 088/513] [SPARK-37891][CORE] Add scalastyle check to disable scala.concurrent.ExecutionContext.Implicits.global ### What changes were proposed in this pull request? Add scalastyle check to disable internal use of scala.concurrent.ExecutionContext.Implicits.global. The reason is that user queries can also use this thread pool, causing competing in resource and starvation. Spark-internal APIs should thus not use the global thread pool. ### Why are the changes needed? Forbid Spark internal API from using global thread pool ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? PR tests Closes #35187 from tianhanhu/SPARK-37891. 
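[Editor's note] Where an implicit execution context is genuinely needed, the new rule steers Spark-internal code towards a dedicated pool instead of the shared global one. A rough sketch of that alternative using only a plain JDK thread pool — an editor's illustration, not code added by this patch:

```scala
// Sketch: create a dedicated pool so internal work neither starves nor is
// starved by user queries running on the shared global pool.
import java.util.concurrent.Executors
import scala.concurrent.{ExecutionContext, ExecutionContextExecutorService, Future}

implicit val ec: ExecutionContextExecutorService =
  ExecutionContext.fromExecutorService(Executors.newFixedThreadPool(4))

val work = Future { 1 + 1 }  // scheduled on the dedicated pool, not the global one
// Shut the pool down once it is no longer needed:
// ec.shutdown()
```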
Authored-by: tianhanhu Signed-off-by: Xingbo Jiang --- .../scala/org/apache/spark/deploy/FaultToleranceTest.scala | 2 ++ .../org/apache/spark/deploy/rest/RestSubmissionClient.scala | 2 ++ .../scala/org/apache/spark/storage/FallbackStorage.scala | 2 ++ .../test/scala/org/apache/spark/JobCancellationSuite.scala | 2 ++ .../scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala | 2 ++ .../apache/spark/scheduler/SchedulerIntegrationSuite.scala | 4 ++++ .../apache/spark/serializer/KryoSerializerBenchmark.scala | 2 ++ .../scala/org/apache/spark/storage/BlockManagerSuite.scala | 2 ++ .../spark/storage/ShuffleBlockFetcherIteratorSuite.scala | 2 ++ scalastyle-config.xml | 6 ++++++ .../spark/sql/streaming/StreamingQueryManagerSuite.scala | 4 ++-- .../org/apache/spark/streaming/StreamingListenerSuite.scala | 2 ++ 12 files changed, 30 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala index 5915fb8cc7c84..7209e2c373ab1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala +++ b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala @@ -24,7 +24,9 @@ import java.util.concurrent.TimeoutException import scala.collection.mutable.ListBuffer import scala.concurrent.{Future, Promise} +// scalastyle:off executioncontextglobal import scala.concurrent.ExecutionContext.Implicits.global +// scalastyle:on executioncontextglobal import scala.concurrent.duration._ import scala.sys.process._ diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala index cc1d60a097b2e..8a0fc886e60ca 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala @@ -229,7 +229,9 @@ private[spark] class RestSubmissionClient(master: String) extends Logging { * Exposed for testing. 
*/ private[rest] def readResponse(connection: HttpURLConnection): SubmitRestProtocolResponse = { + // scalastyle:off executioncontextglobal import scala.concurrent.ExecutionContext.Implicits.global + // scalastyle:on executioncontextglobal val responseFuture = Future { val responseCode = connection.getResponseCode diff --git a/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala b/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala index d137099e73437..0c1206cb9010b 100644 --- a/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala +++ b/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala @@ -95,7 +95,9 @@ private[storage] class FallbackStorage(conf: SparkConf) extends Logging { } private[storage] class NoopRpcEndpointRef(conf: SparkConf) extends RpcEndpointRef(conf) { + // scalastyle:off executioncontextglobal import scala.concurrent.ExecutionContext.Implicits.global + // scalastyle:on executioncontextglobal override def address: RpcAddress = null override def name: String = "fallback" override def send(message: Any): Unit = {} diff --git a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala index 082a92ef41d3b..77bdb882c507d 100644 --- a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala +++ b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala @@ -20,7 +20,9 @@ package org.apache.spark import java.util.concurrent.{Semaphore, TimeUnit} import java.util.concurrent.atomic.AtomicInteger +// scalastyle:off executioncontextglobal import scala.concurrent.ExecutionContext.Implicits.global +// scalastyle:on executioncontextglobal import scala.concurrent.Future import scala.concurrent.duration._ diff --git a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala index a5bc557eef5ad..93daf9032323d 100644 --- a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala @@ -20,7 +20,9 @@ package org.apache.spark.rdd import java.util.concurrent.Semaphore import scala.concurrent._ +// scalastyle:off executioncontextglobal import scala.concurrent.ExecutionContext.Implicits.global +// scalastyle:on executioncontextglobal import scala.concurrent.duration.Duration import org.scalatest.BeforeAndAfterAll diff --git a/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala index ac4ed13b25488..9ed26e712563e 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala @@ -136,7 +136,9 @@ abstract class SchedulerIntegrationSuite[T <: MockBackend: ClassTag] extends Spa func: (TaskContext, Iterator[_]) => _ = jobComputeFunc): Future[Any] = { val waiter: JobWaiter[Any] = scheduler.submitJob(rdd, func, partitions.toSeq, CallSite("", ""), (index, res) => results(index) = res, new Properties()) + // scalastyle:off executioncontextglobal import scala.concurrent.ExecutionContext.Implicits.global + // scalastyle:on executioncontextglobal waiter.completionFuture.recover { case ex => failure = ex } @@ -697,7 +699,9 @@ class BasicSchedulerIntegrationSuite extends SchedulerIntegrationSuite[SingleCor withBackend(runBackend _) { // Submit a job containing an RDD which will hang in 
getPartitions() until we release // the countdown latch: + // scalastyle:off executioncontextglobal import scala.concurrent.ExecutionContext.Implicits.global + // scalastyle:on executioncontextglobal val slowJobFuture = Future { submit(rddWithSlowGetPartitions, Array(0)) }.flatten // Block the current thread until the other thread has started the getPartitions() call: diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerBenchmark.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerBenchmark.scala index 3814d2b6fb475..28e0e79a6fd7e 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerBenchmark.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerBenchmark.scala @@ -18,7 +18,9 @@ package org.apache.spark.serializer import scala.concurrent._ +// scalastyle:off executioncontextglobal import scala.concurrent.ExecutionContext.Implicits.global +// scalastyle:on executioncontextglobal import scala.concurrent.duration._ import org.apache.spark.{SparkConf, SparkContext} diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index d1dc083868baf..22204dd98ccdd 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -2229,7 +2229,9 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE blockData: ManagedBuffer, level: StorageLevel, classTag: ClassTag[_]): Future[Unit] = { + // scalastyle:off executioncontextglobal import scala.concurrent.ExecutionContext.Implicits.global + // scalastyle:on executioncontextglobal Future {} } diff --git a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala index 56043ea901906..fdaf1f8e3ca96 100644 --- a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala @@ -24,7 +24,9 @@ import java.util.concurrent.{CompletableFuture, Semaphore} import java.util.zip.CheckedInputStream import scala.collection.mutable +// scalastyle:off executioncontextglobal import scala.concurrent.ExecutionContext.Implicits.global +// scalastyle:on executioncontextglobal import scala.concurrent.Future import com.google.common.io.ByteStreams diff --git a/scalastyle-config.xml b/scalastyle-config.xml index 791d91040c816..32f1f147f3eed 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -264,6 +264,12 @@ This file is divided into 3 sections: of Commons Lang 2 (package org.apache.commons.lang.*) + + scala\.concurrent\.ExecutionContext\.Implicits\.global + User queries can use global thread pool, causing starvation and eventual OOM. + Thus, Spark-internal APIs should not use this thread pool + + FileSystem.get\([a-zA-Z_$][a-zA-Z_$0-9]*\) Date: Mon, 24 Jan 2022 16:49:53 -0800 Subject: [PATCH 089/513] [SPARK-38002][BUILD] Upgrade ZSTD-JNI to 1.5.2-1 ### What changes were proposed in this pull request? This PR aims to upgrade ZSTD-JNI to 1.5.2-1. ### Why are the changes needed? This will bring the following improvements. 
- https://github.com/luben/zstd-jni/commit/1cc38de0153dd83ccd465b115c72573fe7d97930 (Reducing synchronization in RecyclingBufferPool) - https://github.com/luben/zstd-jni/commit/16b841192635a02292a172c28fde57a425479eab (Import Zstd v1.5.2) - https://github.com/luben/zstd-jni/commit/fb16a195367d1fb6a1ed699f36fbd38b67a853b0 (Remove redundant reset) - https://github.com/luben/zstd-jni/commit/13711375c88ccc2291c4077f35be98552e6898be (Expose the default compression level) - https://github.com/luben/zstd-jni/commit/1e7ea4d4ec144ad2cd52a3f26ab02eb9938c9943 (Remove the tag on some tests that was added for) - https://github.com/luben/zstd-jni/commit/d786f6e6c157a289f7282d3a0116f3840d1e1f69 (Mark deprecated APIs with deprecated annotation.) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #35303 from dongjoon-hyun/SPARK-38002. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../ZStandardBenchmark-jdk11-results.txt | 32 +++++++++---------- .../ZStandardBenchmark-jdk17-results.txt | 28 ++++++++-------- .../benchmarks/ZStandardBenchmark-results.txt | 28 ++++++++-------- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 6 files changed, 47 insertions(+), 47 deletions(-) diff --git a/core/benchmarks/ZStandardBenchmark-jdk11-results.txt b/core/benchmarks/ZStandardBenchmark-jdk11-results.txt index d975b1d05fc98..53c9299e84366 100644 --- a/core/benchmarks/ZStandardBenchmark-jdk11-results.txt +++ b/core/benchmarks/ZStandardBenchmark-jdk11-results.txt @@ -2,26 +2,26 @@ Benchmark ZStandardCompressionCodec ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1027-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Compression 10000 times at level 1 without buffer pool 517 518 1 0.0 51679.1 1.0X -Compression 10000 times at level 2 without buffer pool 828 829 1 0.0 82770.5 0.6X -Compression 10000 times at level 3 without buffer pool 1031 1035 6 0.0 103117.5 0.5X -Compression 10000 times at level 1 with buffer pool 474 475 1 0.0 47377.9 1.1X -Compression 10000 times at level 2 with buffer pool 544 545 1 0.0 54382.9 1.0X -Compression 10000 times at level 3 with buffer pool 728 732 5 0.0 72791.2 0.7X +Compression 10000 times at level 1 without buffer pool 584 604 15 0.0 58407.5 1.0X +Compression 10000 times at level 2 without buffer pool 654 665 11 0.0 65444.9 0.9X +Compression 10000 times at level 3 without buffer pool 907 916 8 0.0 90677.0 0.6X +Compression 10000 times at level 1 with buffer pool 674 686 11 0.0 67437.9 0.9X +Compression 10000 times at level 2 with buffer pool 759 769 10 0.0 75916.2 0.8X +Compression 10000 times at level 3 with buffer pool 1006 1017 16 0.0 100600.2 0.6X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1027-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------------------------ -Decompression 10000 times from level 1 without buffer pool 1097 1097 1 0.0 109654.6 1.0X -Decompression 10000 times from level 2 without buffer pool 1097 1097 0 0.0 109695.5 1.0X -Decompression 10000 times from level 3 without buffer pool 1093 1093 1 0.0 109309.2 1.0X -Decompression 10000 times from level 1 with buffer pool 854 855 1 0.0 85422.5 1.3X -Decompression 10000 times from level 2 with buffer pool 853 853 0 0.0 85287.9 1.3X -Decompression 10000 times from level 3 with buffer pool 854 854 0 0.0 85417.9 1.3X +Decompression 10000 times from level 1 without buffer pool 693 698 9 0.0 69257.4 1.0X +Decompression 10000 times from level 2 without buffer pool 699 707 7 0.0 69857.8 1.0X +Decompression 10000 times from level 3 without buffer pool 689 697 7 0.0 68858.9 1.0X +Decompression 10000 times from level 1 with buffer pool 450 476 37 0.0 45005.9 1.5X +Decompression 10000 times from level 2 with buffer pool 527 550 26 0.0 52653.2 1.3X +Decompression 10000 times from level 3 with buffer pool 452 513 43 0.0 45201.4 1.5X diff --git a/core/benchmarks/ZStandardBenchmark-jdk17-results.txt b/core/benchmarks/ZStandardBenchmark-jdk17-results.txt index ecb7e6c6bcfd6..c6d84b79cb29c 100644 --- a/core/benchmarks/ZStandardBenchmark-jdk17-results.txt +++ b/core/benchmarks/ZStandardBenchmark-jdk17-results.txt @@ -2,26 +2,26 @@ Benchmark ZStandardCompressionCodec ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1027-azure Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Compression 10000 times at level 1 without buffer pool 2930 2953 33 0.0 293038.2 1.0X -Compression 10000 times at level 2 without buffer pool 1846 2728 1248 0.0 184565.8 1.6X -Compression 10000 times at level 3 without buffer pool 2109 2110 2 0.0 210881.8 1.4X -Compression 10000 times at level 1 with buffer pool 1466 1479 19 0.0 146569.0 2.0X -Compression 10000 times at level 2 with buffer pool 1570 1584 20 0.0 156976.5 1.9X -Compression 10000 times at level 3 with buffer pool 1845 1852 10 0.0 184465.3 1.6X +Compression 10000 times at level 1 without buffer pool 2380 2426 65 0.0 238014.5 1.0X +Compression 10000 times at level 2 without buffer pool 1532 2271 1045 0.0 153222.7 1.6X +Compression 10000 times at level 3 without buffer pool 1746 1757 15 0.0 174619.0 1.4X +Compression 10000 times at level 1 with buffer pool 1177 1178 2 0.0 117681.3 2.0X +Compression 10000 times at level 2 with buffer pool 1267 1273 8 0.0 126719.0 1.9X +Compression 10000 times at level 3 with buffer pool 1517 1603 122 0.0 151729.8 1.6X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1027-azure Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------ -Decompression 10000 times from level 1 without buffer pool 2852 2887 49 0.0 
285224.2 1.0X -Decompression 10000 times from level 2 without buffer pool 2903 2908 7 0.0 290287.1 1.0X -Decompression 10000 times from level 3 without buffer pool 2846 2858 18 0.0 284558.0 1.0X -Decompression 10000 times from level 1 with buffer pool 2637 2647 14 0.0 263714.3 1.1X -Decompression 10000 times from level 2 with buffer pool 2619 2629 14 0.0 261915.2 1.1X -Decompression 10000 times from level 3 with buffer pool 2640 2652 17 0.0 263976.7 1.1X +Decompression 10000 times from level 1 without buffer pool 2241 2271 42 0.0 224123.2 1.0X +Decompression 10000 times from level 2 without buffer pool 2210 2253 62 0.0 220980.7 1.0X +Decompression 10000 times from level 3 without buffer pool 2220 2228 12 0.0 221964.2 1.0X +Decompression 10000 times from level 1 with buffer pool 1987 1995 12 0.0 198705.4 1.1X +Decompression 10000 times from level 2 with buffer pool 1966 1968 4 0.0 196572.3 1.1X +Decompression 10000 times from level 3 with buffer pool 1983 1991 11 0.0 198277.7 1.1X diff --git a/core/benchmarks/ZStandardBenchmark-results.txt b/core/benchmarks/ZStandardBenchmark-results.txt index 24b982ad63a5e..5de6d182fa6de 100644 --- a/core/benchmarks/ZStandardBenchmark-results.txt +++ b/core/benchmarks/ZStandardBenchmark-results.txt @@ -2,26 +2,26 @@ Benchmark ZStandardCompressionCodec ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1027-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Compression 10000 times at level 1 without buffer pool 398 523 144 0.0 39785.2 1.0X -Compression 10000 times at level 2 without buffer pool 452 457 5 0.0 45210.8 0.9X -Compression 10000 times at level 3 without buffer pool 634 650 15 0.0 63405.8 0.6X -Compression 10000 times at level 1 with buffer pool 329 334 4 0.0 32851.3 1.2X -Compression 10000 times at level 2 with buffer pool 384 393 7 0.0 38421.9 1.0X -Compression 10000 times at level 3 with buffer pool 561 570 7 0.0 56070.4 0.7X +Compression 10000 times at level 1 without buffer pool 633 774 122 0.0 63315.3 1.0X +Compression 10000 times at level 2 without buffer pool 748 749 2 0.0 74771.7 0.8X +Compression 10000 times at level 3 without buffer pool 945 949 7 0.0 94461.5 0.7X +Compression 10000 times at level 1 with buffer pool 287 289 2 0.0 28703.6 2.2X +Compression 10000 times at level 2 with buffer pool 336 342 3 0.0 33641.3 1.9X +Compression 10000 times at level 3 with buffer pool 517 528 8 0.0 51747.9 1.2X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1027-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------ -Decompression 10000 times from level 1 without buffer pool 686 686 0 0.0 68582.6 1.0X -Decompression 10000 times from level 2 without buffer pool 683 686 3 0.0 68270.5 1.0X -Decompression 10000 times from level 3 without buffer pool 687 690 4 0.0 68653.8 1.0X -Decompression 10000 times from level 1 with buffer pool 495 
497 3 0.0 49467.7 1.4X -Decompression 10000 times from level 2 with buffer pool 438 467 26 0.0 43839.3 1.6X -Decompression 10000 times from level 3 with buffer pool 495 496 1 0.0 49474.0 1.4X +Decompression 10000 times from level 1 without buffer pool 683 689 9 0.0 68294.8 1.0X +Decompression 10000 times from level 2 without buffer pool 684 685 1 0.0 68441.8 1.0X +Decompression 10000 times from level 3 without buffer pool 684 685 1 0.0 68446.7 1.0X +Decompression 10000 times from level 1 with buffer pool 494 495 2 0.0 49362.5 1.4X +Decompression 10000 times from level 2 with buffer pool 493 495 2 0.0 49330.7 1.4X +Decompression 10000 times from level 3 with buffer pool 494 497 5 0.0 49359.8 1.4X diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index adefbe107442e..164eab08cdd1b 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -268,4 +268,4 @@ xz/1.8//xz-1.8.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper-jute/3.6.2//zookeeper-jute-3.6.2.jar zookeeper/3.6.2//zookeeper-3.6.2.jar -zstd-jni/1.5.1-1//zstd-jni-1.5.1-1.jar +zstd-jni/1.5.2-1//zstd-jni-1.5.2-1.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index a57b7dc5216a5..3a38d075c9307 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -252,4 +252,4 @@ xz/1.8//xz-1.8.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper-jute/3.6.2//zookeeper-jute-3.6.2.jar zookeeper/3.6.2//zookeeper-3.6.2.jar -zstd-jni/1.5.1-1//zstd-jni-1.5.1-1.jar +zstd-jni/1.5.2-1//zstd-jni-1.5.2-1.jar diff --git a/pom.xml b/pom.xml index 62f3d1b479799..9e18ba63bb7f5 100644 --- a/pom.xml +++ b/pom.xml @@ -773,7 +773,7 @@ com.github.luben zstd-jni - 1.5.1-1 + 1.5.2-1 com.clearspring.analytics From e362ef17e20a5a3957b4b03949940b7787462b42 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 24 Jan 2022 16:52:15 -0800 Subject: [PATCH 090/513] [SPARK-38007][K8S][DOCS] Update K8s doc to recommend K8s 1.20+ ### What changes were proposed in this pull request? This PR aims to update K8s `Prerequisites` doc to recommend K8s 1.20+ and to remove `Spark 2.3`. ### Why are the changes needed? The AS-IS document is outdated because it's written 4 years ago. - kubernetes-client 4.1.1 dropped K8s 1.6 and 1.7 support. (https://github.com/fabric8io/kubernetes-client) - kubernetes-client 5.8.0 dropped K8s 1.9 support. (https://github.com/fabric8io/kubernetes-client) - EKS also makes the following EOLs (https://docs.aws.amazon.com/eks/latest/userguide/kubernetes-versions.html) - 1.16 (September 27, 2021) - 1.17 (November 2, 2021) - 1.18 (March 31, 2022) - 1.19 (April, 2022) - MKS has the similar status (https://docs.microsoft.com/en-us/azure/aks/supported-kubernetes-versions?tabs=azure-cli#aks-kubernetes-release-calendar) ### Does this PR introduce _any_ user-facing change? No. This is a documentation change. ### How was this patch tested? Manual. Closes #35306 from dongjoon-hyun/SPARK-38007. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index b8699513a2b16..b9355c3a709d7 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -44,8 +44,7 @@ Cluster administrators should use [Pod Security Policies](https://kubernetes.io/ # Prerequisites -* A runnable distribution of Spark 2.3 or above. 
-* A running Kubernetes cluster at version >= 1.6 with access configured to it using +* A running Kubernetes cluster at version >= 1.20 with access configured to it using [kubectl](https://kubernetes.io/docs/user-guide/prereqs/). If you do not already have a working Kubernetes cluster, you may set up a test cluster on your local machine using [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). From 9b125713eb88988cd9d252549ca497e21bb23ebc Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 25 Jan 2022 09:44:32 +0800 Subject: [PATCH 091/513] [SPARK-36183][SQL][FOLLOWUP] Fix push down limit 1 through Aggregate ### What changes were proposed in this pull request? Use `Aggregate.aggregateExpressions` instead of `Aggregate.output` when pushing down limit 1 through Aggregate. For example: ```scala spark.range(10).selectExpr("id % 5 AS a", "id % 5 AS b").write.saveAsTable("t1") spark.sql("SELECT a, b, a AS alias FROM t1 GROUP BY a, b LIMIT 1").explain(true) ``` Before this pr: ``` == Optimized Logical Plan == GlobalLimit 1 +- LocalLimit 1 +- !Project [a#227L, b#228L, alias#226L] +- LocalLimit 1 +- Relation default.t1[a#227L,b#228L] parquet ``` After this pr: ``` == Optimized Logical Plan == GlobalLimit 1 +- LocalLimit 1 +- Project [a#227L, b#228L, a#227L AS alias#226L] +- LocalLimit 1 +- Relation default.t1[a#227L,b#228L] parquet ``` ### Why are the changes needed? Fix bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #35286 from wangyum/SPARK-36183-2. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../apache/spark/sql/catalyst/optimizer/Optimizer.scala | 4 ++-- .../spark/sql/catalyst/optimizer/LimitPushdownSuite.scala | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 357d11c39f4e0..b72d85be594d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -721,9 +721,9 @@ object LimitPushDown extends Rule[LogicalPlan] { LocalLimit(exp, project.copy(child = pushLocalLimitThroughJoin(exp, join))) // Push down limit 1 through Aggregate and turn Aggregate into Project if it is group only. 
case Limit(le @ IntegerLiteral(1), a: Aggregate) if a.groupOnly => - Limit(le, Project(a.output, LocalLimit(le, a.child))) + Limit(le, Project(a.aggregateExpressions, LocalLimit(le, a.child))) case Limit(le @ IntegerLiteral(1), p @ Project(_, a: Aggregate)) if a.groupOnly => - Limit(le, p.copy(child = Project(a.output, LocalLimit(le, a.child)))) + Limit(le, p.copy(child = Project(a.aggregateExpressions, LocalLimit(le, a.child)))) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index ee7f872514985..4cfc90a7d32fd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -254,6 +254,13 @@ class LimitPushdownSuite extends PlanTest { Optimize.execute(x.union(y).groupBy("x.a".attr)("x.a".attr).limit(1).analyze), LocalLimit(1, LocalLimit(1, x).union(LocalLimit(1, y))).select("x.a".attr).limit(1).analyze) + comparePlans( + Optimize.execute( + x.groupBy("x.a".attr)("x.a".attr) + .select("x.a".attr.as("a1"), "x.a".attr.as("a2")).limit(1).analyze), + LocalLimit(1, x).select("x.a".attr) + .select("x.a".attr.as("a1"), "x.a".attr.as("a2")).limit(1).analyze) + // No push down comparePlans( Optimize.execute(x.groupBy("x.a".attr)("x.a".attr).limit(2).analyze), From 32d14b5622e8930fe45a1a8d6c71aa1cc0415cfd Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Mon, 24 Jan 2022 18:15:27 -0800 Subject: [PATCH 092/513] [SPARK-37998][K8S][TESTS] Use `rbac.authorization.k8s.io/v1` instead of `v1beta1` ### What changes were proposed in this pull request? Before this patch: ```bash $ k apply -f resource-managers/kubernetes/integration-tests/dev/spark-rbac.yaml namespace/spark created serviceaccount/spark-sa created unable to recognize "resource-managers/kubernetes/integration-tests/dev/spark-rbac.yaml": no matches for kind "ClusterRole" in version "rbac.authorization.k8s.io/v1beta1" unable to recognize "resource-managers/kubernetes/integration-tests/dev/spark-rbac.yaml": no matches for kind "ClusterRoleBinding" in version "rbac.authorization.k8s.io/v1beta1" ``` This patch bumps rbac to v1 to fix api no matches error in latest minikube setup k8s. ### Why are the changes needed? Current spark-rbac.yaml would be failed to create rbac when setup k8s using minikube latest version. As note from kubernetes: - The rbac.authorization.k8s.io/v1beta1 API version of ClusterRole, ClusterRoleBinding, Role, and RoleBinding is no longer served as of v1.22. - Migrate manifests and API clients to use the rbac.authorization.k8s.io/v1 API version, available since v1.8. We'd better using rbac `v1` in here to aovid apply failed on kuberentes v1.22+. [1] https://kubernetes.io/docs/reference/using-api/deprecation-guide/#rbac-resources-v122 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? ```bash $ k apply -f spark-rbac.yaml namespace/spark unchanged serviceaccount/spark-sa unchanged clusterrole.rbac.authorization.k8s.io/spark-role created clusterrolebinding.rbac.authorization.k8s.io/spark-role-binding created ``` Closes #35300 from Yikun/SPARK-37998. 
Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- .../kubernetes/integration-tests/dev/spark-rbac.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/dev/spark-rbac.yaml b/resource-managers/kubernetes/integration-tests/dev/spark-rbac.yaml index a4c242f2f2645..f6b8b10c87b15 100644 --- a/resource-managers/kubernetes/integration-tests/dev/spark-rbac.yaml +++ b/resource-managers/kubernetes/integration-tests/dev/spark-rbac.yaml @@ -26,7 +26,7 @@ metadata: name: spark-sa namespace: spark --- -apiVersion: rbac.authorization.k8s.io/v1beta1 +apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: spark-role @@ -38,7 +38,7 @@ rules: verbs: - "*" --- -apiVersion: rbac.authorization.k8s.io/v1beta1 +apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: spark-role-binding @@ -49,4 +49,4 @@ subjects: roleRef: kind: ClusterRole name: spark-role - apiGroup: rbac.authorization.k8s.io \ No newline at end of file + apiGroup: rbac.authorization.k8s.io From 37629c18c56013427c5067c136c76a60ff8aeeb8 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Tue, 25 Jan 2022 10:51:57 +0800 Subject: [PATCH 093/513] [SPARK-38006][SQL] Clean up duplicated planner logic for window operators ### What changes were proposed in this pull request? Both `WindowExec.scala` and `WindowInPandasExec.scala` have some duplicated logic with regarded to query planning (`output`, `requiredChildDistribution/Ordering`, `outputPartitioning/Ordering`). We can move these logic into their common parent `WindowExecBase` to reduce duplication. ### Why are the changes needed? Clean up existing code for better readability. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test as this is just refactoring. Closes #35305 from c21/window-fix. Authored-by: Cheng Su Signed-off-by: Wenchen Fan --- .../execution/python/WindowInPandasExec.scala | 22 ---------------- .../sql/execution/window/WindowExec.scala | 20 --------------- .../sql/execution/window/WindowExecBase.scala | 25 +++++++++++++++++++ 3 files changed, 25 insertions(+), 42 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala index 07c0aab1b6b74..87102ccac34a8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala @@ -27,7 +27,6 @@ import org.apache.spark.api.python.{ChainedPythonFunctions, PythonEvalType} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.{ExternalAppendOnlyUnsafeRowArray, SparkPlan} import org.apache.spark.sql.execution.window._ @@ -87,27 +86,6 @@ case class WindowInPandasExec( child: SparkPlan) extends WindowExecBase { - override def output: Seq[Attribute] = - child.output ++ windowExpression.map(_.toAttribute) - - override def requiredChildDistribution: Seq[Distribution] = { - if (partitionSpec.isEmpty) { - // Only show warning when the number of bytes is larger than 100 MiB? - logWarning("No Partition Defined for Window operation! 
Moving all data to a single " - + "partition, this can cause serious performance degradation.") - AllTuples :: Nil - } else { - ClusteredDistribution(partitionSpec) :: Nil - } - } - - override def requiredChildOrdering: Seq[Seq[SortOrder]] = - Seq(partitionSpec.map(SortOrder(_, Ascending)) ++ orderSpec) - - override def outputOrdering: Seq[SortOrder] = child.outputOrdering - - override def outputPartitioning: Partitioning = child.outputPartitioning - /** * Helper functions and data structures for window bounds * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala index 374659e03a3fd..33c37e871e385 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.execution.window import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.{ExternalAppendOnlyUnsafeRowArray, SparkPlan} /** @@ -91,25 +90,6 @@ case class WindowExec( child: SparkPlan) extends WindowExecBase { - override def output: Seq[Attribute] = - child.output ++ windowExpression.map(_.toAttribute) - - override def requiredChildDistribution: Seq[Distribution] = { - if (partitionSpec.isEmpty) { - // Only show warning when the number of bytes is larger than 100 MiB? - logWarning("No Partition Defined for Window operation! Moving all data to a single " - + "partition, this can cause serious performance degradation.") - AllTuples :: Nil - } else ClusteredDistribution(partitionSpec) :: Nil - } - - override def requiredChildOrdering: Seq[Seq[SortOrder]] = - Seq(partitionSpec.map(SortOrder(_, Ascending)) ++ orderSpec) - - override def outputOrdering: Seq[SortOrder] = child.outputOrdering - - override def outputPartitioning: Partitioning = child.outputPartitioning - protected override def doExecute(): RDD[InternalRow] = { // Unwrap the window expressions and window frame factories from the map. 
val expressions = windowFrameExpressionFactoryPairs.flatMap(_._1) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index f3b3b3494f2cc..5f1758d12fd5d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -23,14 +23,39 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning} import org.apache.spark.sql.execution.UnaryExecNode import org.apache.spark.sql.types._ +/** + * Holds common logic for window operators + */ trait WindowExecBase extends UnaryExecNode { def windowExpression: Seq[NamedExpression] def partitionSpec: Seq[Expression] def orderSpec: Seq[SortOrder] + override def output: Seq[Attribute] = + child.output ++ windowExpression.map(_.toAttribute) + + override def requiredChildDistribution: Seq[Distribution] = { + if (partitionSpec.isEmpty) { + // Only show warning when the number of bytes is larger than 100 MiB? + logWarning("No Partition Defined for Window operation! Moving all data to a single " + + "partition, this can cause serious performance degradation.") + AllTuples :: Nil + } else { + ClusteredDistribution(partitionSpec) :: Nil + } + } + + override def requiredChildOrdering: Seq[Seq[SortOrder]] = + Seq(partitionSpec.map(SortOrder(_, Ascending)) ++ orderSpec) + + override def outputOrdering: Seq[SortOrder] = child.outputOrdering + + override def outputPartitioning: Partitioning = child.outputPartitioning + /** * Create the resulting projection. * From aedc273107fd3f8852c380192f463240423c2c25 Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Tue, 25 Jan 2022 11:10:51 +0800 Subject: [PATCH 094/513] [SPARK-37731][SQL][FOLLOWUP] Update generator function lookup and migration guide ### What changes were proposed in this pull request? This PR is a follow-up PR for SPARK-37731. It updates the Analyzer logic when resolving generator functions to match the behavior before SPARK-37731, and the migration docs to include another behavior change for dropping a persistent function that has the same name as one of the built-in functions. ### Why are the changes needed? Follow up for SPARK-37731. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #35275 from allisonwang-db/spark-37731-follow-up. Authored-by: allisonwang-db Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 ++ .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 10 ++++++---- .../spark/sql/catalyst/catalog/SessionCatalog.scala | 5 ++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 01c828a0f69bf..63fc51a5132db 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -58,6 +58,8 @@ license: | - Since Spark 3.3, the table property `external` becomes reserved. Certain commands will fail if you specify the `external` property, such as `CREATE TABLE ... TBLPROPERTIES` and `ALTER TABLE ... SET TBLPROPERTIES`. In Spark 3.2 and earlier, the table property `external` is silently ignored. 
You can set `spark.sql.legacy.notReserveProperties` to `true` to restore the old behavior. + - Since Spark 3.3, DROP FUNCTION fails if the function name matches one of the built-in functions' name and is not qualified. In Spark 3.2 or earlier, DROP FUNCTION can still drop a persistent function even if the name is not qualified and is the same as a built-in function's name. + ## Upgrading from Spark SQL 3.1 to 3.2 - Since Spark 3.2, ADD FILE/JAR/ARCHIVE commands require each path to be enclosed by `"` or `'` if the path contains whitespaces. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 103a445097554..d31f90aa2acf0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2078,10 +2078,12 @@ class Analyzer(override val catalogManager: CatalogManager) case u if !u.childrenResolved => u // Skip until children are resolved. case u @ UnresolvedGenerator(name, arguments) => withPosition(u) { - resolveBuiltinOrTempFunction(name.asMultipart, arguments, None).getOrElse { - // For generator function, the parser only accepts v1 function name and creates - // `FunctionIdentifier`. - v1SessionCatalog.resolvePersistentFunction(name, arguments) + // For generator function, the parser only accepts v1 function name and creates + // `FunctionIdentifier`. + v1SessionCatalog.lookupFunction(name, arguments) match { + case generator: Generator => generator + case other => throw QueryCompilationErrors.generatorNotExpectedError( + name, other.getClass.getCanonicalName) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index c712d2ccccade..ad007f1d5cfd4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1715,9 +1715,8 @@ class SessionCatalog( } } - // Test only. The actual function lookup logic looks up temp/built-in function first, then - // persistent function from either v1 or v2 catalog. This method only look up v1 catalog and is - // no longer valid. + // The actual function lookup logic looks up temp/built-in function first, then persistent + // function from either v1 or v2 catalog. This method only look up v1 catalog. def lookupFunction(name: FunctionIdentifier, children: Seq[Expression]): Expression = { if (name.database.isEmpty) { resolveBuiltinOrTempFunction(name.funcName, children) From 4f0689c50693c2d33c154f20c6fbdd282cbf483b Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 25 Jan 2022 12:23:34 +0800 Subject: [PATCH 095/513] [SPARK-36424][SQL][FOLLOWUP] The strategy of Eliminate Limits batch should be fixedPoint ### What changes were proposed in this pull request? This pr change the strategy of `Eliminate Limits` batch from Once to fixedPoint in AQEOptimizer. ### Why are the changes needed? Fix bug. 
Otherwise check batch idempotence will fail: ```scala spark.range(10).write.saveAsTable("t1") spark.table("t1").distinct().limit(10086).selectExpr("cast(id as string)").limit(20).collect() ``` ``` Once strategy's idempotence is broken for batch Eliminate Limits !GlobalLimit 20 Project [cast(id#3L as string) AS id#6] !+- LocalLimit 20 +- LogicalQueryStage Aggregate [id#3L], [id#3L], HashAggregate(keys=[id#3L], functions=[]) ! +- Project [cast(id#3L as string) AS id#6] ! +- LogicalQueryStage Aggregate [id#3L], [id#3L], HashAggregate(keys=[id#3L], functions=[]) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #35301 from wangyum/SPARK-36424. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../spark/sql/execution/adaptive/AQEOptimizer.scala | 2 +- .../sql/execution/adaptive/AdaptiveQueryExecSuite.scala | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala index ea1ab8e5755a2..d81827e4701e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala @@ -40,7 +40,7 @@ class AQEOptimizer(conf: SQLConf) extends RuleExecutor[LogicalPlan] { ConvertToLocalRelation, UpdateAttributeNullability), Batch("Dynamic Join Selection", Once, DynamicJoinSelection), - Batch("Eliminate Limits", Once, EliminateLimits) + Batch("Eliminate Limits", fixedPoint, EliminateLimits) ) final override protected def batches: Seq[Batch] = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index a29989cc06c7c..de41b88ebde9c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -2243,6 +2243,15 @@ class AdaptiveQueryExecSuite """.stripMargin) assert(findTopLevelLimit(origin2).size == 1) assert(findTopLevelLimit(adaptive2).isEmpty) + + // The strategy of Eliminate Limits batch should be fixedPoint + val (origin3, adaptive3) = runAdaptiveAndVerifyResult( + """ + |SELECT * FROM (SELECT c1 + c2 FROM (SELECT DISTINCT * FROM v LIMIT 10086)) LIMIT 20 + """.stripMargin + ) + assert(findTopLevelLimit(origin3).size == 1) + assert(findTopLevelLimit(adaptive3).isEmpty) } } } From 18f9e7efac5100744f255b6c8ae267579cd8d9ce Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Mon, 24 Jan 2022 23:49:10 -0800 Subject: [PATCH 096/513] [SPARK-37258][K8S][BUILD] Upgrade kubernetes-client to 5.12.0 ### What changes were proposed in this pull request? This patch aims to upgrade `kubernetes-client` from 5.10.2 to 5.12.0 Hightlight changes: - v5.12.0: Update Fabric8 Kubernetes Model to v1.23.0, bump jackson-datatype-jsr310 to [2.13.1](https://github.com/fabric8io/kubernetes-client/commit/43562675d36a7240825453370f5d540370e84cbb). - v5.11.0: Introduce the Volcano extension, it would be useful for users who want to use Volcano as customized scheduler in Spark on K8S. 
- v5.11.0: Breaking change: an abstraction layer added over okHttp. There are also some changes to make sure it is still compatible with creating an HTTP client with a custom dispatcher, following the suggestion in https://github.com/fabric8io/kubernetes-client/issues/3663#issuecomment-997214403. ### Why are the changes needed? This will bring several bug fixes and improvements (such as Volcano support and Fabric8 Kubernetes model 1.23 support), see more in: - https://github.com/fabric8io/kubernetes-client/releases/tag/v5.11.0 - https://github.com/fabric8io/kubernetes-client/releases/tag/v5.11.1 - https://github.com/fabric8io/kubernetes-client/releases/tag/v5.11.2 - https://github.com/fabric8io/kubernetes-client/releases/tag/v5.12.0 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Pass the CIs. - Running a spark-pi job to validate manually. Closes #34939 from Yikun/SPARK-37258. Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 44 +++++++++---------- dev/deps/spark-deps-hadoop-3-hive-2.3 | 44 +++++++++---------- pom.xml | 2 +- .../k8s/SparkKubernetesClientFactory.scala | 16 ++++--- .../deploy/k8s/integrationtest/Utils.scala | 4 +- 5 files changed, 57 insertions(+), 53 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 164eab08cdd1b..5efdca9809924 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -118,7 +118,7 @@ jackson-core/2.13.1//jackson-core-2.13.1.jar jackson-databind/2.13.1//jackson-databind-2.13.1.jar jackson-dataformat-cbor/2.13.1//jackson-dataformat-cbor-2.13.1.jar jackson-dataformat-yaml/2.13.1//jackson-dataformat-yaml-2.13.1.jar -jackson-datatype-jsr310/2.13.0//jackson-datatype-jsr310-2.13.0.jar +jackson-datatype-jsr310/2.13.1//jackson-datatype-jsr310-2.13.1.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-scala_2.12/2.13.1//jackson-module-scala_2.12-2.13.1.jar @@ -162,27 +162,27 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/5.10.2//kubernetes-client-5.10.2.jar -kubernetes-model-admissionregistration/5.10.2//kubernetes-model-admissionregistration-5.10.2.jar -kubernetes-model-apiextensions/5.10.2//kubernetes-model-apiextensions-5.10.2.jar -kubernetes-model-apps/5.10.2//kubernetes-model-apps-5.10.2.jar -kubernetes-model-autoscaling/5.10.2//kubernetes-model-autoscaling-5.10.2.jar -kubernetes-model-batch/5.10.2//kubernetes-model-batch-5.10.2.jar -kubernetes-model-certificates/5.10.2//kubernetes-model-certificates-5.10.2.jar -kubernetes-model-common/5.10.2//kubernetes-model-common-5.10.2.jar -kubernetes-model-coordination/5.10.2//kubernetes-model-coordination-5.10.2.jar -kubernetes-model-core/5.10.2//kubernetes-model-core-5.10.2.jar -kubernetes-model-discovery/5.10.2//kubernetes-model-discovery-5.10.2.jar -kubernetes-model-events/5.10.2//kubernetes-model-events-5.10.2.jar -kubernetes-model-extensions/5.10.2//kubernetes-model-extensions-5.10.2.jar -kubernetes-model-flowcontrol/5.10.2//kubernetes-model-flowcontrol-5.10.2.jar -kubernetes-model-metrics/5.10.2//kubernetes-model-metrics-5.10.2.jar -kubernetes-model-networking/5.10.2//kubernetes-model-networking-5.10.2.jar -kubernetes-model-node/5.10.2//kubernetes-model-node-5.10.2.jar -kubernetes-model-policy/5.10.2//kubernetes-model-policy-5.10.2.jar
-kubernetes-model-rbac/5.10.2//kubernetes-model-rbac-5.10.2.jar -kubernetes-model-scheduling/5.10.2//kubernetes-model-scheduling-5.10.2.jar -kubernetes-model-storageclass/5.10.2//kubernetes-model-storageclass-5.10.2.jar +kubernetes-client/5.12.0//kubernetes-client-5.12.0.jar +kubernetes-model-admissionregistration/5.12.0//kubernetes-model-admissionregistration-5.12.0.jar +kubernetes-model-apiextensions/5.12.0//kubernetes-model-apiextensions-5.12.0.jar +kubernetes-model-apps/5.12.0//kubernetes-model-apps-5.12.0.jar +kubernetes-model-autoscaling/5.12.0//kubernetes-model-autoscaling-5.12.0.jar +kubernetes-model-batch/5.12.0//kubernetes-model-batch-5.12.0.jar +kubernetes-model-certificates/5.12.0//kubernetes-model-certificates-5.12.0.jar +kubernetes-model-common/5.12.0//kubernetes-model-common-5.12.0.jar +kubernetes-model-coordination/5.12.0//kubernetes-model-coordination-5.12.0.jar +kubernetes-model-core/5.12.0//kubernetes-model-core-5.12.0.jar +kubernetes-model-discovery/5.12.0//kubernetes-model-discovery-5.12.0.jar +kubernetes-model-events/5.12.0//kubernetes-model-events-5.12.0.jar +kubernetes-model-extensions/5.12.0//kubernetes-model-extensions-5.12.0.jar +kubernetes-model-flowcontrol/5.12.0//kubernetes-model-flowcontrol-5.12.0.jar +kubernetes-model-metrics/5.12.0//kubernetes-model-metrics-5.12.0.jar +kubernetes-model-networking/5.12.0//kubernetes-model-networking-5.12.0.jar +kubernetes-model-node/5.12.0//kubernetes-model-node-5.12.0.jar +kubernetes-model-policy/5.12.0//kubernetes-model-policy-5.12.0.jar +kubernetes-model-rbac/5.12.0//kubernetes-model-rbac-5.12.0.jar +kubernetes-model-scheduling/5.12.0//kubernetes-model-scheduling-5.12.0.jar +kubernetes-model-storageclass/5.12.0//kubernetes-model-storageclass-5.12.0.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 3a38d075c9307..a79a71b846dd1 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -109,7 +109,7 @@ jackson-core/2.13.1//jackson-core-2.13.1.jar jackson-databind/2.13.1//jackson-databind-2.13.1.jar jackson-dataformat-cbor/2.13.1//jackson-dataformat-cbor-2.13.1.jar jackson-dataformat-yaml/2.13.1//jackson-dataformat-yaml-2.13.1.jar -jackson-datatype-jsr310/2.13.0//jackson-datatype-jsr310-2.13.0.jar +jackson-datatype-jsr310/2.13.1//jackson-datatype-jsr310-2.13.1.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-scala_2.12/2.13.1//jackson-module-scala_2.12-2.13.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar @@ -148,27 +148,27 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/5.10.2//kubernetes-client-5.10.2.jar -kubernetes-model-admissionregistration/5.10.2//kubernetes-model-admissionregistration-5.10.2.jar -kubernetes-model-apiextensions/5.10.2//kubernetes-model-apiextensions-5.10.2.jar -kubernetes-model-apps/5.10.2//kubernetes-model-apps-5.10.2.jar -kubernetes-model-autoscaling/5.10.2//kubernetes-model-autoscaling-5.10.2.jar -kubernetes-model-batch/5.10.2//kubernetes-model-batch-5.10.2.jar -kubernetes-model-certificates/5.10.2//kubernetes-model-certificates-5.10.2.jar -kubernetes-model-common/5.10.2//kubernetes-model-common-5.10.2.jar -kubernetes-model-coordination/5.10.2//kubernetes-model-coordination-5.10.2.jar -kubernetes-model-core/5.10.2//kubernetes-model-core-5.10.2.jar 
-kubernetes-model-discovery/5.10.2//kubernetes-model-discovery-5.10.2.jar -kubernetes-model-events/5.10.2//kubernetes-model-events-5.10.2.jar -kubernetes-model-extensions/5.10.2//kubernetes-model-extensions-5.10.2.jar -kubernetes-model-flowcontrol/5.10.2//kubernetes-model-flowcontrol-5.10.2.jar -kubernetes-model-metrics/5.10.2//kubernetes-model-metrics-5.10.2.jar -kubernetes-model-networking/5.10.2//kubernetes-model-networking-5.10.2.jar -kubernetes-model-node/5.10.2//kubernetes-model-node-5.10.2.jar -kubernetes-model-policy/5.10.2//kubernetes-model-policy-5.10.2.jar -kubernetes-model-rbac/5.10.2//kubernetes-model-rbac-5.10.2.jar -kubernetes-model-scheduling/5.10.2//kubernetes-model-scheduling-5.10.2.jar -kubernetes-model-storageclass/5.10.2//kubernetes-model-storageclass-5.10.2.jar +kubernetes-client/5.12.0//kubernetes-client-5.12.0.jar +kubernetes-model-admissionregistration/5.12.0//kubernetes-model-admissionregistration-5.12.0.jar +kubernetes-model-apiextensions/5.12.0//kubernetes-model-apiextensions-5.12.0.jar +kubernetes-model-apps/5.12.0//kubernetes-model-apps-5.12.0.jar +kubernetes-model-autoscaling/5.12.0//kubernetes-model-autoscaling-5.12.0.jar +kubernetes-model-batch/5.12.0//kubernetes-model-batch-5.12.0.jar +kubernetes-model-certificates/5.12.0//kubernetes-model-certificates-5.12.0.jar +kubernetes-model-common/5.12.0//kubernetes-model-common-5.12.0.jar +kubernetes-model-coordination/5.12.0//kubernetes-model-coordination-5.12.0.jar +kubernetes-model-core/5.12.0//kubernetes-model-core-5.12.0.jar +kubernetes-model-discovery/5.12.0//kubernetes-model-discovery-5.12.0.jar +kubernetes-model-events/5.12.0//kubernetes-model-events-5.12.0.jar +kubernetes-model-extensions/5.12.0//kubernetes-model-extensions-5.12.0.jar +kubernetes-model-flowcontrol/5.12.0//kubernetes-model-flowcontrol-5.12.0.jar +kubernetes-model-metrics/5.12.0//kubernetes-model-metrics-5.12.0.jar +kubernetes-model-networking/5.12.0//kubernetes-model-networking-5.12.0.jar +kubernetes-model-node/5.12.0//kubernetes-model-node-5.12.0.jar +kubernetes-model-policy/5.12.0//kubernetes-model-policy-5.12.0.jar +kubernetes-model-rbac/5.12.0//kubernetes-model-rbac-5.12.0.jar +kubernetes-model-scheduling/5.12.0//kubernetes-model-scheduling-5.12.0.jar +kubernetes-model-storageclass/5.12.0//kubernetes-model-storageclass-5.12.0.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar diff --git a/pom.xml b/pom.xml index 9e18ba63bb7f5..5bae4d280038d 100644 --- a/pom.xml +++ b/pom.xml @@ -204,7 +204,7 @@ 6.0.1 org.fusesource.leveldbjni - 5.10.2 + 5.12.0 ${java.home} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala index 4131605e62b1f..54f557c750a4b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala @@ -24,9 +24,10 @@ import com.google.common.io.Files import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient} import io.fabric8.kubernetes.client.Config.KUBERNETES_REQUEST_RETRY_BACKOFFLIMIT_SYSTEM_PROPERTY import io.fabric8.kubernetes.client.Config.autoConfigure -import io.fabric8.kubernetes.client.utils.HttpClientUtils +import 
io.fabric8.kubernetes.client.okhttp.OkHttpClientFactory import io.fabric8.kubernetes.client.utils.Utils.getSystemPropertyOrEnvVar import okhttp3.Dispatcher +import okhttp3.OkHttpClient import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s.Config._ @@ -68,6 +69,8 @@ private[spark] object SparkKubernetesClientFactory extends Logging { .getOption(s"$kubernetesAuthConfPrefix.$CLIENT_KEY_FILE_CONF_SUFFIX") val clientCertFile = sparkConf .getOption(s"$kubernetesAuthConfPrefix.$CLIENT_CERT_FILE_CONF_SUFFIX") + // TODO(SPARK-37687): clean up direct usage of OkHttpClient, see also: + // https://github.com/fabric8io/kubernetes-client/issues/3547 val dispatcher = new Dispatcher( ThreadUtils.newDaemonCachedThreadPool("kubernetes-dispatcher")) @@ -105,13 +108,14 @@ private[spark] object SparkKubernetesClientFactory extends Logging { }.withOption(namespace) { (ns, configBuilder) => configBuilder.withNamespace(ns) }.build() - val baseHttpClient = HttpClientUtils.createHttpClient(config) - val httpClientWithCustomDispatcher = baseHttpClient.newBuilder() - .dispatcher(dispatcher) - .build() + val factoryWithCustomDispatcher = new OkHttpClientFactory() { + override protected def additionalConfig(builder: OkHttpClient.Builder): Unit = { + builder.dispatcher(dispatcher) + } + } logDebug("Kubernetes client config: " + new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(config)) - new DefaultKubernetesClient(httpClientWithCustomDispatcher, config) + new DefaultKubernetesClient(factoryWithCustomDispatcher.createHttpClient(config), config) } private implicit class OptionConfigurableConfigBuilder(val configBuilder: ConfigBuilder) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala index cc258533c2c8d..e0fd92617ba6d 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala @@ -24,7 +24,7 @@ import java.util.zip.{ZipEntry, ZipOutputStream} import scala.collection.JavaConverters._ import io.fabric8.kubernetes.client.dsl.ExecListener -import okhttp3.Response +import io.fabric8.kubernetes.client.dsl.ExecListener.Response import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveOutputStream} import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream import org.apache.commons.compress.utils.IOUtils @@ -62,7 +62,7 @@ object Utils extends Logging { val openLatch: CountDownLatch = new CountDownLatch(1) val closeLatch: CountDownLatch = new CountDownLatch(1) - override def onOpen(response: Response): Unit = { + override def onOpen(): Unit = { openLatch.countDown() } From ac2b0df68ae053be9f877d2ec82e50276d3ba7bc Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 25 Jan 2022 18:41:52 +0800 Subject: [PATCH 097/513] [SPARK-37915][SQL] Combine unions if there is a project between them ### What changes were proposed in this pull request? This pr makes `CombineUnions` combine unions if there is a project between them. 
For example: ```scala spark.range(1).selectExpr("CAST(id AS decimal(18, 1)) AS id").write.saveAsTable("t1") spark.range(2).selectExpr("CAST(id AS decimal(18, 2)) AS id").write.saveAsTable("t2") spark.range(3).selectExpr("CAST(id AS decimal(18, 3)) AS id").write.saveAsTable("t3") spark.range(4).selectExpr("CAST(id AS decimal(18, 4)) AS id").write.saveAsTable("t4") spark.range(5).selectExpr("CAST(id AS decimal(18, 5)) AS id").write.saveAsTable("t5") spark.sql("SELECT id FROM t1 UNION SELECT id FROM t2 UNION SELECT id FROM t3 UNION SELECT id FROM t4 UNION SELECT id FROM t5").explain(true) ``` Before this pr: ``` == Optimized Logical Plan == Aggregate [id#36], [id#36] +- Union false, false :- Aggregate [id#34], [cast(id#34 as decimal(22,5)) AS id#36] : +- Union false, false : :- Aggregate [id#32], [cast(id#32 as decimal(21,4)) AS id#34] : : +- Union false, false : : :- Aggregate [id#30], [cast(id#30 as decimal(20,3)) AS id#32] : : : +- Union false, false : : : :- Project [cast(id#25 as decimal(19,2)) AS id#30] : : : : +- Relation default.t1[id#25] parquet : : : +- Project [cast(id#26 as decimal(19,2)) AS id#31] : : : +- Relation default.t2[id#26] parquet : : +- Project [cast(id#27 as decimal(20,3)) AS id#33] : : +- Relation default.t3[id#27] parquet : +- Project [cast(id#28 as decimal(21,4)) AS id#35] : +- Relation default.t4[id#28] parquet +- Project [cast(id#29 as decimal(22,5)) AS id#37] +- Relation default.t5[id#29] parquet ``` After this pr: ``` == Optimized Logical Plan == Aggregate [id#36], [id#36] +- Union false, false :- Project [cast(id#25 as decimal(22,5)) AS id#36] : +- Relation default.t1[id#25] parquet :- Project [cast(id#26 as decimal(22,5)) AS id#46] : +- Relation default.t2[id#26] parquet :- Project [cast(id#27 as decimal(22,5)) AS id#45] : +- Relation default.t3[id#27] parquet :- Project [cast(id#28 as decimal(22,5)) AS id#44] : +- Relation default.t4[id#28] parquet +- Project [cast(id#29 as decimal(22,5)) AS id#37] +- Relation default.t5[id#29] parquet ``` ### Why are the changes needed? Improve query performance by reduce shuffles. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #35214 from wangyum/SPARK-37915. 
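As a quick sanity check of the flattening (a sketch, not part of the patch), the optimized plan can be inspected from spark-shell against the t1-t5 tables created in the example above; it assumes the shell-provided `spark` session.

```scala
// Count Union nodes in the optimized plan: with this change the five branches are
// flattened into a single Union instead of a chain of nested two-child Unions.
import org.apache.spark.sql.catalyst.plans.logical.Union

val optimized = spark.sql(
  "SELECT id FROM t1 UNION SELECT id FROM t2 UNION SELECT id FROM t3 " +
    "UNION SELECT id FROM t4 UNION SELECT id FROM t5").queryExecution.optimizedPlan

val unions = optimized.collect { case u: Union => u }
assert(unions.size == 1)                // a single flattened Union remains
assert(unions.head.children.size == 5)  // with all five relations as direct children
```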
Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 47 ++++++++++---- .../optimizer/SetOperationSuite.scala | 64 ++++++++++++++++++- 2 files changed, 97 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index b72d85be594d3..8fba271524e85 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -764,22 +764,22 @@ object PushProjectionThroughUnion extends Rule[LogicalPlan] with PredicateHelper result.asInstanceOf[A] } + def pushProjectionThroughUnion(projectList: Seq[NamedExpression], u: Union): Seq[LogicalPlan] = { + val newFirstChild = Project(projectList, u.children.head) + val newOtherChildren = u.children.tail.map { child => + val rewrites = buildRewrites(u.children.head, child) + Project(projectList.map(pushToRight(_, rewrites)), child) + } + newFirstChild +: newOtherChildren + } + def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning( _.containsAllPatterns(UNION, PROJECT)) { // Push down deterministic projection through UNION ALL - case p @ Project(projectList, u: Union) => - assert(u.children.nonEmpty) - if (projectList.forall(_.deterministic)) { - val newFirstChild = Project(projectList, u.children.head) - val newOtherChildren = u.children.tail.map { child => - val rewrites = buildRewrites(u.children.head, child) - Project(projectList.map(pushToRight(_, rewrites)), child) - } - u.copy(children = newFirstChild +: newOtherChildren) - } else { - p - } + case Project(projectList, u: Union) + if projectList.forall(_.deterministic) && u.children.nonEmpty => + u.copy(children = pushProjectionThroughUnion(projectList, u)) } } @@ -1006,7 +1006,7 @@ object CollapseProject extends Rule[LogicalPlan] with AliasHelper { }.isEmpty) } - private def buildCleanedProjectList( + def buildCleanedProjectList( upper: Seq[NamedExpression], lower: Seq[NamedExpression]): Seq[NamedExpression] = { val aliases = getAliasMap(lower) @@ -1300,6 +1300,9 @@ object InferFiltersFromConstraints extends Rule[LogicalPlan] * Combines all adjacent [[Union]] operators into a single [[Union]]. */ object CombineUnions extends Rule[LogicalPlan] { + import CollapseProject.{buildCleanedProjectList, canCollapseExpressions} + import PushProjectionThroughUnion.pushProjectionThroughUnion + def apply(plan: LogicalPlan): LogicalPlan = plan.transformDownWithPruning( _.containsAnyPattern(UNION, DISTINCT_LIKE), ruleId) { case u: Union => flattenUnion(u, false) @@ -1321,6 +1324,10 @@ object CombineUnions extends Rule[LogicalPlan] { // rules (by position and by name) could cause incorrect results. 
while (stack.nonEmpty) { stack.pop() match { + case p1 @ Project(_, p2: Project) + if canCollapseExpressions(p1.projectList, p2.projectList, alwaysInline = false) => + val newProjectList = buildCleanedProjectList(p1.projectList, p2.projectList) + stack.pushAll(Seq(p2.copy(projectList = newProjectList))) case Distinct(Union(children, byName, allowMissingCol)) if flattenDistinct && byName == topByName && allowMissingCol == topAllowMissingCol => stack.pushAll(children.reverse) @@ -1332,6 +1339,20 @@ object CombineUnions extends Rule[LogicalPlan] { case Union(children, byName, allowMissingCol) if byName == topByName && allowMissingCol == topAllowMissingCol => stack.pushAll(children.reverse) + // Push down projection through Union and then push pushed plan to Stack if + // there is a Project. + case Project(projectList, Distinct(u @ Union(children, byName, allowMissingCol))) + if projectList.forall(_.deterministic) && children.nonEmpty && + flattenDistinct && byName == topByName && allowMissingCol == topAllowMissingCol => + stack.pushAll(pushProjectionThroughUnion(projectList, u).reverse) + case Project(projectList, Deduplicate(keys: Seq[Attribute], u: Union)) + if projectList.forall(_.deterministic) && flattenDistinct && u.byName == topByName && + u.allowMissingCol == topAllowMissingCol && AttributeSet(keys) == u.outputSet => + stack.pushAll(pushProjectionThroughUnion(projectList, u).reverse) + case Project(projectList, u @ Union(children, byName, allowMissingCol)) + if projectList.forall(_.deterministic) && children.nonEmpty && + byName == topByName && allowMissingCol == topAllowMissingCol => + stack.pushAll(pushProjectionThroughUnion(projectList, u).reverse) case child => flattened += child } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SetOperationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SetOperationSuite.scala index 3fa7df3c94949..c4113e734c704 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SetOperationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SetOperationSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.{And, GreaterThan, GreaterThanO import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.types.BooleanType +import org.apache.spark.sql.types.{BooleanType, DecimalType} class SetOperationSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { @@ -328,4 +328,66 @@ class SetOperationSuite extends PlanTest { Union(testRelation :: testRelation :: testRelation :: testRelation :: Nil, true, false) comparePlans(unionOptimized2, unionCorrectAnswer2, false) } + + test("SPARK-37915: combine unions if there is a project between them") { + val relation1 = LocalRelation('a.decimal(18, 1), 'b.int) + val relation2 = LocalRelation('a.decimal(18, 2), 'b.int) + val relation3 = LocalRelation('a.decimal(18, 3), 'b.int) + val relation4 = LocalRelation('a.decimal(18, 4), 'b.int) + val relation5 = LocalRelation('a.decimal(18, 5), 'b.int) + + val optimizedRelation1 = relation1.select('a.cast(DecimalType(19, 2)).cast(DecimalType(20, 3)) + .cast(DecimalType(21, 4)).cast(DecimalType(22, 5)).as("a"), 'b) + val optimizedRelation2 = relation2.select('a.cast(DecimalType(19, 2)).cast(DecimalType(20, 3)) + .cast(DecimalType(21, 4)).cast(DecimalType(22, 5)).as("a"), 'b) + val optimizedRelation3 = 
relation3.select('a.cast(DecimalType(20, 3)) + .cast(DecimalType(21, 4)).cast(DecimalType(22, 5)).as("a"), 'b) + val optimizedRelation4 = relation4 + .select('a.cast(DecimalType(21, 4)).cast(DecimalType(22, 5)).as("a"), 'b) + val optimizedRelation5 = relation5.select('a.cast(DecimalType(22, 5)).as("a"), 'b) + + // SQL UNION ALL + comparePlans( + Optimize.execute(relation1.union(relation2) + .union(relation3).union(relation4).union(relation5).analyze), + Union(Seq(optimizedRelation1, optimizedRelation2, optimizedRelation3, + optimizedRelation4, optimizedRelation5)).analyze) + + // SQL UNION + comparePlans( + Optimize.execute(Distinct(Distinct(Distinct(Distinct(relation1.union(relation2)) + .union(relation3)).union(relation4)).union(relation5)).analyze), + Distinct(Union(Seq(optimizedRelation1, optimizedRelation2, optimizedRelation3, + optimizedRelation4, optimizedRelation5))).analyze) + + // Deduplicate + comparePlans( + Optimize.execute(relation1.union(relation2).deduplicate('a, 'b).union(relation3) + .deduplicate('a, 'b).union(relation4).deduplicate('a, 'b).union(relation5) + .deduplicate('a, 'b).analyze), + Deduplicate( + Seq('a, 'b), + Union(Seq(optimizedRelation1, optimizedRelation2, optimizedRelation3, + optimizedRelation4, optimizedRelation5))).analyze) + + // Other cases + comparePlans( + Optimize.execute(Distinct(Distinct(Distinct(Distinct(relation1.union(relation2)) + .union(relation3)).union(relation4)).union(relation5)).select('a % 2).analyze), + Distinct(Union(Seq(optimizedRelation1, optimizedRelation2, optimizedRelation3, + optimizedRelation4, optimizedRelation5))).select('a % 2).analyze) + + comparePlans( + Optimize.execute(Distinct(Distinct(Distinct(Distinct(relation1.union(relation2)) + .union(relation3)).union(relation4)).union(relation5)).select('a + 'b).analyze), + Distinct(Union(Seq(optimizedRelation1, optimizedRelation2, optimizedRelation3, + optimizedRelation4, optimizedRelation5))).select('a + 'b).analyze) + + comparePlans( + Optimize.execute(Distinct(Distinct(Distinct(Distinct(relation1.union(relation2)) + .union(relation3)).union(relation4)).union(relation5)).select('a).analyze), + Distinct(Union(Seq(optimizedRelation1, optimizedRelation2, optimizedRelation3, + optimizedRelation4, optimizedRelation5))).select('a).analyze) + + } } From 7148980dac987518a1d095c52fd9462d5a2cf47b Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Tue, 25 Jan 2022 18:50:33 +0800 Subject: [PATCH 098/513] [SPARK-37867][SQL] Compile aggregate functions of build-in JDBC dialect ### What changes were proposed in this pull request? DS V2 translate a lot of standard aggregate functions. Currently, only H2Dialect compile these standard aggregate functions. This PR compile these standard aggregate functions for other build-in JDBC dialect. ### Why are the changes needed? Make build-in JDBC dialect support complete aggregate push-down. ### Does this PR introduce _any_ user-facing change? 'Yes'. Users could use complete aggregate push-down with build-in JDBC dialect. ### How was this patch tested? New tests. Closes #35166 from beliefer/SPARK-37867. 
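To illustrate the user-visible effect, the sketch below (not part of the patch) assumes a catalog configured like the PostgreSQL test above, with `spark.sql.catalog.postgresql.pushDownAggregate=true` and the `employee` table from the test setup; the aggregate is then compiled by the built-in dialect and no Aggregate node remains in Spark's optimized plan.

```scala
import org.apache.spark.sql.catalyst.plans.logical.Aggregate

// VAR_POP is now compiled by the built-in PostgresDialect, so the whole GROUP BY
// is evaluated by the remote database rather than by Spark.
val df = spark.sql(
  "SELECT VAR_POP(bonus) FROM postgresql.employee WHERE dept > 0 GROUP BY dept ORDER BY dept")

// Mirrors checkAggregateRemoved in V2JDBCTest: no Aggregate is left in the Spark plan.
val remaining = df.queryExecution.optimizedPlan.collect { case a: Aggregate => a }
assert(remaining.isEmpty)
df.show()
```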
Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/DB2IntegrationSuite.scala | 17 +- .../v2/DockerJDBCIntegrationV2Suite.scala | 44 ++++ .../jdbc/v2/MsSqlServerIntegrationSuite.scala | 16 +- .../sql/jdbc/v2/MySQLIntegrationSuite.scala | 17 +- .../sql/jdbc/v2/OracleIntegrationSuite.scala | 24 ++- .../jdbc/v2/PostgresIntegrationSuite.scala | 19 +- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 198 +++++++++++++++--- .../apache/spark/sql/jdbc/DB2Dialect.scala | 13 ++ .../apache/spark/sql/jdbc/DerbyDialect.scala | 25 +++ .../spark/sql/jdbc/MsSqlServerDialect.scala | 25 +++ .../apache/spark/sql/jdbc/MySQLDialect.scala | 25 +++ .../apache/spark/sql/jdbc/OracleDialect.scala | 37 ++++ .../spark/sql/jdbc/PostgresDialect.scala | 37 ++++ .../spark/sql/jdbc/TeradataDialect.scala | 37 ++++ 14 files changed, 493 insertions(+), 41 deletions(-) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 5ac9a5191b010..2cfb21395d8a9 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -18,13 +18,14 @@ package org.apache.spark.sql.jdbc.v2 import java.sql.Connection +import java.util.Locale import org.scalatest.time.SpanSugar._ import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.jdbc.DatabaseOnDocker import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest @@ -36,8 +37,9 @@ import org.apache.spark.tags.DockerTest * }}} */ @DockerTest -class DB2IntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { +class DB2IntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { override val catalogName: String = "db2" + override val namespaceOpt: Option[String] = Some("DB2INST1") override val db = new DatabaseOnDocker { override val imageName = sys.env.getOrElse("DB2_DOCKER_IMAGE_NAME", "ibmcom/db2:11.5.6.0a") override val env = Map( @@ -59,8 +61,13 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { override def sparkConf: SparkConf = super.sparkConf .set("spark.sql.catalog.db2", classOf[JDBCTableCatalog].getName) .set("spark.sql.catalog.db2.url", db.getJdbcUrl(dockerIp, externalPort)) + .set("spark.sql.catalog.db2.pushDownAggregate", "true") - override def dataPreparation(conn: Connection): Unit = {} + override def tablePreparation(connection: Connection): Unit = { + connection.prepareStatement( + "CREATE TABLE employee (dept INTEGER, name VARCHAR(10), salary DECIMAL(20, 2), bonus DOUBLE)") + .executeUpdate() + } override def testUpdateColumnType(tbl: String): Unit = { sql(s"CREATE TABLE $tbl (ID INTEGER)") @@ -86,4 +93,8 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { val expectedSchema = new StructType().add("ID", IntegerType, true, defaultMetadata) assert(t.schema === expectedSchema) } + + override def caseConvert(tableName: String): String = tableName.toUpperCase(Locale.ROOT) + + testVarPop() } diff --git 
a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala new file mode 100644 index 0000000000000..72edfc9f1bf1c --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import org.apache.spark.sql.jdbc.DockerJDBCIntegrationSuite + +abstract class DockerJDBCIntegrationV2Suite extends DockerJDBCIntegrationSuite { + + /** + * Prepare databases and tables for testing. + */ + override def dataPreparation(connection: Connection): Unit = { + tablePreparation(connection) + connection.prepareStatement("INSERT INTO employee VALUES (1, 'amy', 10000, 1000)") + .executeUpdate() + connection.prepareStatement("INSERT INTO employee VALUES (2, 'alex', 12000, 1200)") + .executeUpdate() + connection.prepareStatement("INSERT INTO employee VALUES (1, 'cathy', 9000, 1200)") + .executeUpdate() + connection.prepareStatement("INSERT INTO employee VALUES (2, 'david', 10000, 1300)") + .executeUpdate() + connection.prepareStatement("INSERT INTO employee VALUES (6, 'jen', 12000, 1200)") + .executeUpdate() + } + + def tablePreparation(connection: Connection): Unit +} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index 75446fb50e45b..e9521ec35a8ce 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -24,7 +24,7 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.jdbc.DatabaseOnDocker import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest @@ -37,7 +37,7 @@ import org.apache.spark.tags.DockerTest * }}} */ @DockerTest -class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { +class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { override val catalogName: String = "mssql" @@ -58,10 +58,15 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBC override 
def sparkConf: SparkConf = super.sparkConf .set("spark.sql.catalog.mssql", classOf[JDBCTableCatalog].getName) .set("spark.sql.catalog.mssql.url", db.getJdbcUrl(dockerIp, externalPort)) + .set("spark.sql.catalog.mssql.pushDownAggregate", "true") override val connectionTimeout = timeout(7.minutes) - override def dataPreparation(conn: Connection): Unit = {} + override def tablePreparation(connection: Connection): Unit = { + connection.prepareStatement( + "CREATE TABLE employee (dept INT, name VARCHAR(32), salary NUMERIC(20, 2), bonus FLOAT)") + .executeUpdate() + } override def notSupportsTableComment: Boolean = true @@ -91,4 +96,9 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBC assert(msg.contains("UpdateColumnNullability is not supported")) } + + testVarPop() + testVarSamp() + testStddevPop() + testStddevSamp() } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala index 71adc51b87441..bc4bf54324ee5 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -24,7 +24,7 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.jdbc.DatabaseOnDocker import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest @@ -39,7 +39,7 @@ import org.apache.spark.tags.DockerTest * */ @DockerTest -class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { +class MySQLIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { override val catalogName: String = "mysql" override val db = new DatabaseOnDocker { override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:5.7.36") @@ -57,13 +57,17 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { override def sparkConf: SparkConf = super.sparkConf .set("spark.sql.catalog.mysql", classOf[JDBCTableCatalog].getName) .set("spark.sql.catalog.mysql.url", db.getJdbcUrl(dockerIp, externalPort)) + .set("spark.sql.catalog.mysql.pushDownAggregate", "true") override val connectionTimeout = timeout(7.minutes) private var mySQLVersion = -1 - override def dataPreparation(conn: Connection): Unit = { - mySQLVersion = conn.getMetaData.getDatabaseMajorVersion + override def tablePreparation(connection: Connection): Unit = { + mySQLVersion = connection.getMetaData.getDatabaseMajorVersion + connection.prepareStatement( + "CREATE TABLE employee (dept INT, name VARCHAR(32), salary DECIMAL(20, 2)," + + " bonus DOUBLE)").executeUpdate() } override def testUpdateColumnType(tbl: String): Unit = { @@ -119,4 +123,9 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { override def supportsIndex: Boolean = true override def indexOptions: String = "KEY_BLOCK_SIZE=10" + + testVarPop() + testVarSamp() + testStddevPop() + testStddevSamp() } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala 
b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala index ef8fe5354c540..2669924dc28c0 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -18,13 +18,14 @@ package org.apache.spark.sql.jdbc.v2 import java.sql.Connection +import java.util.Locale import org.scalatest.time.SpanSugar._ import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.jdbc.DatabaseOnDocker import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest @@ -54,8 +55,9 @@ import org.apache.spark.tags.DockerTest * This procedure has been validated with Oracle 18.4.0 Express Edition. */ @DockerTest -class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { +class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { override val catalogName: String = "oracle" + override val namespaceOpt: Option[String] = Some("SYSTEM") override val db = new DatabaseOnDocker { lazy override val imageName = sys.env.getOrElse("ORACLE_DOCKER_IMAGE_NAME", "gvenzl/oracle-xe:18.4.0") @@ -73,9 +75,15 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest override def sparkConf: SparkConf = super.sparkConf .set("spark.sql.catalog.oracle", classOf[JDBCTableCatalog].getName) .set("spark.sql.catalog.oracle.url", db.getJdbcUrl(dockerIp, externalPort)) + .set("spark.sql.catalog.oracle.pushDownAggregate", "true") override val connectionTimeout = timeout(7.minutes) - override def dataPreparation(conn: Connection): Unit = {} + + override def tablePreparation(connection: Connection): Unit = { + connection.prepareStatement( + "CREATE TABLE employee (dept NUMBER(32), name VARCHAR2(32), salary NUMBER(20, 2)," + + " bonus BINARY_DOUBLE)").executeUpdate() + } override def testUpdateColumnType(tbl: String): Unit = { sql(s"CREATE TABLE $tbl (ID INTEGER)") @@ -93,4 +101,14 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest assert(msg1.contains( s"Cannot update $catalogName.alt_table field ID: string cannot be cast to int")) } + + override def caseConvert(tableName: String): String = tableName.toUpperCase(Locale.ROOT) + + testVarPop() + testVarSamp() + testStddevPop() + testStddevSamp() + testCovarPop() + testCovarSamp() + testCorr() } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index 7fba6671ffe71..86f5c3c8cd418 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -22,7 +22,7 @@ import java.sql.Connection import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.jdbc.DatabaseOnDocker import org.apache.spark.sql.types._ import 
org.apache.spark.tags.DockerTest @@ -34,7 +34,7 @@ import org.apache.spark.tags.DockerTest * }}} */ @DockerTest -class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { +class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { override val catalogName: String = "postgresql" override val db = new DatabaseOnDocker { override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:14.0-alpine") @@ -51,8 +51,13 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTes .set("spark.sql.catalog.postgresql.url", db.getJdbcUrl(dockerIp, externalPort)) .set("spark.sql.catalog.postgresql.pushDownTableSample", "true") .set("spark.sql.catalog.postgresql.pushDownLimit", "true") + .set("spark.sql.catalog.postgresql.pushDownAggregate", "true") - override def dataPreparation(conn: Connection): Unit = {} + override def tablePreparation(connection: Connection): Unit = { + connection.prepareStatement( + "CREATE TABLE employee (dept INTEGER, name VARCHAR(32), salary NUMERIC(20, 2)," + + " bonus double precision)").executeUpdate() + } override def testUpdateColumnType(tbl: String): Unit = { sql(s"CREATE TABLE $tbl (ID INTEGER)") @@ -84,4 +89,12 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTes override def supportsIndex: Boolean = true override def indexOptions: String = "FILLFACTOR=70" + + testVarPop() + testVarSamp() + testStddevPop() + testStddevSamp() + testCovarPop() + testCovarSamp() + testCorr() } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 49aa20387e38e..6ea2099346781 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -19,12 +19,12 @@ package org.apache.spark.sql.jdbc.v2 import org.apache.logging.log4j.Level -import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.{AnalysisException, DataFrame} import org.apache.spark.sql.catalyst.analysis.{IndexAlreadyExistsException, NoSuchIndexException} -import org.apache.spark.sql.catalyst.plans.logical.{Filter, Sample} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Sample} import org.apache.spark.sql.connector.catalog.{Catalogs, Identifier, TableCatalog} import org.apache.spark.sql.connector.catalog.index.SupportsIndex +import org.apache.spark.sql.connector.expressions.aggregate.GeneralAggregateFunc import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanRelation, V1ScanWrapper} import org.apache.spark.sql.jdbc.DockerIntegrationFunSuite import org.apache.spark.sql.test.SharedSparkSession @@ -36,6 +36,12 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu import testImplicits._ val catalogName: String + + val namespaceOpt: Option[String] = None + + private def catalogAndNamespace = + namespaceOpt.map(namespace => s"$catalogName.$namespace").getOrElse(catalogName) + // dialect specific update column type test def testUpdateColumnType(tbl: String): Unit @@ -246,22 +252,30 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu def supportsTableSample: Boolean = false - private def samplePushed(df: DataFrame): Boolean = { + private def checkSamplePushed(df: 
DataFrame, pushed: Boolean = true): Unit = { val sample = df.queryExecution.optimizedPlan.collect { case s: Sample => s } - sample.isEmpty + if (pushed) { + assert(sample.isEmpty) + } else { + assert(sample.nonEmpty) + } } - private def filterPushed(df: DataFrame): Boolean = { + private def checkFilterPushed(df: DataFrame, pushed: Boolean = true): Unit = { val filter = df.queryExecution.optimizedPlan.collect { case f: Filter => f } - filter.isEmpty + if (pushed) { + assert(filter.isEmpty) + } else { + assert(filter.nonEmpty) + } } private def limitPushed(df: DataFrame, limit: Int): Boolean = { - val filter = df.queryExecution.optimizedPlan.collect { + df.queryExecution.optimizedPlan.collect { case relation: DataSourceV2ScanRelation => relation.scan match { case v1: V1ScanWrapper => return v1.pushedDownOperators.limit == Some(limit) @@ -270,11 +284,11 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu false } - private def columnPruned(df: DataFrame, col: String): Boolean = { + private def checkColumnPruned(df: DataFrame, col: String): Unit = { val scan = df.queryExecution.optimizedPlan.collectFirst { case s: DataSourceV2ScanRelation => s }.get - scan.schema.names.sameElements(Seq(col)) + assert(scan.schema.names.sameElements(Seq(col))) } test("SPARK-37038: Test TABLESAMPLE") { @@ -286,37 +300,37 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu // sample push down + column pruning val df1 = sql(s"SELECT col1 FROM $catalogName.new_table TABLESAMPLE (BUCKET 6 OUT OF 10)" + " REPEATABLE (12345)") - assert(samplePushed(df1)) - assert(columnPruned(df1, "col1")) + checkSamplePushed(df1) + checkColumnPruned(df1, "col1") assert(df1.collect().length < 10) // sample push down only val df2 = sql(s"SELECT * FROM $catalogName.new_table TABLESAMPLE (50 PERCENT)" + " REPEATABLE (12345)") - assert(samplePushed(df2)) + checkSamplePushed(df2) assert(df2.collect().length < 10) // sample(BUCKET ... OUT OF) push down + limit push down + column pruning val df3 = sql(s"SELECT col1 FROM $catalogName.new_table TABLESAMPLE (BUCKET 6 OUT OF 10)" + " LIMIT 2") - assert(samplePushed(df3)) + checkSamplePushed(df3) assert(limitPushed(df3, 2)) - assert(columnPruned(df3, "col1")) + checkColumnPruned(df3, "col1") assert(df3.collect().length <= 2) // sample(... 
PERCENT) push down + limit push down + column pruning val df4 = sql(s"SELECT col1 FROM $catalogName.new_table" + " TABLESAMPLE (50 PERCENT) REPEATABLE (12345) LIMIT 2") - assert(samplePushed(df4)) + checkSamplePushed(df4) assert(limitPushed(df4, 2)) - assert(columnPruned(df4, "col1")) + checkColumnPruned(df4, "col1") assert(df4.collect().length <= 2) // sample push down + filter push down + limit push down val df5 = sql(s"SELECT * FROM $catalogName.new_table" + " TABLESAMPLE (BUCKET 6 OUT OF 10) WHERE col1 > 0 LIMIT 2") - assert(samplePushed(df5)) - assert(filterPushed(df5)) + checkSamplePushed(df5) + checkFilterPushed(df5) assert(limitPushed(df5, 2)) assert(df5.collect().length <= 2) @@ -325,27 +339,161 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu // Todo: push down filter/limit val df6 = sql(s"SELECT col1 FROM $catalogName.new_table" + " TABLESAMPLE (BUCKET 6 OUT OF 10) WHERE col1 > 0 LIMIT 2") - assert(samplePushed(df6)) - assert(!filterPushed(df6)) + checkSamplePushed(df6) + checkFilterPushed(df6, false) assert(!limitPushed(df6, 2)) - assert(columnPruned(df6, "col1")) + checkColumnPruned(df6, "col1") assert(df6.collect().length <= 2) // sample + limit // Push down order is sample -> filter -> limit // only limit is pushed down because in this test sample is after limit val df7 = spark.read.table(s"$catalogName.new_table").limit(2).sample(0.5) - assert(!samplePushed(df7)) + checkSamplePushed(df7, false) assert(limitPushed(df7, 2)) // sample + filter // Push down order is sample -> filter -> limit // only filter is pushed down because in this test sample is after filter val df8 = spark.read.table(s"$catalogName.new_table").where($"col1" > 1).sample(0.5) - assert(!samplePushed(df8)) - assert(filterPushed(df8)) + checkSamplePushed(df8, false) + checkFilterPushed(df8) assert(df8.collect().length < 10) } } } + + protected def checkAggregateRemoved(df: DataFrame): Unit = { + val aggregates = df.queryExecution.optimizedPlan.collect { + case agg: Aggregate => agg + } + assert(aggregates.isEmpty) + } + + private def checkAggregatePushed(df: DataFrame, funcName: String): Unit = { + df.queryExecution.optimizedPlan.collect { + case DataSourceV2ScanRelation(_, scan, _) => + assert(scan.isInstanceOf[V1ScanWrapper]) + val wrapper = scan.asInstanceOf[V1ScanWrapper] + assert(wrapper.pushedDownOperators.aggregation.isDefined) + val aggregationExpressions = + wrapper.pushedDownOperators.aggregation.get.aggregateExpressions() + assert(aggregationExpressions.length == 1) + assert(aggregationExpressions(0).isInstanceOf[GeneralAggregateFunc]) + assert(aggregationExpressions(0).asInstanceOf[GeneralAggregateFunc].name() == funcName) + } + } + + protected def caseConvert(tableName: String): String = tableName + + protected def testVarPop(): Unit = { + test(s"scan with aggregate push-down: VAR_POP") { + val df = sql(s"SELECT VAR_POP(bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + + " WHERE dept > 0 GROUP BY dept ORDER BY dept") + checkFilterPushed(df) + checkAggregateRemoved(df) + checkAggregatePushed(df, "VAR_POP") + val row = df.collect() + assert(row.length === 3) + assert(row(0).getDouble(0) === 10000d) + assert(row(1).getDouble(0) === 2500d) + assert(row(2).getDouble(0) === 0d) + } + } + + protected def testVarSamp(): Unit = { + test(s"scan with aggregate push-down: VAR_SAMP") { + val df = sql( + s"SELECT VAR_SAMP(bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + + " WHERE dept > 0 GROUP BY dept ORDER BY dept") + checkFilterPushed(df) + 
checkAggregateRemoved(df) + checkAggregatePushed(df, "VAR_SAMP") + val row = df.collect() + assert(row.length === 3) + assert(row(0).getDouble(0) === 20000d) + assert(row(1).getDouble(0) === 5000d) + assert(row(2).isNullAt(0)) + } + } + + protected def testStddevPop(): Unit = { + test("scan with aggregate push-down: STDDEV_POP") { + val df = sql( + s"SELECT STDDEV_POP(bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + + " WHERE dept > 0 GROUP BY dept ORDER BY dept") + checkFilterPushed(df) + checkAggregateRemoved(df) + checkAggregatePushed(df, "STDDEV_POP") + val row = df.collect() + assert(row.length === 3) + assert(row(0).getDouble(0) === 100d) + assert(row(1).getDouble(0) === 50d) + assert(row(2).getDouble(0) === 0d) + } + } + + protected def testStddevSamp(): Unit = { + test("scan with aggregate push-down: STDDEV_SAMP") { + val df = sql( + s"SELECT STDDEV_SAMP(bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + + " WHERE dept > 0 GROUP BY dept ORDER BY dept") + checkFilterPushed(df) + checkAggregateRemoved(df) + checkAggregatePushed(df, "STDDEV_SAMP") + val row = df.collect() + assert(row.length === 3) + assert(row(0).getDouble(0) === 141.4213562373095d) + assert(row(1).getDouble(0) === 70.71067811865476d) + assert(row(2).isNullAt(0)) + } + } + + protected def testCovarPop(): Unit = { + test("scan with aggregate push-down: COVAR_POP") { + val df = sql( + s"SELECT COVAR_POP(bonus, bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + + " WHERE dept > 0 GROUP BY dept ORDER BY dept") + checkFilterPushed(df) + checkAggregateRemoved(df) + checkAggregatePushed(df, "COVAR_POP") + val row = df.collect() + assert(row.length === 3) + assert(row(0).getDouble(0) === 10000d) + assert(row(1).getDouble(0) === 2500d) + assert(row(2).getDouble(0) === 0d) + } + } + + protected def testCovarSamp(): Unit = { + test("scan with aggregate push-down: COVAR_SAMP") { + val df = sql( + s"SELECT COVAR_SAMP(bonus, bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + + " WHERE dept > 0 GROUP BY dept ORDER BY dept") + checkFilterPushed(df) + checkAggregateRemoved(df) + checkAggregatePushed(df, "COVAR_SAMP") + val row = df.collect() + assert(row.length === 3) + assert(row(0).getDouble(0) === 20000d) + assert(row(1).getDouble(0) === 5000d) + assert(row(2).isNullAt(0)) + } + } + + protected def testCorr(): Unit = { + test("scan with aggregate push-down: CORR") { + val df = sql( + s"SELECT CORR(bonus, bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + + " WHERE dept > 0 GROUP BY dept ORDER BY dept") + checkFilterPushed(df) + checkAggregateRemoved(df) + checkAggregatePushed(df, "CORR") + val row = df.collect() + assert(row.length === 3) + assert(row(0).getDouble(0) === 1d) + assert(row(1).getDouble(0) === 1d) + assert(row(2).isNullAt(0)) + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala index 0b394db5c8932..9e9aac679ab39 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.jdbc import java.sql.Types import java.util.Locale +import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, GeneralAggregateFunc} import org.apache.spark.sql.types._ private object DB2Dialect extends JdbcDialect { @@ -27,6 +28,18 @@ private object DB2Dialect extends JdbcDialect { override def canHandle(url: String): Boolean = 
url.toLowerCase(Locale.ROOT).startsWith("jdbc:db2") + override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { + super.compileAggregate(aggFunction).orElse( + aggFunction match { + case f: GeneralAggregateFunc if f.name() == "VAR_POP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"VARIANCE($distinct${f.inputs().head})") + case _ => None + } + ) + } + override def getCatalystType( sqlType: Int, typeName: String, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala index f19ef7ead5f8e..e87d4d08ae031 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.jdbc import java.sql.Types import java.util.Locale +import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, GeneralAggregateFunc} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.types._ @@ -29,6 +30,30 @@ private object DerbyDialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:derby") + override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { + super.compileAggregate(aggFunction).orElse( + aggFunction match { + case f: GeneralAggregateFunc if f.name() == "VAR_POP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"VAR_POP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"VAR_SAMP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDDEV_POP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDDEV_SAMP($distinct${f.inputs().head})") + case _ => None + } + ) + } + override def getCatalystType( sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { if (sqlType == Types.REAL) Option(FloatType) else None diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index 8e5674a181e7a..442c5599b3ab3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.jdbc import java.util.Locale +import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, GeneralAggregateFunc} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -36,6 +37,30 @@ private object MsSqlServerDialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:sqlserver") + override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { + super.compileAggregate(aggFunction).orElse( + aggFunction match { + case f: GeneralAggregateFunc if f.name() == "VAR_POP" => + assert(f.inputs().length == 1) + val distinct 
= if (f.isDistinct) "DISTINCT " else "" + Some(s"VARP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"VAR($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDEVP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDEV($distinct${f.inputs().head})") + case _ => None + } + ) + } + override def getCatalystType( sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { if (typeName.contains("datetimeoffset")) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index fb98996e6bf8b..9fcb7a27d17af 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.analysis.{IndexAlreadyExistsException, NoSuchIndexException} import org.apache.spark.sql.connector.catalog.index.TableIndex import org.apache.spark.sql.connector.expressions.{FieldReference, NamedReference} +import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, GeneralAggregateFunc} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} import org.apache.spark.sql.types.{BooleanType, DataType, FloatType, LongType, MetadataBuilder} @@ -35,6 +36,30 @@ private case object MySQLDialect extends JdbcDialect with SQLConfHelper { override def canHandle(url : String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:mysql") + override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { + super.compileAggregate(aggFunction).orElse( + aggFunction match { + case f: GeneralAggregateFunc if f.name() == "VAR_POP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"VAR_POP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"VAR_SAMP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDDEV_POP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDDEV_SAMP($distinct${f.inputs().head})") + case _ => None + } + ) + } + override def getCatalystType( sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index b741ece8dda9b..4fe7d93142c1e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -21,6 +21,7 @@ import java.sql.{Date, Timestamp, Types} import java.util.{Locale, TimeZone} import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, GeneralAggregateFunc} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -33,6 +34,42 @@ private case object OracleDialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:oracle") + override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { + super.compileAggregate(aggFunction).orElse( + aggFunction match { + case f: GeneralAggregateFunc if f.name() == "VAR_POP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"VAR_POP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"VAR_SAMP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDDEV_POP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDDEV_SAMP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "COVAR_POP" => + assert(f.inputs().length == 2) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"COVAR_POP($distinct${f.inputs().head}, ${f.inputs().last})") + case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" => + assert(f.inputs().length == 2) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"COVAR_SAMP($distinct${f.inputs().head}, ${f.inputs().last})") + case f: GeneralAggregateFunc if f.name() == "CORR" => + assert(f.inputs().length == 2) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"CORR($distinct${f.inputs().head}, ${f.inputs().last})") + case _ => None + } + ) + } + private def supportTimeZoneTypes: Boolean = { val timeZone = DateTimeUtils.getTimeZone(SQLConf.get.sessionLocalTimeZone) // TODO: support timezone types when users are not using the JVM timezone, which diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index 356cb4ddbd008..3b1a2c81fffd6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.analysis.{IndexAlreadyExistsException, NoSuchIndexException} import org.apache.spark.sql.connector.expressions.NamedReference +import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, GeneralAggregateFunc} import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo import org.apache.spark.sql.types._ @@ -35,6 +36,42 @@ private object PostgresDialect extends JdbcDialect with SQLConfHelper { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:postgresql") + override def 
compileAggregate(aggFunction: AggregateFunc): Option[String] = { + super.compileAggregate(aggFunction).orElse( + aggFunction match { + case f: GeneralAggregateFunc if f.name() == "VAR_POP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"VAR_POP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"VAR_SAMP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDDEV_POP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDDEV_SAMP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "COVAR_POP" => + assert(f.inputs().length == 2) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"COVAR_POP($distinct${f.inputs().head}, ${f.inputs().last})") + case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" => + assert(f.inputs().length == 2) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"COVAR_SAMP($distinct${f.inputs().head}, ${f.inputs().last})") + case f: GeneralAggregateFunc if f.name() == "CORR" => + assert(f.inputs().length == 2) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"CORR($distinct${f.inputs().head}, ${f.inputs().last})") + case _ => None + } + ) + } + override def getCatalystType( sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { if (sqlType == Types.REAL) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala index 13f4c5fe9c926..6344667b3180e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.jdbc import java.util.Locale +import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, GeneralAggregateFunc} import org.apache.spark.sql.types._ @@ -27,6 +28,42 @@ private case object TeradataDialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:teradata") + override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { + super.compileAggregate(aggFunction).orElse( + aggFunction match { + case f: GeneralAggregateFunc if f.name() == "VAR_POP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"VAR_POP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"VAR_SAMP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDDEV_POP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDDEV_SAMP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "COVAR_POP" => + assert(f.inputs().length == 2) + val 
distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"COVAR_POP($distinct${f.inputs().head}, ${f.inputs().last})") + case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" => + assert(f.inputs().length == 2) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"COVAR_SAMP($distinct${f.inputs().head}, ${f.inputs().last})") + case f: GeneralAggregateFunc if f.name() == "CORR" => + assert(f.inputs().length == 2) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"CORR($distinct${f.inputs().head}, ${f.inputs().last})") + case _ => None + } + ) + } + override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { case StringType => Some(JdbcType("VARCHAR(255)", java.sql.Types.VARCHAR)) case BooleanType => Option(JdbcType("CHAR(1)", java.sql.Types.CHAR)) From e2c4913c2e43481d1a12e5a2f307ed8a8d913311 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 25 Jan 2022 03:00:34 -0800 Subject: [PATCH 099/513] [SPARK-38019][CORE] Make `ExecutorMonitor.timedOutExecutors` deterministic ### What changes were proposed in this pull request? This PR aims to make the `ExecutorMonitor.timedOutExecutors` method deterministic. ### Why are the changes needed? Since the AS-IS `timedOutExecutors` returns its result non-deterministically, executors are killed in a random order when Dynamic Allocation is enabled. https://github.com/apache/spark/blob/18f9e7efac5100744f255b6c8ae267579cd8d9ce/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala#L58 https://github.com/apache/spark/blob/18f9e7efac5100744f255b6c8ae267579cd8d9ce/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala#L119 This random behavior not only confuses users but also makes the K8s decommission test flaky, like the following case in Java 17 on an Apple Silicon environment. The K8s test expects executor 1 to be decommissioned, but executor 2 is chosen instead. ``` 22/01/25 06:11:16 DEBUG ExecutorMonitor: Executors 1,2 do not have active shuffle data after job 0 finished. 22/01/25 06:11:16 DEBUG ExecutorAllocationManager: max needed for rpId: 0 numpending: 0, tasksperexecutor: 1 22/01/25 06:11:16 DEBUG ExecutorAllocationManager: No change in number of executors 22/01/25 06:11:16 DEBUG ExecutorAllocationManager: Request to remove executorIds: (2,0), (1,0) 22/01/25 06:11:16 DEBUG ExecutorAllocationManager: Not removing idle executor 1 because there are only 1 executor(s) left (minimum number of executor limit 1) 22/01/25 06:11:16 INFO KubernetesClusterSchedulerBackend: Decommission executors: 2 ``` ### Does this PR introduce _any_ user-facing change? No, because the previous behavior returned a random list and the new behavior is deterministic. ### How was this patch tested? Pass the CIs with the newly added test case. Closes #35315 from dongjoon-hyun/SPARK-38019. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../scheduler/dynalloc/ExecutorMonitor.scala | 2 +- .../scheduler/dynalloc/ExecutorMonitorSuite.scala | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala b/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala index 4939dab5702a7..3dea64c34a327 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala @@ -134,7 +134,7 @@ private[spark] class ExecutorMonitor( .toSeq updateNextTimeout(newNextTimeout) } - timedOutExecs + timedOutExecs.sortBy(_._1) } /** diff --git a/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala index 69afdb57ef404..6fb89b883a1f8 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala @@ -233,6 +233,21 @@ class ExecutorMonitorSuite extends SparkFunSuite { assert(monitor.timedOutExecutors(clock.nanoTime()).toSet === Set("1", "2", "3")) } + test("SPARK-38019: timedOutExecutors should be deterministic") { + knownExecs ++= Set("1", "2", "3") + + // start exec 1, 2, 3 at 0s (should idle time out at 60s) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) + assert(monitor.isExecutorIdle("1")) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "2", execInfo)) + assert(monitor.isExecutorIdle("2")) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "3", execInfo)) + assert(monitor.isExecutorIdle("3")) + + clock.setTime(TimeUnit.SECONDS.toMillis(150)) + assert(monitor.timedOutExecutors().map(_._1) === Seq("1", "2", "3")) + } + test("SPARK-27677: don't track blocks stored on disk when using shuffle service") { // First make sure that blocks on disk are counted when no shuffle service is available. monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) From 1bda48b1e14f1034f657bd0df4d651a812fe0614 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Tue, 25 Jan 2022 19:23:33 +0800 Subject: [PATCH 100/513] [SPARK-38018][SQL] Fix ColumnVectorUtils.populate to handle CalendarIntervalType correctly ### What changes were proposed in this pull request? [`ColumnVectorUtils.populate()` does not handle CalendarInterval type correctly](https://github.com/apache/spark/blob/master/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java#L93-L94). The CalendarInterval type is in the format of [(months: int, days: int, microseconds: long)](https://github.com/apache/spark/blob/master/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java#L58 ). However, the function above misses `days` field, and sets `microseconds` field in wrong position. `ColumnVectorUtils.populate()` is used by [Parquet](https://github.com/apache/spark/blob/master/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java#L258) and [ORC](https://github.com/apache/spark/blob/master/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java#L171) vectorized reader to read partition column. 
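To make the fix concrete: a CalendarInterval partition value has to be split across the three child vectors of the interval column (months, days, microseconds). The sketch below is condensed from the unit test added in this PR and only illustrates the expected round-trip; the values are arbitrary.
```
import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow
import org.apache.spark.sql.execution.vectorized.{ColumnVectorUtils, OnHeapColumnVector}
import org.apache.spark.sql.types.CalendarIntervalType
import org.apache.spark.unsafe.types.CalendarInterval

// A CalendarInterval column is backed by three child vectors:
// child(0) = months (int), child(1) = days (int), child(2) = microseconds (long).
val vector = new OnHeapColumnVector(5, CalendarIntervalType)
val row = new SpecificInternalRow(Array(CalendarIntervalType))
row.setInterval(0, new CalendarInterval(3, 5, 1000000))

// With the fix, populate() writes each field to its own child and the value round-trips.
// Before the fix, microseconds were written into the int-typed days child (which triggers
// the NullPointerException shown below) and the days field was never written at all.
ColumnVectorUtils.populate(vector, row, 0)
assert(vector.getInterval(0) == new CalendarInterval(3, 5, 1000000))
```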
So technically Spark can potentially produce wrong results if reading a table with a CalendarInterval partition column. However, I also notice Spark [explicitly disallows writing data with CalendarInterval type](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala#L586 ), so it might not be a big deal for users. But it's worth fixing anyway. Caveat: I found the bug when reading through the related code path, but I never encountered the issue in production for a partition column with CalendarInterval type. I think it should be an obvious fix unless someone more experienced can find more historical context. The code was introduced a long time ago, and I couldn't find any more info on why it was implemented as it is (https://github.com/apache/spark/pull/11435) ### Why are the changes needed? To fix a potential correctness issue. ### Does this PR introduce _any_ user-facing change? No, but it fixes the existing correctness issue when reading a partition column with CalendarInterval type. ### How was this patch tested? Added a unit test in `ColumnVectorSuite.scala`. Verified that the unit test failed with the exception below without this PR: ``` java.lang.NullPointerException was thrown. java.lang.NullPointerException at org.apache.spark.sql.execution.vectorized.OnHeapColumnVector.putLongs(OnHeapColumnVector.java:345) at org.apache.spark.sql.execution.vectorized.ColumnVectorUtils.populate(ColumnVectorUtils.java:94) at org.apache.spark.sql.execution.vectorized.ColumnVectorSuite.$anonfun$new$99(ColumnVectorSuite.scala:613) ``` Closes #35314 from c21/vector-fix. Authored-by: Cheng Su Signed-off-by: Wenchen Fan --- .../sql/execution/vectorized/ColumnVectorUtils.java | 3 ++- .../sql/execution/vectorized/ColumnVectorSuite.scala | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java index 37c348cf4ed66..353a128254412 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java @@ -91,7 +91,8 @@ public static void populate(WritableColumnVector col, InternalRow row, int field } else if (t instanceof CalendarIntervalType) { CalendarInterval c = (CalendarInterval)row.get(fieldIdx, t); col.getChild(0).putInts(0, capacity, c.months); - col.getChild(1).putLongs(0, capacity, c.microseconds); + col.getChild(1).putInts(0, capacity, c.days); + col.getChild(2).putLongs(0, capacity, c.microseconds); } else if (t instanceof DateType || t instanceof YearMonthIntervalType) { col.putInts(0, capacity, row.getInt(fieldIdx)); } else if (t instanceof TimestampType || t instanceof TimestampNTZType || diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala index cdf41ed651d4e..4cf2376a3fccd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.execution.columnar.ColumnAccessor import org.apache.spark.sql.execution.columnar.compression.ColumnBuilderHelper import org.apache.spark.sql.types._ import 
org.apache.spark.sql.vectorized.ColumnarArray -import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { private def withVector( @@ -605,5 +605,14 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } } } + + test("SPARK-38018: ColumnVectorUtils.populate to handle CalendarIntervalType correctly") { + val vector = new OnHeapColumnVector(5, CalendarIntervalType) + val row = new SpecificInternalRow(Array(CalendarIntervalType)) + val interval = new CalendarInterval(3, 5, 1000000) + row.setInterval(0, interval) + ColumnVectorUtils.populate(vector, row, 0) + assert(vector.getInterval(0) === interval) + } } From 48a440fe1fc334134f42a726cc6fb3d98802e0fd Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 25 Jan 2022 20:41:38 +0900 Subject: [PATCH 101/513] [SPARK-38016][SQL][DOCS] Fix the API doc for session_window to say it supports TimestampNTZType too as timeColumn ### What changes were proposed in this pull request? This PR fixes the API docs for `session_window` to say it supports `TimestampNTZType` too as `timeColumn`. ### Why are the changes needed? As of Spark 3.3.0 (e858cd568a74123f7fd8fe4c3d2917a), `session_window` supports not only `TimestampType` but also `TimestampNTZType`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Built the docs with the following commands. ``` bundle install SKIP_RDOC=1 SKIP_SQLDOC=1 bundle exec jekyll build ``` Then, confirmed the built doc. ![session_window_timestampntz](https://user-images.githubusercontent.com/4736016/150925544-7f9a2297-36c5-419a-b2b5-a8e43dfb50ff.png) ![session_window_timestampntz_python](https://user-images.githubusercontent.com/4736016/150925570-c8d59d1f-666a-49d9-a6e7-084d6e877871.png) Closes #35312 from sarutak/sessionwindow-timestampntz-doc. Authored-by: Kousuke Saruta Signed-off-by: Kousuke Saruta --- python/pyspark/sql/functions.py | 2 +- sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index e69c37d320a34..bfee9948d4cd7 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2623,7 +2623,7 @@ def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str]) ---------- timeColumn : :class:`~pyspark.sql.Column` or str The column name or column to use as the timestamp for windowing by time. - The time column must be of TimestampType. + The time column must be of TimestampType or TimestampNTZType. gapDuration : :class:`~pyspark.sql.Column` or str A Python string literal or column specifying the timeout of the session. It could be static value, e.g. `10 minutes`, `1 second`, or an expression/UDF that specifies gap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index ec28d8dde38e3..f217dad5907ad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3750,7 +3750,7 @@ object functions { * processing time. * * @param timeColumn The column or the expression to use as the timestamp for windowing by time. - * The time column must be of TimestampType. + * The time column must be of TimestampType or TimestampNTZType. 
* @param gapDuration A string specifying the timeout of the session, e.g. `10 minutes`, * `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for * valid duration identifiers. @@ -3787,7 +3787,7 @@ object functions { * processing time. * * @param timeColumn The column or the expression to use as the timestamp for windowing by time. - * The time column must be of TimestampType. + * The time column must be of TimestampType or TimestampNTZType. * @param gapDuration A column specifying the timeout of the session. It could be static value, * e.g. `10 minutes`, `1 second`, or an expression/UDF that specifies gap * duration dynamically based on the input row. From 76f685d26dc1f0f4d92293cd370e58ee2fa68452 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 25 Jan 2022 20:44:06 +0900 Subject: [PATCH 102/513] [SPARK-38017][SQL][DOCS] Fix the API doc for window to say it supports TimestampNTZType too as timeColumn ### What changes were proposed in this pull request? This PR fixes the API docs for `window` to say it supports `TimestampNTZType` too as `timeColumn`. ### Why are the changes needed? `window` function supports not only `TimestampType` but also `TimestampNTZType`. ### Does this PR introduce _any_ user-facing change? Yes, but I don't think this change affects existing users. ### How was this patch tested? Built the docs with the following commands. ``` bundle install SKIP_RDOC=1 SKIP_SQLDOC=1 bundle exec jekyll build ``` Then, confirmed the built doc. ![window_timestampntz](https://user-images.githubusercontent.com/4736016/150927548-2b1bec61-a165-410d-b8b2-5cd33ed13a50.png) ![window_timestmapntz_python](https://user-images.githubusercontent.com/4736016/150927564-450da33b-540f-4b97-a0e3-cae7897d9ea4.png) Closes #35313 from sarutak/window-timestampntz-doc. Authored-by: Kousuke Saruta Signed-off-by: Kousuke Saruta --- python/pyspark/sql/functions.py | 2 +- .../src/main/scala/org/apache/spark/sql/functions.scala | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index bfee9948d4cd7..2dfaec8a9403a 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2551,7 +2551,7 @@ def window( ---------- timeColumn : :class:`~pyspark.sql.Column` The column or the expression to use as the timestamp for windowing by time. - The time column must be of TimestampType. + The time column must be of TimestampType or TimestampNTZType. windowDuration : str A string specifying the width of the window, e.g. `10 minutes`, `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index f217dad5907ad..0db12a24e6ef9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3621,7 +3621,7 @@ object functions { * processing time. * * @param timeColumn The column or the expression to use as the timestamp for windowing by time. - * The time column must be of TimestampType. + * The time column must be of TimestampType or TimestampNTZType. * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`, * `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for * valid duration identifiers. Note that the duration is a fixed length of @@ -3677,7 +3677,7 @@ object functions { * processing time. 
* * @param timeColumn The column or the expression to use as the timestamp for windowing by time. - * The time column must be of TimestampType. + * The time column must be of TimestampType or TimestampNTZType. * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`, * `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for * valid duration identifiers. Note that the duration is a fixed length of @@ -3722,7 +3722,7 @@ object functions { * processing time. * * @param timeColumn The column or the expression to use as the timestamp for windowing by time. - * The time column must be of TimestampType. + * The time column must be of TimestampType or TimestampNTZType. * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`, * `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for * valid duration identifiers. From a1b061d7fc5427138bfaa9fe68d2748f8bf3907c Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 25 Jan 2022 20:57:16 +0900 Subject: [PATCH 103/513] [SPARK-38021][BUILD] Upgrade dropwizard metrics from 4.2.2 to 4.2.7 ### What changes were proposed in this pull request? This pr upgrade dropwizard metrics from 4.2.2 to 4.2.7. ### Why are the changes needed? There are 5 versions after 4.2.2, the release notes as follows: - https://github.com/dropwizard/metrics/releases/tag/v4.2.3 - https://github.com/dropwizard/metrics/releases/tag/v4.2.4 - https://github.com/dropwizard/metrics/releases/tag/v4.2.5 - https://github.com/dropwizard/metrics/releases/tag/v4.2.6 - https://github.com/dropwizard/metrics/releases/tag/v4.2.7 And after 4.2.5, dropwizard metrics supports [build with JDK 17](https://github.com/dropwizard/metrics/pull/2180). ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35317 from LuciferYang/upgrade-metrics. 
Authored-by: yangjie01 Signed-off-by: Kousuke Saruta --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 10 +++++----- dev/deps/spark-deps-hadoop-3-hive-2.3 | 10 +++++----- pom.xml | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 5efdca9809924..8284237904765 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -195,11 +195,11 @@ logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar -metrics-core/4.2.2//metrics-core-4.2.2.jar -metrics-graphite/4.2.2//metrics-graphite-4.2.2.jar -metrics-jmx/4.2.2//metrics-jmx-4.2.2.jar -metrics-json/4.2.2//metrics-json-4.2.2.jar -metrics-jvm/4.2.2//metrics-jvm-4.2.2.jar +metrics-core/4.2.7//metrics-core-4.2.7.jar +metrics-graphite/4.2.7//metrics-graphite-4.2.7.jar +metrics-jmx/4.2.7//metrics-jmx-4.2.7.jar +metrics-json/4.2.7//metrics-json-4.2.7.jar +metrics-jvm/4.2.7//metrics-jvm-4.2.7.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.73.Final//netty-all-4.1.73.Final.jar netty-buffer/4.1.73.Final//netty-buffer-4.1.73.Final.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index a79a71b846dd1..f1692777dadb1 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -181,11 +181,11 @@ logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar -metrics-core/4.2.2//metrics-core-4.2.2.jar -metrics-graphite/4.2.2//metrics-graphite-4.2.2.jar -metrics-jmx/4.2.2//metrics-jmx-4.2.2.jar -metrics-json/4.2.2//metrics-json-4.2.2.jar -metrics-jvm/4.2.2//metrics-jvm-4.2.2.jar +metrics-core/4.2.7//metrics-core-4.2.7.jar +metrics-graphite/4.2.7//metrics-graphite-4.2.7.jar +metrics-jmx/4.2.7//metrics-jmx-4.2.7.jar +metrics-json/4.2.7//metrics-json-4.2.7.jar +metrics-jvm/4.2.7//metrics-jvm-4.2.7.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.73.Final//netty-all-4.1.73.Final.jar netty-buffer/4.1.73.Final//netty-buffer-4.1.73.Final.jar diff --git a/pom.xml b/pom.xml index 5bae4d280038d..09577f220de5c 100644 --- a/pom.xml +++ b/pom.xml @@ -147,7 +147,7 @@ If you changes codahale.metrics.version, you also need to change the link to metrics.dropwizard.io in docs/monitoring.md. --> - 4.2.2 + 4.2.7 1.11.0 1.12.0 From a13f79a49fb77dc3c876f551c2c712f2fc69675c Mon Sep 17 00:00:00 2001 From: dch nguyen Date: Tue, 25 Jan 2022 22:13:52 +0800 Subject: [PATCH 104/513] [SPARK-37479][SQL] Migrate DROP NAMESPACE to use V2 command by default ### What changes were proposed in this pull request? This PR migrates `DROP NAMESPACE` to use V2 command by default. ### Why are the changes needed? It's been a while since we introduced the v2 commands, and it seems reasonable to use v2 commands by default even for the session catalog, with a legacy config to fall back to the v1 commands. ### Does this PR introduce _any_ user-facing change? The error message will be different if drop database containing tables with RESTRICT mode when v2 command is run against v1 catalog and Hive Catalog: Before: `Cannot drop a non-empty database` vs. After: `Cannot drop a non-empty namespace` ### How was this patch tested? 
Existing *DropNamespaceSuite tests Closes #35202 from dchvn/migrate_dropnamespace_v2_command_default. Authored-by: dch nguyen Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/ResolveSessionCatalog.scala | 2 +- .../sql/execution/command/v1/DropNamespaceSuite.scala | 7 +++++-- .../sql/hive/execution/command/DropNamespaceSuite.scala | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 3dde9985abbee..6df94f3864552 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -221,7 +221,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) val newProperties = c.properties -- CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES CreateDatabaseCommand(name, c.ifNotExists, location, comment, newProperties) - case d @ DropNamespace(DatabaseInSessionCatalog(db), _, _) => + case d @ DropNamespace(DatabaseInSessionCatalog(db), _, _) if conf.useV1Command => DropDatabaseCommand(db, d.ifExists, d.cascade) case ShowTables(DatabaseInSessionCatalog(db), pattern, output) if conf.useV1Command => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropNamespaceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropNamespaceSuite.scala index 24e51317575d3..174ac970be6bc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropNamespaceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropNamespaceSuite.scala @@ -28,7 +28,8 @@ import org.apache.spark.sql.execution.command * - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.DropNamespaceSuite` * - V1 Hive External catalog: `org.apache.spark.sql.hive.execution.command.DropNamespaceSuite` */ -trait DropNamespaceSuiteBase extends command.DropNamespaceSuiteBase { +trait DropNamespaceSuiteBase extends command.DropNamespaceSuiteBase + with command.TestsV1AndV2Commands { override protected def builtinTopNamespaces: Seq[String] = Seq("default") override protected def namespaceAlias(): String = "database" @@ -41,4 +42,6 @@ trait DropNamespaceSuiteBase extends command.DropNamespaceSuiteBase { } } -class DropNamespaceSuite extends DropNamespaceSuiteBase with CommandSuiteBase +class DropNamespaceSuite extends DropNamespaceSuiteBase with CommandSuiteBase { + override def commandVersion: String = super[DropNamespaceSuiteBase].commandVersion +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DropNamespaceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DropNamespaceSuite.scala index cabebb9e11510..955fe332cf1d0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DropNamespaceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DropNamespaceSuite.scala @@ -25,4 +25,5 @@ import org.apache.spark.sql.execution.command.v1 */ class DropNamespaceSuite extends v1.DropNamespaceSuiteBase with CommandSuiteBase { override def isCasePreserving: Boolean = false + override def commandVersion: String = super[DropNamespaceSuiteBase].commandVersion } From 277322851f3c96f812c7da115f00f66bb6f11f6b Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 25 Jan 2022 09:29:57 -0800 Subject: 
[PATCH 105/513] [SPARK-38022][K8S][TESTS] Use relativePath for K8s remote file test in `BasicTestsSuite` ### What changes were proposed in this pull request? This PR aims to use `relativePath` for K8s remote file test in `BasicTestsSuite`. ### Why are the changes needed? To make `Run SparkRemoteFileTest using a remote data file` test pass. **BEFORE** ``` $ build/sbt -Pkubernetes -Pkubernetes-integration-tests -Dspark.kubernetes.test.dockerFile=resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 -Dtest.exclude.tags=minikube,r "kubernetes-integration-tests/test" ... [info] KubernetesSuite: ... [info] - Run SparkRemoteFileTest using a remote data file *** FAILED *** (3 minutes, 3 seconds) [info] The code passed to eventually never returned normally. Attempted 190 times over 3.012265011116667 minutes. Last failure message: false was not true. (KubernetesSuite.scala:452) ... ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? ``` $ build/sbt -Pkubernetes -Pkubernetes-integration-tests -Dspark.kubernetes.test.dockerFile=resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 -Dtest.exclude.tags=minikube,r "kubernetes-integration-tests/test" ... [info] KubernetesSuite: ... [info] - Run SparkRemoteFileTest using a remote data file (8 seconds, 608 milliseconds) ... ``` Closes #35318 from dongjoon-hyun/SPARK-38022. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/deploy/k8s/integrationtest/BasicTestsSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala index d704ef753ed63..217359b3da1bf 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala @@ -119,8 +119,8 @@ private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite => test("Run SparkRemoteFileTest using a remote data file", k8sTestTag) { assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!") TestUtils.withHttpServer(sys.props("spark.test.home")) { baseURL => - sparkAppConf - .set("spark.files", baseURL.toString + REMOTE_PAGE_RANK_DATA_FILE) + sparkAppConf.set("spark.files", baseURL.toString + + REMOTE_PAGE_RANK_DATA_FILE.replace(sys.props("spark.test.home"), "").substring(1)) runSparkRemoteCheckAndVerifyCompletion(appArgs = Array(REMOTE_PAGE_RANK_FILE_NAME)) } } From 9887d0f7f55157da1b9f55d7053cc6c78ea3cdc5 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 25 Jan 2022 14:34:56 -0800 Subject: [PATCH 106/513] [SPARK-38023][CORE] `ExecutorMonitor.onExecutorRemoved` should handle `ExecutorDecommission` as finished ### What changes were proposed in this pull request? Although SPARK-36614 (https://github.com/apache/spark/pull/33868) fixed the UI issue, it made a regression where the `K8s integration test` has been broken and shows a wrong metrics and message to the users. After `Finished decommissioning`, it's still counted it as `unfinished`. This PR aims to fix this bug. 
**BEFORE** ``` 22/01/25 13:05:16 DEBUG KubernetesClusterSchedulerBackend$KubernetesDriverEndpoint: Asked to remove executor 1 with reason Finished decommissioning ... 22/01/25 13:05:16 INFO ExecutorMonitor: Executor 1 is removed. Remove reason statistics: (gracefully decommissioned: 0, decommision unfinished: 1, driver killed: 0, unexpectedly exited: 0). ``` **AFTER** ``` Remove reason statistics: (gracefully decommissioned: 1, decommision unfinished: 0, driver killed: 0, unexpectedly exited: 0). ``` ### Why are the changes needed? ``` $ build/sbt -Pkubernetes -Pkubernetes-integration-tests -Dspark.kubernetes.test.dockerFile=resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 -Dtest.exclude.tags=minikube,r "kubernetes-integration-tests/test" ``` **BEFORE** The corresponding test case hangs and fails. ``` [info] KubernetesSuite: ... [info] *** Test still running after 2 minutes, 13 seconds: suite name: KubernetesSuite, test name: Test decommissioning with dynamic allocation & shuffle cleanups. // Eventually fails ... ``` **AFTER** ``` [info] KubernetesSuite: ... [info] - Test decommissioning with dynamic allocation & shuffle cleanups (2 minutes, 41 seconds) ... ``` ### Does this PR introduce _any_ user-facing change? Yes, this is a regression bug fix. ### How was this patch tested? Manually because this should be verified via the K8s integration test ``` $ build/sbt -Pkubernetes -Pkubernetes-integration-tests -Dspark.kubernetes.test.dockerFile=resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 -Dtest.exclude.tags=minikube,r "kubernetes-integration-tests/test" ``` Closes #35321 from dongjoon-hyun/SPARK-38023. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala | 3 ++- .../spark/deploy/k8s/integrationtest/DecommissionSuite.scala | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala b/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala index 3dea64c34a327..def63b9ead183 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala @@ -356,7 +356,8 @@ private[spark] class ExecutorMonitor( if (removed != null) { decrementExecResourceProfileCount(removed.resourceProfileId) if (removed.decommissioning) { - if (event.reason == ExecutorLossMessage.decommissionFinished) { + if (event.reason == ExecutorLossMessage.decommissionFinished || + event.reason == ExecutorDecommission().message) { metrics.gracefullyDecommissioned.inc() } else { metrics.decommissionUnfinished.inc() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala index 9605f6c42a45d..ca6108daa4de4 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala @@ -151,7 +151,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => val client = kubernetesTestComponents.kubernetesClient // The label will be added eventually, but k8s objects 
don't refresh. Eventually.eventually( - PatienceConfiguration.Timeout(Span(1200, Seconds)), + PatienceConfiguration.Timeout(Span(120, Seconds)), PatienceConfiguration.Interval(Span(1, Seconds))) { val currentPod = client.pods().withName(pod.getMetadata.getName).get From a722c6d199a73d570eb95f8f80d545949c917c75 Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Wed, 26 Jan 2022 08:49:55 +0900 Subject: [PATCH 107/513] [MINOR][ML][TESTS] Increase timeout for mllib streaming test ### What changes were proposed in this pull request? Increase the timeout for the MLlib streaming tests. The current timeout is too short and makes the tests flaky in some cases. ### Why are the changes needed? Address test flakiness. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit tests. Closes #35322 from WeichenXu123/increase_timeout. Authored-by: Weichen Xu Signed-off-by: Hyukjin Kwon --- .../apache/spark/mllib/clustering/StreamingKMeansSuite.scala | 2 +- .../spark/mllib/regression/StreamingLinearRegressionSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala index 415ac87275390..ea7c52bba6c3f 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.util.random.XORShiftRandom class StreamingKMeansSuite extends SparkFunSuite with LocalStreamingContext with TestSuiteBase { - override def maxWaitTimeMillis: Int = 30000 + override def maxWaitTimeMillis: Int = 100000 test("accuracy for single center and equivalence to grand average") { // set parameters diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala index b8342f84be44b..5ad425467d85c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala @@ -31,7 +31,7 @@ class StreamingLinearRegressionSuite with TestSuiteBase { // use longer wait time to ensure job completion - override def maxWaitTimeMillis: Int = 60000 + override def maxWaitTimeMillis: Int = 100000 // Assert that two values are equal within tolerance epsilon def assertEqual(v1: Double, v2: Double, epsilon: Double): Unit = { From cff69219627ab53c99c07921df55756fd7eea4a9 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Wed, 26 Jan 2022 09:53:47 +0800 Subject: [PATCH 108/513] [SPARK-38015][CORE] Mark legacy file naming functions as deprecated in FileCommitProtocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? FileCommitProtocol is the class used to commit Spark job output (staging file & directory renaming, etc). During Spark 3.2 development, we added new functions to this class to allow more flexible output file naming. We didn't delete the existing file naming functions (newTaskTempFile(ext) & newTaskTempFileAbsPath(ext)), because we were aware that many other downstream projects and codebases have already implemented their own custom FileCommitProtocol. 
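For example, a downstream committer typically subclasses the protocol and overrides the legacy `ext`-based signature directly. The sketch below is hypothetical (the class name and naming scheme are invented for illustration) and assumes a subclass of the built-in HadoopMapReduceCommitProtocol:
```
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.spark.internal.io.HadoopMapReduceCommitProtocol

// Hypothetical downstream committer that customizes file naming by overriding the
// legacy ext-based method. Removing that method would break code like this at compile
// time, whereas deprecating it only produces a warning.
class PrefixedCommitProtocol(jobId: String, path: String)
  extends HadoopMapReduceCommitProtocol(jobId, path) {

  override def newTaskTempFile(
      taskContext: TaskAttemptContext, dir: Option[String], ext: String): String = {
    // Delegate to the default implementation with a customized extension.
    super.newTaskTempFile(taskContext, dir, "-custom" + ext)
  }
}
```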
Deleting the existing functions would be a breaking change for them when upgrading the Spark version, and we would like to avoid this unpleasant surprise for anyone if possible. But we also need to clean up legacy code as we evolve our codebase. So as the next step, I would like to propose, for Spark 3.3 (now): add a deprecation annotation to the legacy functions in FileCommitProtocol - newTaskTempFile(ext) & newTaskTempFileAbsPath(ext). ### Why are the changes needed? Clean up the codebase. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #35311 from c21/file-naming. Authored-by: Cheng Su Signed-off-by: Hyukjin Kwon --- .../scala/org/apache/spark/internal/io/FileCommitProtocol.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala index 5cd7397ea358f..e2a96267082b8 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala @@ -95,6 +95,7 @@ abstract class FileCommitProtocol extends Logging { * if a task is going to write out multiple files to the same dir. The file commit protocol only * guarantees that files written by different tasks will not conflict. */ + @deprecated("use newTaskTempFile(..., spec: FileNameSpec) instead", "3.3.0") def newTaskTempFile(taskContext: TaskAttemptContext, dir: Option[String], ext: String): String /** @@ -132,6 +133,7 @@ abstract class FileCommitProtocol extends Logging { * if a task is going to write out multiple files to the same dir. The file commit protocol only * guarantees that files written by different tasks will not conflict. */ + @deprecated("use newTaskTempFileAbsPath(..., spec: FileNameSpec) instead", "3.3.0") def newTaskTempFileAbsPath( taskContext: TaskAttemptContext, absoluteDir: String, ext: String): String From 69c213d3568d665ce239a4aa20568e89081d2419 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Tue, 25 Jan 2022 18:37:05 -0800 Subject: [PATCH 109/513] [SPARK-38029][K8S][TESTS] Support K8S integration test in SBT ### What changes were proposed in this pull request? This PR aims to support the K8S integration tests in SBT. ### Why are the changes needed? Currently, SBT only supports `minikube` in a hard-coded way. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually, because this is an integration test. Closes #35327 from williamhyun/sbt_k8s. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 4130c6a1c73d6..02ffa236c8722 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -604,8 +604,8 @@ object DockerIntegrationTests { } /** - * These settings run a hardcoded configuration of the Kubernetes integration tests using - * minikube. Docker images will have the "dev" tag, and will be overwritten every time the + * These settings run the Kubernetes integration tests. * Docker images will have the "dev" tag, and will be overwritten every time the * integration tests are run. The integration tests are actually bound to the "test" phase, * so running "test" on this module will run the integration tests.
* @@ -625,6 +625,7 @@ object KubernetesIntegrationTests { val runITs = TaskKey[Unit]("run-its", "Only run ITs, skip image build.") val imageTag = settingKey[String]("Tag to use for images built during the test.") val namespace = settingKey[String]("Namespace where to run pods.") + val deployMode = sys.props.get("spark.kubernetes.test.deployMode") // Hack: this variable is used to control whether to build docker images. It's updated by // the tasks below in a non-obvious way, so that you get the functionality described in @@ -645,10 +646,11 @@ object KubernetesIntegrationTests { } else { Seq("-b", s"java_image_tag=$javaImageTag") } - val cmd = Seq(dockerTool, "-m", + val cmd = Seq(dockerTool, "-t", imageTag.value, "-p", s"$bindingsDir/python/Dockerfile", "-R", s"$bindingsDir/R/Dockerfile") ++ + (if (deployMode == Some("docker-for-desktop")) Seq.empty else Seq("-m")) ++ extraOptions :+ "build" val ec = Process(cmd).! @@ -666,7 +668,7 @@ object KubernetesIntegrationTests { }.value, (Test / test) := (Test / test).dependsOn(dockerBuild).value, (Test / javaOptions) ++= Seq( - "-Dspark.kubernetes.test.deployMode=minikube", + s"-Dspark.kubernetes.test.deployMode=${deployMode.getOrElse("minikube")}", s"-Dspark.kubernetes.test.imageTag=${imageTag.value}", s"-Dspark.kubernetes.test.namespace=${namespace.value}", s"-Dspark.kubernetes.test.unpackSparkDir=$sparkHome" From 94df0d51b2d48d4273ef956aa833db1aa87224a6 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 25 Jan 2022 18:39:31 -0800 Subject: [PATCH 110/513] [SPARK-38028][SQL] Expose Arrow Vector from ArrowColumnVector ### What changes were proposed in this pull request? This change exposes Arrow Vector from `ArrowColumnVector`. ### Why are the changes needed? In some cases we need to work with Arrow Vectors behind `ColumnVector` using Arrow APIs. For example, some Spark extension libraries need to consume Arrow Vectors. For now, it is impossible as the Arrow Vector is private member in `ArrowColumnVector`. We need to expose the Arrow Vector from `ArrowColumnVector`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. Closes #35326 from viirya/arrow_vector. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../java/org/apache/spark/sql/vectorized/ArrowColumnVector.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java index 9aee1050370da..89daee1cbbfc7 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java @@ -33,6 +33,8 @@ public final class ArrowColumnVector extends ColumnVector { private final ArrowVectorAccessor accessor; private ArrowColumnVector[] childColumns; + public ValueVector getValueVector() { return accessor.vector; } + @Override public boolean hasNull() { return accessor.getNullCount() > 0; From 7e5c3b216431b6a5e9a0786bf7cded694228cdee Mon Sep 17 00:00:00 2001 From: Ivan Karol Date: Tue, 25 Jan 2022 19:14:24 -0800 Subject: [PATCH 111/513] [SPARK-30062][SQL] Add the IMMEDIATE statement to the DB2 dialect truncate implementation ### What changes were proposed in this pull request? I've added a DB2 specific truncate implementation that adds an IMMEDIATE statement at the end of the query. ### Why are the changes needed? 
I've encountered this issue myself while working with DB2 and trying to use the truncate functionality. A quick Google search shows that some people have also encountered this issue before: https://stackoverflow.com/questions/70027567/overwrite-mode-does-not-work-in-spark-sql-while-adding-data-in-db2 https://issues.apache.org/jira/browse/SPARK-30062 Looking into the DB2 docs, it becomes apparent that the IMMEDIATE statement is only optional if the table is column-organized (though I'm not sure whether that applies to all DB2 versions). So for cases (such as mine) where the table is not column-organized, adding the IMMEDIATE statement is essential for the query to work. https://www.ibm.com/support/knowledgecenter/en/SSEPGG_11.5.0/com.ibm.db2.luw.sql.ref.doc/doc/r0053474.html Also, while it might not be the best example, I've found that DbVisualizer does add an IMMEDIATE statement at the end of the truncate command, though it does so only for versions >= 9.7 https://fossies.org/linux/dbvis/resources/profiles/db2.xml (please look at line number 473) ### Does this PR introduce _any_ user-facing change? It should not: even though the docs mention that a TRUNCATE statement executed in conjunction with IMMEDIATE has to be the first statement in the transaction, the JDBC connection that is established to execute the TRUNCATE statement has auto-commit mode turned on. This means no other query/statement is executed beforehand within the same transaction. https://www.ibm.com/docs/en/db2/11.5?topic=statements-truncate (see the description for IMMEDIATE) https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcRelationProvider.scala#L49 https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcRelationProvider.scala#L57 https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala#L108 ### How was this patch tested? Existing test case with slightly adjusted logic; a short usage sketch of the affected write path follows below. Closes #35283 from ikarol/SPARK-30062.
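The user-visible path is the plain overwrite-with-truncate JDBC write; a minimal sketch, assuming an existing DataFrame `df` and a reachable DB2 endpoint (the URL and table name below are made up):

```scala
import java.util.Properties
import org.apache.spark.sql.SaveMode

// Hypothetical DB2 connection; with this change the truncate path issues
// roughly "TRUNCATE TABLE tblcopy IMMEDIATE" instead of a bare TRUNCATE.
val jdbcUrl = "jdbc:db2://db2host:50000/testdb"
df.write
  .mode(SaveMode.Overwrite)
  .option("truncate", true)
  .jdbc(jdbcUrl, "tblcopy", new Properties)
```

This is essentially what the adjusted `DB2IntegrationSuite` test below exercises.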
Authored-by: Ivan Karol Signed-off-by: huaxingao --- .../spark/sql/jdbc/DB2IntegrationSuite.scala | 21 ++++++++++++++++++- .../apache/spark/sql/jdbc/DB2Dialect.scala | 9 ++++++++ .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 8 +++++-- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index 59eb49dc303df..6cee6622e1c1f 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -23,7 +23,7 @@ import java.util.Properties import org.scalatest.time.SpanSugar._ -import org.apache.spark.sql.Row +import org.apache.spark.sql.{Row, SaveMode} import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.types.{BooleanType, ByteType, ShortType, StructType} import org.apache.spark.tags.DockerTest @@ -198,4 +198,23 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { """.stripMargin.replaceAll("\n", " ")) assert(sql("select x, y from queryOption").collect.toSet == expectedResult) } + + test("SPARK-30062") { + val expectedResult = Set( + (42, "fred"), + (17, "dave") + ).map { case (x, y) => + Row(Integer.valueOf(x), String.valueOf(y)) + } + val df = sqlContext.read.jdbc(jdbcUrl, "tbl", new Properties) + for (_ <- 0 to 2) { + df.write.mode(SaveMode.Append).jdbc(jdbcUrl, "tblcopy", new Properties) + } + assert(sqlContext.read.jdbc(jdbcUrl, "tblcopy", new Properties).count === 6) + df.write.mode(SaveMode.Overwrite).option("truncate", true) + .jdbc(jdbcUrl, "tblcopy", new Properties) + val actual = sqlContext.read.jdbc(jdbcUrl, "tblcopy", new Properties).collect + assert(actual.length === 2) + assert(actual.toSet === expectedResult) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala index 9e9aac679ab39..307aa511cc152 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala @@ -65,6 +65,15 @@ private object DB2Dialect extends JdbcDialect { override def isCascadingTruncateTable(): Option[Boolean] = Some(false) + // scalastyle:off line.size.limit + // See https://www.ibm.com/support/knowledgecenter/en/SSEPGG_11.5.0/com.ibm.db2.luw.sql.ref.doc/doc/r0053474.html + // scalastyle:on line.size.limit + override def getTruncateQuery( + table: String, + cascade: Option[Boolean] = isCascadingTruncateTable): String = { + s"TRUNCATE TABLE $table IMMEDIATE" + } + // scalastyle:off line.size.limit // See https://www.ibm.com/support/knowledgecenter/en/SSEPGG_11.5.0/com.ibm.db2.luw.sql.ref.doc/doc/r0000980.html // scalastyle:on line.size.limit diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index f4b18f1adfdec..18e1f8c1aa67f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -1008,14 +1008,16 @@ class JDBCSuite extends QueryTest val defaultQuery = s"TRUNCATE TABLE $table" val postgresQuery = s"TRUNCATE TABLE ONLY $table" val teradataQuery = s"DELETE FROM $table ALL" + val db2Query = s"TRUNCATE TABLE $table 
IMMEDIATE" - Seq(mysql, db2, h2, derby).foreach{ dialect => + Seq(mysql, h2, derby).foreach{ dialect => assert(dialect.getTruncateQuery(table, Some(true)) == defaultQuery) } assert(postgres.getTruncateQuery(table) == postgresQuery) assert(oracle.getTruncateQuery(table) == defaultQuery) assert(teradata.getTruncateQuery(table) == teradataQuery) + assert(db2.getTruncateQuery(table) == db2Query) } test("SPARK-22880: Truncate table with CASCADE by jdbc dialect") { @@ -1034,13 +1036,15 @@ class JDBCSuite extends QueryTest val postgresQuery = s"TRUNCATE TABLE ONLY $table CASCADE" val oracleQuery = s"TRUNCATE TABLE $table CASCADE" val teradataQuery = s"DELETE FROM $table ALL" + val db2Query = s"TRUNCATE TABLE $table IMMEDIATE" - Seq(mysql, db2, h2, derby).foreach{ dialect => + Seq(mysql, h2, derby).foreach{ dialect => assert(dialect.getTruncateQuery(table, Some(true)) == defaultQuery) } assert(postgres.getTruncateQuery(table, Some(true)) == postgresQuery) assert(oracle.getTruncateQuery(table, Some(true)) == oracleQuery) assert(teradata.getTruncateQuery(table, Some(true)) == teradataQuery) + assert(db2.getTruncateQuery(table, Some(true)) == db2Query) } test("Test DataFrame.where for Date and Timestamp") { From 660d2ab07071238dadfd243cd763bb7706637518 Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Wed, 26 Jan 2022 12:36:50 +0800 Subject: [PATCH 112/513] [SPARK-38003][SQL] LookupFunctions rule should only look up functions from the scalar function registry ### What changes were proposed in this pull request? This PR updates the Analyzer rule `LookupFunctions` to only use scalar function registry instead of using both scalar function and table function registries, since currently LookupFunctions only handles scalar functions. ### Why are the changes needed? To make the error message consistent when a scalar function does not exist. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing unit tests. Closes #35304 from allisonwang-db/spark-38003-lookup-func. Authored-by: allisonwang-db Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 11 +++++++++- .../sql/catalyst/catalog/SessionCatalog.scala | 22 +++++++++++++------ .../results/postgreSQL/window_part3.sql.out | 2 +- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index d31f90aa2acf0..0390131172bb6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2045,7 +2045,8 @@ class Analyzer(override val catalogManager: CatalogManager) _.containsAnyPattern(UNRESOLVED_FUNC, UNRESOLVED_FUNCTION, GENERATOR), ruleId) { // Resolve functions with concrete relations from v2 catalog. 
case u @ UnresolvedFunc(nameParts, cmd, requirePersistentFunc, mismatchHint, _) => - lookupBuiltinOrTempFunction(nameParts).map { info => + lookupBuiltinOrTempFunction(nameParts) + .orElse(lookupBuiltinOrTempTableFunction(nameParts)).map { info => if (requirePersistentFunc) { throw QueryCompilationErrors.expectPersistentFuncError( nameParts.head, cmd, mismatchHint, u) @@ -2116,6 +2117,14 @@ class Analyzer(override val catalogManager: CatalogManager) } } + def lookupBuiltinOrTempTableFunction(name: Seq[String]): Option[ExpressionInfo] = { + if (name.length == 1) { + v1SessionCatalog.lookupBuiltinOrTempTableFunction(name.head) + } else { + None + } + } + private def resolveBuiltinOrTempFunction( name: Seq[String], arguments: Seq[Expression], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index ad007f1d5cfd4..464768ac7ce2b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1553,18 +1553,24 @@ class SessionCatalog( /** * Look up the `ExpressionInfo` of the given function by name if it's a built-in or temp function. - * This supports both scalar and table functions. + * This only supports scalar functions. */ def lookupBuiltinOrTempFunction(name: String): Option[ExpressionInfo] = { FunctionRegistry.builtinOperators.get(name.toLowerCase(Locale.ROOT)).orElse { - def lookup(ident: FunctionIdentifier): Option[ExpressionInfo] = { - functionRegistry.lookupFunction(ident).orElse( - tableFunctionRegistry.lookupFunction(ident)) - } - synchronized(lookupTempFuncWithViewContext(name, isBuiltinFunction, lookup)) + synchronized(lookupTempFuncWithViewContext( + name, FunctionRegistry.builtin.functionExists, functionRegistry.lookupFunction)) } } + /** + * Look up the `ExpressionInfo` of the given function by name if it's a built-in or + * temp table function. + */ + def lookupBuiltinOrTempTableFunction(name: String): Option[ExpressionInfo] = synchronized { + lookupTempFuncWithViewContext( + name, TableFunctionRegistry.builtin.functionExists, tableFunctionRegistry.lookupFunction) + } + /** * Look up a built-in or temp scalar function by name and resolves it to an Expression if such * a function exists. @@ -1709,7 +1715,9 @@ class SessionCatalog( */ def lookupFunctionInfo(name: FunctionIdentifier): ExpressionInfo = synchronized { if (name.database.isEmpty) { - lookupBuiltinOrTempFunction(name.funcName).getOrElse(lookupPersistentFunction(name)) + lookupBuiltinOrTempFunction(name.funcName) + .orElse(lookupBuiltinOrTempTableFunction(name.funcName)) + .getOrElse(lookupPersistentFunction(name)) } else { lookupPersistentFunction(name) } diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index a76b4088fb818..a182f9e82c61c 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -374,7 +374,7 @@ SELECT range(1, 100) OVER () FROM empsalary struct<> -- !query output org.apache.spark.sql.AnalysisException -Undefined function: 'range'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 +Undefined function: range. 
This function is neither a built-in/temporary function, nor a persistent function that is qualified as spark_catalog.default.range.; line 1 pos 7 -- !query From 543e008f57d6edc348cf1545de530c9c950b08d6 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Wed, 26 Jan 2022 13:21:20 +0800 Subject: [PATCH 113/513] [SPARK-37896][SQL][FOLLOWUP] Fix NPE in ConstantColumnVector.close() ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/35068 to fix the null pointer exception when calling `ConstantColumnVector.close()`. `ConstantColumnVector.childData` can be null for e.g. non-struct data type. ### Why are the changes needed? Fix the exception when cleaning up column vector. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Modified unit test in `ConstantColumnVectorSuite.scala` to exercise the code path of `ConstantColumnVector.close()` for every tested data type. Without the fix, the unit test throws NPE. Closes #35324 from c21/constant-fix. Authored-by: Cheng Su Signed-off-by: Wenchen Fan --- .../execution/vectorized/ConstantColumnVector.java | 13 +++++++++---- .../vectorized/ConstantColumnVectorSuite.scala | 4 +++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java index 134cb05c1265c..3a5dea479cab5 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java @@ -70,12 +70,17 @@ public ConstantColumnVector(int numRows, DataType type) { @Override public void close() { + stringData = null; byteArrayData = null; - for (int i = 0; i < childData.length; i++) { - childData[i].close(); - childData[i] = null; + if (childData != null) { + for (int i = 0; i < childData.length; i++) { + if (childData[i] != null) { + childData[i].close(); + childData[i] = null; + } + } + childData = null; } - childData = null; arrayData = null; mapData = null; } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ConstantColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ConstantColumnVectorSuite.scala index c8438f342d256..2bee643df4eff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ConstantColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ConstantColumnVectorSuite.scala @@ -27,7 +27,9 @@ class ConstantColumnVectorSuite extends SparkFunSuite { private def testVector(name: String, size: Int, dt: DataType) (f: ConstantColumnVector => Unit): Unit = { test(name) { - f(new ConstantColumnVector(size, dt)) + val vector = new ConstantColumnVector(size, dt) + f(vector) + vector.close() } } From a4e2bf9908edb13ffe7e17511efc8561f2b06f28 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 26 Jan 2022 13:21:57 +0800 Subject: [PATCH 114/513] [SPARK-37636][SQL][FOLLOW-UP] Move handling Hive exceptions for create/drop database to HiveClientImpl ### What changes were proposed in this pull request? #35113 introduced `HiveExternalCatalog.withClientWrappingException` to to wrap a Hive exception to a Spark exception. 
However, there was a limitation of testing it against different Hive versions as outlined in https://github.com/apache/spark/pull/35173#discussion_r782644742. This PR proposes to revert `HiveExternalCatalog.withClientWrappingException` and move wrapping logic to `HiveClientImpl`. ### Why are the changes needed? 1. To make testing against different Hive versions better. 2. For Hive 0.12, the wrapping logic was not working correctly for `dropDatabase` because the message was not matching. ### Does this PR introduce _any_ user-facing change? Yes, for Hive 0.12, the exception message will now be consistent with other versions. Before: `InvalidOperationException(message:Database db is not empty)` After: `Cannot drop a non-empty database: db. Use CASCADE option to drop a non-empty database` ### How was this patch tested? Added a new test coverage. Closes #35173 from imback82/drop_db_withClientWrappingException. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/hive/HiveExternalCatalog.scala | 47 +++---------------- .../sql/hive/client/HiveClientImpl.scala | 16 +++++-- .../spark/sql/hive/client/VersionsSuite.scala | 18 +++---- 3 files changed, 30 insertions(+), 51 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 24e60529d227b..5fccce2678f86 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -36,12 +36,11 @@ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{DatabaseAlreadyExistsException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils} -import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions} import org.apache.spark.sql.hive.client.HiveClient @@ -94,22 +93,10 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat } /** - * Run some code involving `client` in a [[synchronized]] block and wrap non-fatal + * Run some code involving `client` in a [[synchronized]] block and wrap certain * exceptions thrown in the process in [[AnalysisException]]. */ - private def withClient[T](body: => T): T = withClientWrappingException { - body - } { - _ => None // Will fallback to default wrapping strategy in withClientWrappingException. - } - - /** - * Run some code involving `client` in a [[synchronized]] block and wrap non-fatal - * exceptions thrown in the process in [[AnalysisException]] using the given - * `wrapException` function. 
- */ - private def withClientWrappingException[T](body: => T) - (wrapException: Throwable => Option[AnalysisException]): T = synchronized { + private def withClient[T](body: => T): T = synchronized { try { body } catch { @@ -120,11 +107,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat case i: InvocationTargetException => i.getCause case o => o } - wrapException(e) match { - case Some(wrapped) => throw wrapped - case None => throw new AnalysisException( - e.getClass.getCanonicalName + ": " + e.getMessage, cause = Some(e)) - } + throw new AnalysisException( + e.getClass.getCanonicalName + ": " + e.getMessage, cause = Some(e)) } } @@ -204,32 +188,15 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat override def createDatabase( dbDefinition: CatalogDatabase, - ignoreIfExists: Boolean): Unit = withClientWrappingException { + ignoreIfExists: Boolean): Unit = withClient { client.createDatabase(dbDefinition, ignoreIfExists) - } { exception => - if (exception.getClass.getName.equals( - "org.apache.hadoop.hive.metastore.api.AlreadyExistsException") - && exception.getMessage.contains( - s"Database ${dbDefinition.name} already exists")) { - Some(new DatabaseAlreadyExistsException(dbDefinition.name)) - } else { - None - } } override def dropDatabase( db: String, ignoreIfNotExists: Boolean, cascade: Boolean): Unit = withClient { - try { - client.dropDatabase(db, ignoreIfNotExists, cascade) - } catch { - case NonFatal(exception) => - if (exception.getClass.getName.equals("org.apache.hadoop.hive.ql.metadata.HiveException") - && exception.getMessage.contains(s"Database $db is not empty.")) { - throw QueryCompilationErrors.cannotDropNonemptyDatabaseError(db) - } else throw exception - } + client.dropDatabase(db, ignoreIfNotExists, cascade) } /** diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 9c9a4fd2b3741..3dddca844750d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -49,7 +49,7 @@ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.internal.Logging import org.apache.spark.metrics.source.HiveCatalogMetrics import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException, NoSuchPartitionsException, NoSuchTableException, PartitionAlreadyExistsException, PartitionsAlreadyExistException} +import org.apache.spark.sql.catalyst.analysis.{DatabaseAlreadyExistsException, NoSuchDatabaseException, NoSuchPartitionException, NoSuchPartitionsException, NoSuchTableException, PartitionAlreadyExistsException, PartitionsAlreadyExistException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Expression @@ -332,14 +332,24 @@ private[hive] class HiveClientImpl( database: CatalogDatabase, ignoreIfExists: Boolean): Unit = withHiveState { val hiveDb = toHiveDatabase(database, Some(userName)) - shim.createDatabase(client, hiveDb, ignoreIfExists) + try { + shim.createDatabase(client, hiveDb, ignoreIfExists) + } catch { + case _: AlreadyExistsException => + throw new DatabaseAlreadyExistsException(database.name) + } } override def dropDatabase( name: String, ignoreIfNotExists: 
Boolean, cascade: Boolean): Unit = withHiveState { - shim.dropDatabase(client, name, true, ignoreIfNotExists, cascade) + try { + shim.dropDatabase(client, name, true, ignoreIfNotExists, cascade) + } catch { + case e: HiveException if e.getMessage.contains(s"Database $name is not empty") => + throw QueryCompilationErrors.cannotDropNonemptyDatabaseError(name) + } } override def alterDatabase(database: CatalogDatabase): Unit = withHiveState { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index 14b2a51bff8c0..422a905f69b7c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -33,7 +33,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPermanentFunctionException, PartitionsAlreadyExistException} +import org.apache.spark.sql.catalyst.analysis.{DatabaseAlreadyExistsException, NoSuchDatabaseException, NoSuchPermanentFunctionException, PartitionsAlreadyExistException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} import org.apache.spark.sql.catalyst.util.quietly @@ -184,14 +184,8 @@ class VersionsSuite extends SparkFunSuite with Logging { "temporary", description = "test create", tempDatabasePath, Map()) client.createDatabase(tempDB, ignoreIfExists = true) - try { + intercept[DatabaseAlreadyExistsException] { client.createDatabase(tempDB, ignoreIfExists = false) - assert(false, "createDatabase should throw AlreadyExistsException") - } catch { - case ex: Throwable => - assert(ex.getClass.getName.equals( - "org.apache.hadoop.hive.metastore.api.AlreadyExistsException")) - assert(ex.getMessage.contains(s"Database ${tempDB.name} already exists")) } } @@ -275,6 +269,14 @@ class VersionsSuite extends SparkFunSuite with Logging { test(s"$version: dropDatabase") { assert(client.databaseExists("temporary")) + + client.createTable(table("temporary", tableName = "tbl"), ignoreIfExists = false) + val ex = intercept[AnalysisException] { + client.dropDatabase("temporary", ignoreIfNotExists = false, cascade = false) + assert(false, "dropDatabase should throw HiveException") + } + assert(ex.message.contains("Cannot drop a non-empty database: temporary.")) + client.dropDatabase("temporary", ignoreIfNotExists = false, cascade = true) assert(client.databaseExists("temporary") == false) } From e4b1984dc502e8be7a9832cce52b9501be4d7015 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Wed, 26 Jan 2022 13:29:23 +0800 Subject: [PATCH 115/513] [SPARK-37929][SQL][FOLLOWUP] Support cascade mode for JDBC V2 ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/35246 support `cascade` mode for dropNamespace API. This PR followup https://github.com/apache/spark/pull/35246 to make JDBC V2 respect `cascade`. ### Why are the changes needed? Let JDBC V2 respect `cascade`. ### Does this PR introduce _any_ user-facing change? Yes. Users could manipulate `drop namespace` with `cascade` on JDBC V2. ### How was this patch tested? New tests. Closes #35271 from beliefer/SPARK-37929-followup. 
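As a usage sketch (not part of the patch), with a JDBC V2 catalog registered — the catalog name `jdbc` below is hypothetical — the `CASCADE` keyword now propagates down to the remote database as a `DROP SCHEMA ... CASCADE` statement:

```scala
// Assumes spark.sql.catalog.jdbc is configured to point at a JDBCTableCatalog;
// namespace and table names are illustrative only.
spark.sql("CREATE NAMESPACE jdbc.foo")
spark.sql("CREATE TABLE jdbc.foo.tab (id INT, data STRING)")

// Without CASCADE, dropping the non-empty namespace fails (for example with
// NonEmptyNamespaceException on PostgreSQL); with CASCADE the dialect now emits
// DROP SCHEMA "foo" CASCADE, removing the namespace together with its tables.
spark.sql("DROP NAMESPACE jdbc.foo CASCADE")
```

Previously the non-empty check was done on the Spark side regardless of the flag; delegating the cascade to the database keeps the semantics of each dialect.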
Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/V2JDBCNamespaceTest.scala | 34 ++++++++++++++++++- .../datasources/jdbc/JdbcUtils.scala | 10 ++++-- .../v2/jdbc/JDBCTableCatalog.scala | 5 +-- .../spark/sql/jdbc/PostgresDialect.scala | 3 +- 4 files changed, 44 insertions(+), 8 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala index 0c6b2701c92b0..4f56f1f4ea1e7 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala @@ -17,21 +17,31 @@ package org.apache.spark.sql.jdbc.v2 +import java.util +import java.util.Collections + import scala.collection.JavaConverters._ import org.apache.logging.log4j.Level import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.connector.catalog.NamespaceChange +import org.apache.spark.sql.catalyst.analysis.NonEmptyNamespaceException +import org.apache.spark.sql.connector.catalog.{Identifier, NamespaceChange} import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.jdbc.DockerIntegrationFunSuite import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, StringType, StructType} import org.apache.spark.tags.DockerTest @DockerTest private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession with DockerIntegrationFunSuite { val catalog = new JDBCTableCatalog() + private val emptyProps: util.Map[String, String] = Collections.emptyMap[String, String] + private val schema: StructType = new StructType() + .add("id", IntegerType) + .add("data", StringType) + def builtinNamespaces: Array[Array[String]] test("listNamespaces: basic behavior") { @@ -60,4 +70,26 @@ private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession with DockerInte }.getMessage assert(msg.contains("Namespace 'foo' not found")) } + + test("Drop namespace") { + val ident1 = Identifier.of(Array("foo"), "tab") + // Drop empty namespace without cascade + catalog.createNamespace(Array("foo"), Map("comment" -> "test comment").asJava) + assert(catalog.namespaceExists(Array("foo")) === true) + catalog.dropNamespace(Array("foo"), cascade = false) + assert(catalog.namespaceExists(Array("foo")) === false) + + // Drop non empty namespace without cascade + catalog.createNamespace(Array("foo"), Map("comment" -> "test comment").asJava) + assert(catalog.namespaceExists(Array("foo")) === true) + catalog.createTable(ident1, schema, Array.empty, emptyProps) + intercept[NonEmptyNamespaceException] { + catalog.dropNamespace(Array("foo"), cascade = false) + } + + // Drop non empty namespace with cascade + assert(catalog.namespaceExists(Array("foo")) === true) + catalog.dropNamespace(Array("foo"), cascade = true) + assert(catalog.namespaceExists(Array("foo")) === false) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 7f68a73f8950a..cc40d19693b4d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -1014,9 +1014,15 @@ object JdbcUtils extends Logging 
with SQLConfHelper { /** * Drops a namespace from the JDBC database. */ - def dropNamespace(conn: Connection, options: JDBCOptions, namespace: String): Unit = { + def dropNamespace( + conn: Connection, options: JDBCOptions, namespace: String, cascade: Boolean): Unit = { val dialect = JdbcDialects.get(options.url) - executeStatement(conn, options, s"DROP SCHEMA ${dialect.quoteIdentifier(namespace)}") + val dropCmd = if (cascade) { + s"DROP SCHEMA ${dialect.quoteIdentifier(namespace)} CASCADE" + } else { + s"DROP SCHEMA ${dialect.quoteIdentifier(namespace)}" + } + executeStatement(conn, options, dropCmd) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index 1658f0dce7fbe..d06a28d952b38 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -282,12 +282,9 @@ class JDBCTableCatalog extends TableCatalog with SupportsNamespaces with Logging namespace: Array[String], cascade: Boolean): Boolean = namespace match { case Array(db) if namespaceExists(namespace) => - if (listTables(Array(db)).nonEmpty) { - throw QueryExecutionErrors.namespaceNotEmptyError(namespace) - } JdbcUtils.withConnection(options) { conn => JdbcUtils.classifyException(s"Failed drop name space: $db", dialect) { - JdbcUtils.dropNamespace(conn, options, db) + JdbcUtils.dropNamespace(conn, options, db, cascade) true } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index 3b1a2c81fffd6..46e79404f3e54 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -23,7 +23,7 @@ import java.util.Locale import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.SQLConfHelper -import org.apache.spark.sql.catalyst.analysis.{IndexAlreadyExistsException, NoSuchIndexException} +import org.apache.spark.sql.catalyst.analysis.{IndexAlreadyExistsException, NonEmptyNamespaceException, NoSuchIndexException} import org.apache.spark.sql.connector.expressions.NamedReference import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, GeneralAggregateFunc} import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} @@ -252,6 +252,7 @@ private object PostgresDialect extends JdbcDialect with SQLConfHelper { // https://www.postgresql.org/docs/14/errcodes-appendix.html case "42P07" => throw new IndexAlreadyExistsException(message, cause = Some(e)) case "42704" => throw new NoSuchIndexException(message, cause = Some(e)) + case "2BP01" => throw NonEmptyNamespaceException(message, cause = Some(e)) case _ => super.classifyException(message, e) } case unsupported: UnsupportedOperationException => throw unsupported From a765a4dc9b711e86b5679ca98f0b547edb0fd3e8 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 26 Jan 2022 15:38:24 +0900 Subject: [PATCH 116/513] [SPARK-38031][PYTHON][DOCS] Update document type conversion for Pandas UDFs (pyarrow 6.0.1, pandas 1.4.0, Python 3.9) ### What changes were proposed in this pull request? This PR updates the chart generated at SPARK-25666. ### Why are the changes needed? 
To track the changes in type coercion of PySpark <> PyArrow <> pandas. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Use this code to generate the chart: ```python from pyspark.sql.types import * from pyspark.sql.functions import pandas_udf columns = [ ('none', 'object(NoneType)'), ('bool', 'bool'), ('int8', 'int8'), ('int16', 'int16'), ('int32', 'int32'), ('int64', 'int64'), ('uint8', 'uint8'), ('uint16', 'uint16'), ('uint32', 'uint32'), ('uint64', 'uint64'), ('float64', 'float16'), ('float64', 'float32'), ('float64', 'float64'), ('date', 'datetime64[ns]'), ('tz_aware_dates', 'datetime64[ns, US/Eastern]'), ('string', 'object(string)'), ('decimal', 'object(Decimal)'), ('array', 'object(array[int32])'), ('float128', 'float128'), ('complex64', 'complex64'), ('complex128', 'complex128'), ('category', 'category'), ('tdeltas', 'timedelta64[ns]'), ] def create_dataframe(): import pandas as pd import numpy as np import decimal pdf = pd.DataFrame({ 'none': [None, None], 'bool': [True, False], 'int8': np.arange(1, 3).astype('int8'), 'int16': np.arange(1, 3).astype('int16'), 'int32': np.arange(1, 3).astype('int32'), 'int64': np.arange(1, 3).astype('int64'), 'uint8': np.arange(1, 3).astype('uint8'), 'uint16': np.arange(1, 3).astype('uint16'), 'uint32': np.arange(1, 3).astype('uint32'), 'uint64': np.arange(1, 3).astype('uint64'), 'float16': np.arange(1, 3).astype('float16'), 'float32': np.arange(1, 3).astype('float32'), 'float64': np.arange(1, 3).astype('float64'), 'float128': np.arange(1, 3).astype('float128'), 'complex64': np.arange(1, 3).astype('complex64'), 'complex128': np.arange(1, 3).astype('complex128'), 'string': list('ab'), 'array': pd.Series([np.array([1, 2, 3], dtype=np.int32), np.array([1, 2, 3], dtype=np.int32)]), 'decimal': pd.Series([decimal.Decimal('1'), decimal.Decimal('2')]), 'date': pd.date_range('19700101', periods=2).values, 'category': pd.Series(list("AB")).astype('category')}) pdf['tdeltas'] = [pdf.date.diff()[1], pdf.date.diff()[0]] pdf['tz_aware_dates'] = pd.date_range('19700101', periods=2, tz='US/Eastern') return pdf types = [ BooleanType(), ByteType(), ShortType(), IntegerType(), LongType(), FloatType(), DoubleType(), DateType(), TimestampType(), StringType(), DecimalType(10, 0), ArrayType(IntegerType()), MapType(StringType(), IntegerType()), StructType([StructField("_1", IntegerType())]), BinaryType(), ] df = spark.range(2).repartition(1) results = [] count = 0 total = len(types) * len(columns) values = [] spark.sparkContext.setLogLevel("FATAL") for t in types: result = [] for column, pandas_t in columns: v = create_dataframe()[column][0] values.append(v) try: row = df.select(pandas_udf(lambda _: create_dataframe()[column], t)(df.id)).first() ret_str = repr(row[0]) except Exception: ret_str = "X" result.append(ret_str) progress = "SQL Type: [%s]\n Pandas Value(Type): %s(%s)]\n Result Python Value: [%s]" % ( t.simpleString(), v, pandas_t, ret_str) count += 1 print("%s/%s:\n %s" % (count, total, progress)) results.append([t.simpleString()] + list(map(str, result))) schema = ["SQL Type \\ Pandas Value(Type)"] + list(map(lambda values_column: "%s(%s)" % (values_column[0], values_column[1][1]), zip(values, columns))) strings = spark.createDataFrame(results, schema=schema)._jdf.showString(20, 20, False) print("\n".join(map(lambda line: " # %s # noqa" % line, strings.strip().split("\n")))) ``` Closes #35330 from HyukjinKwon/SPARK-38031. 
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/pandas/functions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/pandas/functions.py b/python/pyspark/sql/pandas/functions.py index 0b7aa6b2abb2c..94fabdbb29590 100644 --- a/python/pyspark/sql/pandas/functions.py +++ b/python/pyspark/sql/pandas/functions.py @@ -318,15 +318,15 @@ def calculate(iterator: Iterator[pd.Series]) -> Iterator[pd.Series]: # | string| None| X| X| X| X| X| X| X| X| X| X| X| X| X| X| 'a'| X| X| X| X| X| 'A'| X| # noqa # | decimal(10,0)| None| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| Decimal('1')| X| X| X| X| X| X| # noqa # | array| None| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| [1, 2, 3]| X| X| X| X| X| # noqa - # | map| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| # noqa + # | map| None| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| # noqa # | struct<_1:int>| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| # noqa # | binary| None|bytearray(b'\x01')|bytearray(b'\x01')|bytearray(b'\x01')| bytearray(b'\x01')| bytearray(b'\x01')|bytearray(b'\x01')|bytearray(b'\x01')|bytearray(b'\x01')|bytearray(b'\x01')|bytearray(b'')|bytearray(b'')|bytearray(b'')| bytearray(b'')| bytearray(b'')| bytearray(b'a')| X| X|bytearray(b'')| bytearray(b'')| bytearray(b'')|bytearray(b'A')| bytearray(b'')| # noqa - # +-----------------------------+----------------------+------------------+------------------+------------------+--------------------+--------------------+------------------+------------------+------------------+------------------+--------------+--------------+--------------+-----------------------------------+-----------------------------------------------------+-----------------+--------------------+-----------------------------+--------------+-----------------+------------------+---------------+--------------------------------+ # noqa # + # +-----------------------------+----------------------+------------------+------------------+------------------+--------------------+--------------------+------------------+------------------+------------------+------------------+--------------+--------------+--------------+-----------------------------------+-----------------------------------------------------+-----------------+--------------------+-----------------------------+--------------+-----------------+------------------+---------------+--------------------------------+ # noqa # # Note: DDL formatted string is used for 'SQL Type' for simplicity. This string can be # used in `returnType`. # Note: The values inside of the table are generated by `repr`. - # Note: Python 3.7.3, Pandas 1.1.1 and PyArrow 1.0.1 are used. + # Note: Python 3.9.5, Pandas 1.4.0 and PyArrow 6.0.1 are used. # Note: Timezone is KST. # Note: 'X' means it throws an exception during the conversion. require_minimum_pandas_version() From 6e64e9252a821651a8984babfaccccc79a9ea433 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 26 Jan 2022 15:55:12 +0900 Subject: [PATCH 117/513] [SPARK-38032][INFRA] Upgrade Arrow version < 7.0.0 for Python UDF tests in SQL and documentation generation ### What changes were proposed in this pull request? 
This PR proposes to use Arrow < 7.0.0 (6.0.1 latest) for [IntegratedUDFTestUtils](https://github.com/apache/spark/blob/master/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala), e.g., https://github.com/apache/spark/tree/master/sql/core/src/test/resources/sql-tests/inputs/udf for pandas UDFs. Note that this PR does not change the PyArrow and pandas used for PySpark test base because they are installed in the base image (https://github.com/apache/spark/blob/master/.github/workflows/build_and_test.yml#L290), and they are already using almost latest version (PyArrow 6.0.0, and pandas 1.3.3) so I think it's fine. ### Why are the changes needed? It's better to test latest versions as they are likely more used by end users. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Existing test cases should cover. Closes #35331 from HyukjinKwon/arrow-version-sql-test. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 32f46d35c5b3e..4529cd9ba4c29 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -252,7 +252,7 @@ jobs: - name: Install Python packages (Python 3.8) if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) run: | - python3.8 -m pip install 'numpy>=1.20.0' 'pyarrow<5.0.0' pandas scipy xmlrunner + python3.8 -m pip install 'numpy>=1.20.0' 'pyarrow<7.0.0' pandas scipy xmlrunner python3.8 -m pip list # Run the tests. - name: Run tests @@ -530,7 +530,7 @@ jobs: # Jinja2 3.0.0+ causes error when building with Sphinx. # See also https://issues.apache.org/jira/browse/SPARK-35375. python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0' - python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' 'pyarrow<5.0.0' pandas 'plotly>=4.8' + python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' 'pyarrow<7.0.0' pandas 'plotly>=4.8' apt-get update -y apt-get install -y ruby ruby-dev Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')" From 193011632ba41dc4035460c429374981a8ebe0b7 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 27 Jan 2022 13:36:27 +0900 Subject: [PATCH 118/513] [SPARK-38040][BUILD] Enable binary compatibility check for APIs in Catalyst, KVStore and Avro modules ### What changes were proposed in this pull request? 
We don't currently run binary compatibility check in below modules: ``` [info] spark-parent: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-network-common: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-tags: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-unsafe: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-network-shuffle: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-kvstore: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-tools: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-token-provider-kafka-0-10: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-streaming-kafka-0-10-assembly: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-catalyst: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-repl: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-avro: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-sql-kafka-0-10: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-hive: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-assembly: mimaPreviousArtifacts not set, not analyzing binary compatibility [info] spark-examples: mimaPreviousArtifacts not set, not analyzing binary compatibility ``` However, there are some APIs under these modules. For example, https://github.com/apache/spark/blob/master/external/avro/src/main/scala/org/apache/spark/sql/avro/functions.scala for Avro, https://github.com/apache/spark/tree/master/common/kvstore/src/main/java/org/apache/spark/util/kvstore for KVStore (to be API), and https://github.com/apache/spark/tree/master/sql/catalyst/src/main/java/org/apache/spark/sql/connector for Catalyst ### Why are the changes needed? To detect binary compatibility. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually tested via running `dev/mima`. Closes #35339 from HyukjinKwon/SPARK-38040. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- project/MimaExcludes.scala | 6 ++++++ project/SparkBuild.scala | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index b985f95b85c6d..f77bc5c284ec7 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -66,6 +66,12 @@ object MimaExcludes { ProblemFilters.exclude[Problem]("org.apache.spark.sql.catalyst.*"), ProblemFilters.exclude[Problem]("org.apache.spark.sql.execution.*"), ProblemFilters.exclude[Problem]("org.apache.spark.sql.internal.*"), + ProblemFilters.exclude[Problem]("org.apache.spark.sql.errors.*"), + // DSv2 catalog and expression APIs are unstable yet. We should enable this back. + ProblemFilters.exclude[Problem]("org.apache.spark.sql.connector.catalog.*"), + ProblemFilters.exclude[Problem]("org.apache.spark.sql.connector.expressions.*"), + // Avro source implementation is internal. 
+ ProblemFilters.exclude[Problem]("org.apache.spark.sql.v2.avro.*"), // [SPARK-34848][CORE] Add duration to TaskMetricDistributions ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.status.api.v1.TaskMetricDistributions.this"), diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 02ffa236c8722..ad9aef5669757 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -376,8 +376,8 @@ object SparkBuild extends PomBuild { val mimaProjects = allProjects.filterNot { x => Seq( - spark, hive, hiveThriftServer, catalyst, repl, networkCommon, networkShuffle, networkYarn, - unsafe, tags, tokenProviderKafka010, sqlKafka010, kvstore, avro + spark, hive, hiveThriftServer, repl, networkCommon, networkShuffle, networkYarn, + unsafe, tags, tokenProviderKafka010, sqlKafka010 ).contains(x) } From 8c27881e2e871f3480509ab412faa36307925842 Mon Sep 17 00:00:00 2001 From: yikaifei Date: Thu, 27 Jan 2022 15:55:57 +0800 Subject: [PATCH 119/513] [SPARK-38011][SQL] Remove duplicated and useless configuration in ParquetFileFormat ### What changes were proposed in this pull request? Currently, we called `ParquetWriteSupport.setSchema(requiredSchema, hadoopConf)` when ParquetFileFormat buildReaderWithPartition, and this helper method set `org.apache.spark.sql.parquet.row.attributes` and `parquet.writer.version`, actually, the former is already set and latter is useless when parquetRead ### Why are the changes needed? duplicated configuration ### Does this PR introduce _any_ user-facing change? no, duplicated configuration only ### How was this patch tested? Existing tests Closes #35308 from Yikf/SPARK-38011. Lead-authored-by: yikaifei Co-authored-by: Yikf Co-authored-by: KaiFei Yi Signed-off-by: Wenchen Fan --- .../sql/execution/datasources/parquet/ParquetFileFormat.scala | 2 -- .../sql/execution/datasources/v2/parquet/ParquetScan.scala | 2 -- 2 files changed, 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index b0a168c9a85c7..aa6f9ee91656d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -218,8 +218,6 @@ class ParquetFileFormat SQLConf.CASE_SENSITIVE.key, sparkSession.sessionState.conf.caseSensitiveAnalysis) - ParquetWriteSupport.setSchema(requiredSchema, hadoopConf) - // Sets flags for `ParquetToSparkSchemaConverter` hadoopConf.setBoolean( SQLConf.PARQUET_BINARY_AS_STRING.key, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScan.scala index 617faad8ab6d7..6b35f2406a82f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScan.scala @@ -78,8 +78,6 @@ case class ParquetScan( SQLConf.CASE_SENSITIVE.key, sparkSession.sessionState.conf.caseSensitiveAnalysis) - ParquetWriteSupport.setSchema(readDataSchema, hadoopConf) - // Sets flags for `ParquetToSparkSchemaConverter` hadoopConf.setBoolean( SQLConf.PARQUET_BINARY_AS_STRING.key, From 725df02d3b9ec214a71ababb52d1a9b97997ead8 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Thu, 27 Jan 2022 
16:49:10 +0800 Subject: [PATCH 120/513] [SPARK-37983][SQL] Back out agg build time metrics from sort aggregate ### What changes were proposed in this pull request? This is a followup of https://issues.apache.org/jira/browse/SPARK-37564 . I realize the agg build time metrics for sort aggregate is actually not correctly recorded. We don't have a hash build phase for sort aggregate, so there is really no way to measure so-called build time for sort aggregate. So here I make the change to back out the change introduced in https://github.com/apache/spark/pull/34826 for agg build time metric. ### Why are the changes needed? To not report confusing SQL metrics `aggTime` for users. ### Does this PR introduce _any_ user-facing change? No, because https://github.com/apache/spark/pull/34826 is not released yet. ### How was this patch tested? The existing unit test introduced in https://github.com/apache/spark/pull/34826 . Closes #35273 from c21/agg-fix. Authored-by: Cheng Su Signed-off-by: Wenchen Fan --- .../aggregate/AggregateCodegenSupport.scala | 24 +++++++++++++++---- .../aggregate/HashAggregateExec.scala | 2 ++ .../aggregate/SortAggregateExec.scala | 18 +++++--------- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenSupport.scala index 6304363d7888e..1377a98422317 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenSupport.scala @@ -47,6 +47,11 @@ trait AggregateCodegenSupport */ private var bufVars: Seq[Seq[ExprCode]] = _ + /** + * Whether this operator needs to build hash table. + */ + protected def needHashTable: Boolean + /** * The generated code for `doProduce` call when aggregate has grouping keys. 
*/ @@ -154,14 +159,23 @@ trait AggregateCodegenSupport """.stripMargin) val numOutput = metricTerm(ctx, "numOutputRows") - val aggTime = metricTerm(ctx, "aggTime") - val beforeAgg = ctx.freshName("beforeAgg") + val doAggWithRecordMetric = + if (needHashTable) { + val aggTime = metricTerm(ctx, "aggTime") + val beforeAgg = ctx.freshName("beforeAgg") + s""" + |long $beforeAgg = System.nanoTime(); + |$doAggFuncName(); + |$aggTime.add((System.nanoTime() - $beforeAgg) / $NANOS_PER_MILLIS); + """.stripMargin + } else { + s"$doAggFuncName();" + } + s""" |while (!$initAgg) { | $initAgg = true; - | long $beforeAgg = System.nanoTime(); - | $doAggFuncName(); - | $aggTime.add((System.nanoTime() - $beforeAgg) / $NANOS_PER_MILLIS); + | $doAggWithRecordMetric | | // output the result | ${genResult.trim} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index d4a4502badd09..ef0eb3e5da257 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -417,6 +417,8 @@ case class HashAggregateExec( } } + protected override def needHashTable: Boolean = true + protected override def doProduceWithKeys(ctx: CodegenContext): String = { val initAgg = ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, "initAgg") if (conf.enableTwoLevelAggMap) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala index f5462d226c3ae..a0557822795af 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.execution.aggregate -import java.util.concurrent.TimeUnit.NANOSECONDS - import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ @@ -44,8 +42,7 @@ case class SortAggregateExec( with AliasAwareOutputOrdering { override lazy val metrics = Map( - "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), - "aggTime" -> SQLMetrics.createTimingMetric(sparkContext, "time in aggregation build")) + "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) override def requiredChildOrdering: Seq[Seq[SortOrder]] = { groupingExpressions.map(SortOrder(_, Ascending)) :: Nil @@ -57,14 +54,11 @@ case class SortAggregateExec( protected override def doExecute(): RDD[InternalRow] = { val numOutputRows = longMetric("numOutputRows") - val aggTime = longMetric("aggTime") - child.execute().mapPartitionsWithIndexInternal { (partIndex, iter) => - val beforeAgg = System.nanoTime() // Because the constructor of an aggregation iterator will read at least the first row, // we need to get the value of iter.hasNext first. val hasInput = iter.hasNext - val res = if (!hasInput && groupingExpressions.nonEmpty) { + if (!hasInput && groupingExpressions.nonEmpty) { // This is a grouped aggregate and the input iterator is empty, // so return an empty iterator. 
Iterator[UnsafeRow]() @@ -90,8 +84,6 @@ case class SortAggregateExec( outputIter } } - aggTime += NANOSECONDS.toMillis(System.nanoTime() - beforeAgg) - res } } @@ -101,11 +93,13 @@ case class SortAggregateExec( groupingExpressions.isEmpty } - protected def doProduceWithKeys(ctx: CodegenContext): String = { + protected override def needHashTable: Boolean = false + + protected override def doProduceWithKeys(ctx: CodegenContext): String = { throw new UnsupportedOperationException("SortAggregate code-gen does not support grouping keys") } - protected def doConsumeWithKeys(ctx: CodegenContext, input: Seq[ExprCode]): String = { + protected override def doConsumeWithKeys(ctx: CodegenContext, input: Seq[ExprCode]): String = { throw new UnsupportedOperationException("SortAggregate code-gen does not support grouping keys") } From 88e8006b52db784278555e52b56dabc7d5f94cce Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 27 Jan 2022 21:58:04 +0800 Subject: [PATCH 121/513] [SPARK-38001][SQL] Replace the error classes related to unsupported features by `UNSUPPORTED_FEATURE` ### What changes were proposed in this pull request? In the PR, I propose to re-use one error class `UNSUPPORTED_FEATURE` in the following Spark's exceptions: - `QueryCompilationErrors.unsupportedIfNotExistsError` - when `IF NOT EXISTS` is not supported by `INSERT INTO`. - `QueryExecutionErrors.aesModeUnsupportedError` - when an user specify unsupported AES mode and padding. - `QueryExecutionErrors.literalTypeUnsupportedError` - impossible to create a literal from the input value (some Java class, for instance). - `QueryExecutionErrors.transactionUnsupportedByJdbcServerError` - the target JDBC server does not support transaction. And replace the following exceptions by `IllegalStateException` since they are internal, and should be not visible to users: - `QueryExecutionErrors.simpleStringWithNodeIdUnsupportedError` - a sub-class of `Expression` or `Block` doesn't implements the method `simpleStringWithNodeId()`. - `QueryExecutionErrors.dataTypeUnsupportedForWriterFuncError` - generating of a writer function for a struct field, array element, map key or map value doesn't support Catalyst's type. Also, added new base test suite `QueryCompilationErrorsDSv2Suite` for testing DSv2 specific compilation error classes. ### Why are the changes needed? Reducing the number of error classes should prevent from explode of `error-classes.json`. Also, using one error class for similar errors should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *SparkThrowableSuite" $ build/sbt "test:testOnly *QueryExecutionErrorsSuite" $ build/sbt "test:testOnly *DataSourceV2SQLSuite" $ build/sbt "test:testOnly *DataSourceV2DataFrameSessionCatalogSuite" $ build/sbt "test:testOnly *DataFramePivotSuite" ``` and the new test suite: ``` $ build/sbt "test:testOnly *QueryCompilationErrorsDSv2Suite" ``` Closes #35302 from MaxGekk/re-use-unsupported_feature-error-class. 
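For readers skimming the diff below, the consolidation amounts to routing every affected call site through the shared `UNSUPPORTED_FEATURE` error class and varying only the message parameter. A minimal sketch of that pattern (not the exact helpers in `QueryCompilationErrors`/`QueryExecutionErrors`; `unsupportedFeatureError` is a hypothetical name used only for illustration):

```
import org.apache.spark.sql.AnalysisException

// Sketch: one shared error class, per-call-site message parameters.
def unsupportedFeatureError(feature: String): Throwable =
  new AnalysisException(
    errorClass = "UNSUPPORTED_FEATURE",
    messageParameters = Array(feature))

// Call sites then differ only in the feature description they pass, e.g.
// unsupportedFeatureError(s"IF NOT EXISTS for the table '$tableName' by INSERT INTO.")
// unsupportedFeatureError("the target JDBC server does not support transaction ...")
```

The purely internal cases are instead demoted to plain `IllegalStateException`, since users are not expected to ever hit them.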
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../main/resources/error/error-classes.json | 18 ------- .../sql/catalyst/expressions/Expression.scala | 2 +- .../InterpretedUnsafeProjection.scala | 4 +- .../expressions/codegen/javaCode.scala | 2 +- .../sql/errors/QueryCompilationErrors.scala | 4 +- .../sql/errors/QueryExecutionErrors.scala | 26 +++------- .../expressions/LiteralExpressionSuite.scala | 11 ---- .../spark/sql/DataFramePivotSuite.scala | 11 ---- .../spark/sql/connector/InsertIntoTests.scala | 16 ------ .../QueryCompilationErrorsDSv2Suite.scala | 52 +++++++++++++++++++ .../errors/QueryExecutionErrorsSuite.scala | 25 ++++++++- 11 files changed, 89 insertions(+), 82 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsDSv2Suite.scala diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 2e7831bdb415a..a1ac99f1a0727 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -51,9 +51,6 @@ "GROUPING_SIZE_LIMIT_EXCEEDED" : { "message" : [ "Grouping sets size cannot be greater than %s" ] }, - "IF_PARTITION_NOT_EXISTS_UNSUPPORTED" : { - "message" : [ "Cannot write, IF NOT EXISTS is not supported for table: %s" ] - }, "ILLEGAL_SUBSTRING" : { "message" : [ "%s cannot contain %s." ] }, @@ -141,10 +138,6 @@ "message" : [ "Unrecognized SQL type %s" ], "sqlState" : "42000" }, - "UNSUPPORTED_CHANGE_COLUMN" : { - "message" : [ "Please add an implementation for a column change here" ], - "sqlState" : "0A000" - }, "UNSUPPORTED_DATATYPE" : { "message" : [ "Unsupported data type %s" ], "sqlState" : "0A000" @@ -153,17 +146,6 @@ "message" : [ "The feature is not supported: %s" ], "sqlState" : "0A000" }, - "UNSUPPORTED_LITERAL_TYPE" : { - "message" : [ "Unsupported literal type %s %s" ], - "sqlState" : "0A000" - }, - "UNSUPPORTED_SIMPLE_STRING_WITH_NODE_ID" : { - "message" : [ "%s does not implement simpleStringWithNodeId" ] - }, - "UNSUPPORTED_TRANSACTION_BY_JDBC_SERVER" : { - "message" : [ "The target JDBC server does not support transaction and can only support ALTER TABLE with a single action." 
], - "sqlState" : "0A000" - }, "WRITING_JOB_ABORTED" : { "message" : [ "Writing job aborted" ], "sqlState" : "40000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 1d54efd7319e3..32b25f51b8efe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -315,7 +315,7 @@ abstract class Expression extends TreeNode[Expression] { } override def simpleStringWithNodeId(): String = { - throw QueryExecutionErrors.simpleStringWithNodeIdUnsupportedError(nodeName) + throw new IllegalStateException(s"$nodeName does not implement simpleStringWithNodeId") } protected def typeSuffix = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala index d02d1e8b55b9d..731ad16cc7d9f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.{UnsafeArrayWriter, UnsafeRowWriter, UnsafeWriter} import org.apache.spark.sql.catalyst.util.ArrayData -import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{UserDefinedType, _} import org.apache.spark.unsafe.Platform @@ -254,7 +253,8 @@ object InterpretedUnsafeProjection { (_, _) => {} case _ => - throw QueryExecutionErrors.dataTypeUnsupportedError(dt) + throw new IllegalStateException(s"The data type '${dt.typeName}' is not supported in " + + "generating a writer function for a struct field, array element, map key or map value.") } // Always wrap the writer with a null safe version. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala index dbe9a810a493e..3651dc420fa21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala @@ -203,7 +203,7 @@ trait Block extends TreeNode[Block] with JavaCode { override def verboseString(maxFields: Int): String = toString override def simpleStringWithNodeId(): String = { - throw QueryExecutionErrors.simpleStringWithNodeIdUnsupportedError(nodeName) + throw new IllegalStateException(s"$nodeName does not implement simpleStringWithNodeId") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 14f8053233d45..6c84aa6a592c0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -93,8 +93,8 @@ object QueryCompilationErrors { def unsupportedIfNotExistsError(tableName: String): Throwable = { new AnalysisException( - errorClass = "IF_PARTITION_NOT_EXISTS_UNSUPPORTED", - messageParameters = Array(tableName)) + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array(s"IF NOT EXISTS for the table '$tableName' by INSERT INTO.")) } def nonPartitionColError(partitionName: String): Throwable = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 384016216f668..6fdb728bca249 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -68,11 +68,6 @@ import org.apache.spark.util.CircularBuffer */ object QueryExecutionErrors { - def columnChangeUnsupportedError(): Throwable = { - new SparkUnsupportedOperationException(errorClass = "UNSUPPORTED_CHANGE_COLUMN", - messageParameters = Array.empty) - } - def logicalHintOperatorNotRemovedDuringAnalysisError(): Throwable = { new SparkIllegalStateException(errorClass = "INTERNAL_ERROR", messageParameters = Array( @@ -131,22 +126,12 @@ object QueryExecutionErrors { messageParameters = Array.empty) } - def simpleStringWithNodeIdUnsupportedError(nodeName: String): Throwable = { - new SparkUnsupportedOperationException(errorClass = "UNSUPPORTED_SIMPLE_STRING_WITH_NODE_ID", - messageParameters = Array(nodeName)) - } - def evaluateUnevaluableAggregateUnsupportedError( methodName: String, unEvaluable: UnevaluableAggregate): Throwable = { new SparkUnsupportedOperationException(errorClass = "INTERNAL_ERROR", messageParameters = Array(s"Cannot evaluate expression: $methodName: $unEvaluable")) } - def dataTypeUnsupportedError(dt: DataType): Throwable = { - new SparkException(errorClass = "UNSUPPORTED_DATATYPE", - messageParameters = Array(dt.typeName), null) - } - def dataTypeUnsupportedError(dataType: String, failure: String): Throwable = { new SparkIllegalArgumentException(errorClass = "UNSUPPORTED_DATATYPE", messageParameters = Array(dataType + failure)) @@ -257,8 +242,9 @@ object QueryExecutionErrors { } def literalTypeUnsupportedError(v: Any): RuntimeException = { - new 
SparkRuntimeException("UNSUPPORTED_LITERAL_TYPE", - Array(v.getClass.toString, v.toString)) + new SparkRuntimeException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array(s"literal for '${v.toString}' of ${v.getClass.toString}.")) } def noDefaultForDataTypeError(dataType: DataType): RuntimeException = { @@ -784,8 +770,10 @@ object QueryExecutionErrors { } def transactionUnsupportedByJdbcServerError(): Throwable = { - new SparkSQLFeatureNotSupportedException(errorClass = "UNSUPPORTED_TRANSACTION_BY_JDBC_SERVER", - Array.empty) + new SparkSQLFeatureNotSupportedException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array("the target JDBC server does not support transaction and " + + "can only support ALTER TABLE with a single action.")) } def dataTypeUnsupportedYetError(dataType: DataType): Throwable = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index b1934a06dc1bf..6ce51f1eec8ca 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -231,17 +231,6 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { checkStructLiteral((Period.ZERO, ("abc", Duration.ofDays(1)))) } - test("unsupported types (map and struct) in Literal.apply") { - def checkUnsupportedTypeInLiteral(v: Any): Unit = { - val errMsgMap = intercept[RuntimeException] { - Literal(v) - } - assert(errMsgMap.getMessage.startsWith("Unsupported literal type")) - } - checkUnsupportedTypeInLiteral(Map("key1" -> 1, "key2" -> 2)) - checkUnsupportedTypeInLiteral(("mike", 29, 1.0)) - } - test("SPARK-24571: char literals") { checkEvaluation(Literal('X'), "X") checkEvaluation(Literal.create('0'), "0") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala index 32cbb8b457d86..bbdae29fa3b05 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala @@ -323,17 +323,6 @@ class DataFramePivotSuite extends QueryTest with SharedSparkSession { checkAnswer(df, expected) } - test("pivoting column list") { - val exception = intercept[RuntimeException] { - trainingSales - .groupBy($"sales.year") - .pivot(struct(lower($"sales.course"), $"training")) - .agg(sum($"sales.earnings")) - .collect() - } - assert(exception.getMessage.contains("Unsupported literal type")) - } - test("SPARK-26403: pivoting by array column") { val df = Seq( (2, Seq.empty[String]), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala index 0dee48fbb5b92..fc98cfd5138e1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala @@ -282,22 +282,6 @@ trait InsertIntoSQLOnlyTests } } - test("InsertInto: IF PARTITION NOT EXISTS not supported") { - val t1 = s"${catalogAndNamespace}tbl" - withTableAndData(t1) { view => - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") - - val exc = intercept[AnalysisException] { - sql(s"INSERT OVERWRITE TABLE 
$t1 PARTITION (id = 1) IF NOT EXISTS SELECT * FROM $view") - } - - verifyTable(t1, spark.emptyDataFrame) - assert(exc.getMessage.contains("Cannot write, IF NOT EXISTS is not supported for table")) - assert(exc.getMessage.contains(t1)) - assert(exc.getErrorClass == "IF_PARTITION_NOT_EXISTS_UNSUPPORTED") - } - } - test("InsertInto: overwrite - dynamic clause - static mode") { withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { val t1 = s"${catalogAndNamespace}tbl" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsDSv2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsDSv2Suite.scala new file mode 100644 index 0000000000000..bfea3f535dd94 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsDSv2Suite.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.errors + +import org.apache.spark.sql.{AnalysisException, QueryTest} +import org.apache.spark.sql.connector.{DatasourceV2SQLBase, FakeV2Provider} +import org.apache.spark.sql.test.SharedSparkSession + +class QueryCompilationErrorsDSv2Suite + extends QueryTest + with SharedSparkSession + with DatasourceV2SQLBase { + + test("UNSUPPORTED_FEATURE: IF PARTITION NOT EXISTS not supported by INSERT") { + val v2Format = classOf[FakeV2Provider].getName + val tbl = "testcat.ns1.ns2.tbl" + + withTable(tbl) { + val view = "tmp_view" + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView(view) + withTempView(view) { + sql(s"CREATE TABLE $tbl (id bigint, data string) USING $v2Format PARTITIONED BY (id)") + + val e = intercept[AnalysisException] { + sql(s"INSERT OVERWRITE TABLE $tbl PARTITION (id = 1) IF NOT EXISTS SELECT * FROM $view") + } + + checkAnswer(spark.table(tbl), spark.emptyDataFrame) + assert(e.getMessage === "The feature is not supported: " + + s"IF NOT EXISTS for the table '$tbl' by INSERT INTO.") + assert(e.getErrorClass === "UNSUPPORTED_FEATURE") + assert(e.getSqlState === "0A000") + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 5137614a366d1..4b2564034344a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.errors import org.apache.spark.{SparkException, SparkRuntimeException} import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.sql.functions.{lit, lower, struct, sum} import 
org.apache.spark.sql.test.SharedSparkSession class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { @@ -89,7 +90,7 @@ class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { } } - test("UNSUPPORTED_MODE: unsupported combinations of AES modes and padding") { + test("UNSUPPORTED_FEATURE: unsupported combinations of AES modes and padding") { val key16 = "abcdefghijklmnop" val key32 = "abcdefghijklmnop12345678ABCDEFGH" val (df1, df2) = getAesInputs() @@ -112,4 +113,26 @@ class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { checkUnsupportedMode(df2.selectExpr(s"aes_decrypt(value16, '$key16', 'GCM', 'PKCS')")) checkUnsupportedMode(df2.selectExpr(s"aes_decrypt(value32, '$key32', 'ECB', 'None')")) } + + test("UNSUPPORTED_FEATURE: unsupported types (map and struct) in lit()") { + def checkUnsupportedTypeInLiteral(v: Any): Unit = { + val e1 = intercept[SparkRuntimeException] { lit(v) } + assert(e1.getErrorClass === "UNSUPPORTED_FEATURE") + assert(e1.getSqlState === "0A000") + assert(e1.getMessage.matches("""The feature is not supported: literal for '.+' of .+\.""")) + } + checkUnsupportedTypeInLiteral(Map("key1" -> 1, "key2" -> 2)) + checkUnsupportedTypeInLiteral(("mike", 29, 1.0)) + + val e2 = intercept[SparkRuntimeException] { + trainingSales + .groupBy($"sales.year") + .pivot(struct(lower($"sales.course"), $"training")) + .agg(sum($"sales.earnings")) + .collect() + } + assert(e2.getMessage === "The feature is not supported: " + + "literal for '[dotnet,Dummies]' of class " + + "org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema.") + } } From 5289fad9c77f973290c0541cbeb3320825d52ad9 Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Thu, 27 Jan 2022 12:48:22 -0800 Subject: [PATCH 122/513] [SPARK-38045][SS][TEST] More strict validation on plan check for stream-stream join unit test ### What changes were proposed in this pull request? This PR is a follow-up of SPARK-35693 to enhance the unit test on stream-stream join to be more strict on plan check. ### Why are the changes needed? We would like to be more strict on plan check so that requirement of distribution against stream-stream join is fulfilled. ### Does this PR introduce _any_ user-facing change? No, test only. ### How was this patch tested? Modified test passed. Closes #35341 from HeartSaVioR/SPARK-35693-followup. 
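As a rough sketch of what "more strict" means here (simplified from the test change below; `plan` and `expectedKeys` are hypothetical names, not identifiers from the patch): instead of only checking that both children of the streaming join are shuffles, the test now also checks that each shuffle hash-partitions on the join key columns.

```
import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec

// Returns true if the plan contains a shuffle that hash-partitions on exactly
// the expected key columns.
def hasHashShuffleOn(plan: SparkPlan, expectedKeys: Seq[String]): Boolean =
  plan.collect {
    case s @ ShuffleExchangeExec(p: HashPartitioning, _, _)
        if p.expressions.collect { case a: AttributeReference => a.name } == expectedKeys => s
  }.nonEmpty
```

The real test additionally asserts that the number of shuffle partitions matches `spark.sql.shuffle.partitions`, as shown in the diff.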
Authored-by: Jungtaek Lim Signed-off-by: Dongjoon Hyun --- .../spark/sql/streaming/StreamingJoinSuite.scala | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index a24e76f81b4aa..5ec47bb2aa527 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -28,6 +28,8 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.{DataFrame, Row, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.execution.streaming.{MemoryStream, StatefulOperatorStateInfo, StreamingSymmetricHashJoinExec, StreamingSymmetricHashJoinHelper} import org.apache.spark.sql.execution.streaming.state.{StateStore, StateStoreProviderId} @@ -583,9 +585,21 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { CheckAnswer(1.to(1000): _*), Execute { query => // Verify the query plan + def partitionExpressionsColumns(expressions: Seq[Expression]): Seq[String] = { + expressions.flatMap { + case ref: AttributeReference => Some(ref.name) + } + } + + val numPartitions = spark.sqlContext.conf.getConf(SQLConf.SHUFFLE_PARTITIONS) + assert(query.lastExecution.executedPlan.collect { case j @ StreamingSymmetricHashJoinExec(_, _, _, _, _, _, _, _, - _: ShuffleExchangeExec, _: ShuffleExchangeExec) => j + ShuffleExchangeExec(opA: HashPartitioning, _, _), + ShuffleExchangeExec(opB: HashPartitioning, _, _)) + if partitionExpressionsColumns(opA.expressions) === Seq("a", "b") + && partitionExpressionsColumns(opB.expressions) === Seq("a", "b") + && opA.numPartitions == numPartitions && opB.numPartitions == numPartitions => j }.size == 1) }) } From 4f755772349117599d444579f04ecafb30cff092 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Thu, 27 Jan 2022 19:02:39 -0800 Subject: [PATCH 123/513] [SPARK-38048][K8S][TESTS] Add `IntegrationTestBackend.describePods` to support all K8s test backends ### What changes were proposed in this pull request? This PR aims to add `IntegrationTestBackend.describePods` to support all K8s test backends ### Why are the changes needed? Currently the docker based K8s tests cannot get the pod information when it fails. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually. Closes #35344 from williamhyun/describePOD. 
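The shape of the change, in a condensed and simplified form (`exec` below is a hypothetical stand-in for the `ProcessUtils.executeProcess` helper used in the real code): the trait ships a `kubectl`-based default implementation, so every backend can report pod state on failure, and only the Minikube backend overrides it to go through the `minikube kubectl` wrapper.

```
trait IntegrationTestBackendSketch {
  // Hypothetical helper: run a shell command and return its output lines.
  protected def exec(cmd: String*): Seq[String]

  // Default: works for docker-desktop / cloud backends with a plain kubectl.
  def describePods(labels: String): Seq[String] =
    exec("bash", "-c", s"kubectl describe pods --all-namespaces -l $labels")
}

object MinikubeBackendSketch extends IntegrationTestBackendSketch {
  protected def exec(cmd: String*): Seq[String] = Seq.empty // stubbed for the sketch
  override def describePods(labels: String): Seq[String] =
    exec("minikube", "kubectl", "--", "describe", "pods", "--all-namespaces", "-l", labels)
}
```

Keeping the default in the trait means any new backend gets failure diagnostics without extra wiring.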
Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- .../spark/deploy/k8s/integrationtest/KubernetesSuite.scala | 3 +-- .../k8s/integrationtest/backend/IntegrationTestBackend.scala | 5 +++++ .../k8s/integrationtest/backend/minikube/Minikube.scala | 4 ---- .../backend/minikube/MinikubeTestBackend.scala | 4 ++++ 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index e608b3aa76ae9..90f666ae54e38 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -37,7 +37,6 @@ import org.scalatest.time.{Minutes, Seconds, Span} import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.k8s.integrationtest.TestConstants._ import org.apache.spark.deploy.k8s.integrationtest.backend.{IntegrationTestBackend, IntegrationTestBackendFactory} -import org.apache.spark.deploy.k8s.integrationtest.backend.minikube.Minikube import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ @@ -76,7 +75,7 @@ class KubernetesSuite extends SparkFunSuite protected override def logForFailedTest(): Unit = { logInfo("\n\n===== EXTRA LOGS FOR THE FAILED TEST\n") logInfo("BEGIN DESCRIBE PODS for application\n" + - Minikube.describePods(s"spark-app-locator=$appLocator").mkString("\n")) + testBackend.describePods(s"spark-app-locator=$appLocator").mkString("\n")) logInfo("END DESCRIBE PODS for the application") val driverPodOption = kubernetesTestComponents.kubernetesClient .pods() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala index 56ddae0c9c57c..36c3b6ad2ec5d 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala @@ -19,6 +19,7 @@ package org.apache.spark.deploy.k8s.integrationtest.backend import io.fabric8.kubernetes.client.DefaultKubernetesClient +import org.apache.spark.deploy.k8s.integrationtest.ProcessUtils import org.apache.spark.deploy.k8s.integrationtest.TestConstants._ import org.apache.spark.deploy.k8s.integrationtest.backend.cloud.KubeConfigBackend import org.apache.spark.deploy.k8s.integrationtest.backend.docker.DockerForDesktopBackend @@ -28,6 +29,10 @@ private[spark] trait IntegrationTestBackend { def initialize(): Unit def getKubernetesClient: DefaultKubernetesClient def cleanUp(): Unit = {} + def describePods(labels: String): Seq[String] = + ProcessUtils.executeProcess( + Array("bash", "-c", s"kubectl describe pods --all-namespaces -l $labels"), + timeout = 60, dumpOutput = false).filter { !_.contains("https://github.com/kubernetes") } } private[spark] object IntegrationTestBackendFactory { diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala index 1ebc64445b717..9f99edefaf093 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala @@ -111,10 +111,6 @@ private[spark] object Minikube extends Logging { def minikubeServiceAction(args: String*): String = { executeMinikube(true, "service", args: _*).head } - - def describePods(labels: String): Seq[String] = - Minikube.executeMinikube(false, "kubectl", "--", "describe", "pods", "--all-namespaces", - "-l", labels) } private[spark] object MinikubeStatus extends Enumeration { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala index f92977ddacdf5..8c8f848114d1c 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala @@ -40,4 +40,8 @@ private[spark] object MinikubeTestBackend extends IntegrationTestBackend { override def getKubernetesClient: DefaultKubernetesClient = { defaultClient } + + override def describePods(labels: String): Seq[String] = + Minikube.executeMinikube(false, "kubectl", "--", "describe", "pods", "--all-namespaces", + "-l", labels) } From 421358d63af395f20e3187fba09eb73cb80a3d0d Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 27 Jan 2022 23:21:06 -0800 Subject: [PATCH 124/513] [SPARK-38051][R][DOCS] Update `Roxygen` reference to 7.1.2 ### What changes were proposed in this pull request? This PR aims to update `Roxygen` reference to 7.1.2 for Apache Spark 3.3.0. ### Why are the changes needed? `Roxygen2` was 7.1.2 since 2021-09-08. So, this touches `R/pkg/DESCRIPTION` file when we build with `-Psparkr`. This PR removes this annoying situation by removing this version mismatch. - https://cran.r-project.org/web/packages/roxygen2/index.html ``` $ build/sbt -Psparkr compile $ git diff diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 6b85bb758a..d147ff2b34 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION -60,7 +60,7 Collate: 'types.R' 'utils.R' 'window.R' -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 VignetteBuilder: knitr NeedsCompilation: no Encoding: UTF-8 ``` We have been using `7.1.2` already for testing. ``` $ docker run -it --rm dongjoon/apache-spark-github-action-image:20211228 Rscript -e 'installed.packages()' | grep roxygen2 | grep site-library roxygen2 "roxygen2" "/usr/local/lib/R/site-library" "7.1.2" ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #35349 from dongjoon-hyun/SPARK-38051. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- R/pkg/DESCRIPTION | 2 +- docs/README.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 6b85bb758a081..d147ff2b34cfd 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -60,7 +60,7 @@ Collate: 'types.R' 'utils.R' 'window.R' -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 VignetteBuilder: knitr NeedsCompilation: no Encoding: UTF-8 diff --git a/docs/README.md b/docs/README.md index 5e9a187ea3ab6..0cf5c0a6281b1 100644 --- a/docs/README.md +++ b/docs/README.md @@ -55,12 +55,12 @@ and install these libraries: ```sh $ sudo Rscript -e 'install.packages(c("knitr", "devtools", "testthat", "rmarkdown"), repos="https://cloud.r-project.org/")' -$ sudo Rscript -e 'devtools::install_version("roxygen2", version = "7.1.1", repos="https://cloud.r-project.org/")' +$ sudo Rscript -e 'devtools::install_version("roxygen2", version = "7.1.2", repos="https://cloud.r-project.org/")' $ sudo Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" $ sudo Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" ``` -Note: Other versions of roxygen2 might work in SparkR documentation generation but `RoxygenNote` field in `$SPARK_HOME/R/pkg/DESCRIPTION` is 7.1.1, which is updated if the version is mismatched. +Note: Other versions of roxygen2 might work in SparkR documentation generation but `RoxygenNote` field in `$SPARK_HOME/R/pkg/DESCRIPTION` is 7.1.2, which is updated if the version is mismatched. ### API Documentation From 169be3e1e6c8743390d3ca401f762b69b328ccfd Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 28 Jan 2022 01:06:34 -0800 Subject: [PATCH 125/513] [SPARK-38049][K8S][TESTS] Use Java 17 in K8s integration tests by default ### What changes were proposed in this pull request? This PR aims to use `Java 17` in K8s integration tests by default. ### Why are the changes needed? Java 8 cannot run Java11/17-built Spark distribution. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually run the following and check the Java version in the generated docker images. **SBT** ``` $ build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube -Dspark.kubernetes.test.deployMode=docker-for-desktop "kubernetes-integration-tests/test" ... [info] KubernetesSuite: [info] - Run SparkPi with no resources (8 seconds, 949 milliseconds) [info] - Run SparkPi with no resources & statefulset allocation (8 seconds, 515 milliseconds) [info] - Run SparkPi with a very long application name. (8 seconds, 389 milliseconds) [info] - Use SparkLauncher.NO_RESOURCE (8 seconds, 393 milliseconds) [info] - Run SparkPi with a master URL without a scheme. (8 seconds, 360 milliseconds) [info] - Run SparkPi with an argument. (8 seconds, 435 milliseconds) [info] - Run SparkPi with custom labels, annotations, and environment variables. (8 seconds, 611 milliseconds) [info] - All pods have the same service account by default (8 seconds, 353 milliseconds) [info] - Run extraJVMOptions check on driver (4 seconds, 364 milliseconds) [info] - Run SparkRemoteFileTest using a remote data file (8 seconds, 392 milliseconds) [info] - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties (14 seconds, 564 milliseconds) [info] - Run SparkPi with env and mount secrets. 
(16 seconds, 868 milliseconds) [info] - Run PySpark on simple pi.py example (9 seconds, 632 milliseconds) [info] - Run PySpark to test a pyfiles example (10 seconds, 520 milliseconds) [info] - Run PySpark with memory customization (8 seconds, 385 milliseconds) [info] - Run in client mode. (7 seconds, 336 milliseconds) [info] - Start pod creation from template (8 seconds, 727 milliseconds) [info] - Test basic decommissioning (42 seconds, 353 milliseconds) [info] - Test basic decommissioning with shuffle cleanup (42 seconds, 532 milliseconds) [info] - Test decommissioning with dynamic allocation & shuffle cleanups (2 minutes, 40 seconds) [info] - Test decommissioning timeouts (42 seconds, 211 milliseconds) [info] - SPARK-37576: Rolling decommissioning (1 minute, 7 seconds) [info] - Run SparkR on simple dataframe.R example (12 seconds, 16 milliseconds) [info] Run completed in 11 minutes, 24 seconds. [info] Total number of tests run: 23 [info] Suites: completed 1, aborted 0 [info] Tests: succeeded 23, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. ``` **MAVEN** ``` $ mvn package -Pkubernetes -DskipTests $ resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh --deploy-mode docker-for-desktop --namespace default --exclude-tags minikube,r ... KubernetesSuite: - Run SparkPi with no resources - Run SparkPi with no resources & statefulset allocation - Run SparkPi with a very long application name. - Use SparkLauncher.NO_RESOURCE - Run SparkPi with a master URL without a scheme. - Run SparkPi with an argument. - Run SparkPi with custom labels, annotations, and environment variables. - All pods have the same service account by default - Run extraJVMOptions check on driver - Run SparkRemoteFileTest using a remote data file - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties - Run SparkPi with env and mount secrets. - Run PySpark on simple pi.py example - Run PySpark to test a pyfiles example - Run PySpark with memory customization - Run in client mode. - Start pod creation from template - Test basic decommissioning - Test basic decommissioning with shuffle cleanup - Test decommissioning with dynamic allocation & shuffle cleanups - Test decommissioning timeouts - SPARK-37576: Rolling decommissioning Run completed in 8 minutes, 52 seconds. Total number of tests run: 22 Suites: completed 2, aborted 0 Tests: succeeded 22, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` ``` $ docker run -it --rm kubespark/spark:3.3.0-SNAPSHOT_C0A4AD5A-2561-4972-B2DE-0FDA941F8064 java -version | tail -n1 OpenJDK 64-Bit Server VM (build 17.0.2+8-Debian-1deb11u1, mixed mode, sharing) ``` Closes #35346 from dongjoon-hyun/SPARK-38049. 
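For quick reference, the effective image-selection logic in `SparkBuild.scala` after this change can be condensed as below (property names as used in the patch, paths unchanged; this is a paraphrase, not the verbatim build code): an explicitly requested `javaImageTag` still wins, otherwise the build falls back to the Java 17 Dockerfile instead of the old `8-jre-slim` base image.

```
val javaImageTag = sys.props.get("spark.kubernetes.test.javaImageTag")
val dockerFile = sys.props.getOrElse(
  "spark.kubernetes.test.dockerFile",
  "resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17")
val extraOptions =
  if (javaImageTag.isDefined) Seq("-b", s"java_image_tag=${javaImageTag.get}")
  else Seq("-f", dockerFile)
```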
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 11 ++++++----- .../kubernetes/integration-tests/pom.xml | 4 ++-- .../scripts/setup-integration-test-env.sh | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index ad9aef5669757..8a56bef3e351b 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -639,12 +639,13 @@ object KubernetesIntegrationTests { if (shouldBuildImage) { val dockerTool = s"$sparkHome/bin/docker-image-tool.sh" val bindingsDir = s"$sparkHome/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings" - val dockerFile = sys.props.get("spark.kubernetes.test.dockerFile") - val javaImageTag = sys.props.getOrElse("spark.kubernetes.test.javaImageTag", "8-jre-slim") - val extraOptions = if (dockerFile.isDefined) { - Seq("-f", s"${dockerFile.get}") - } else { + val javaImageTag = sys.props.get("spark.kubernetes.test.javaImageTag") + val dockerFile = sys.props.getOrElse("spark.kubernetes.test.dockerFile", + "resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17") + val extraOptions = if (javaImageTag.isDefined) { Seq("-b", s"java_image_tag=$javaImageTag") + } else { + Seq("-f", s"$dockerFile") } val cmd = Seq(dockerTool, "-t", imageTag.value, diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 4c5f14b79f690..a44cedb9e1e25 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -35,7 +35,7 @@ N/A ${project.build.directory}/spark-dist-unpacked N/A - 8-jre-slim + N/A ${project.build.directory}/imageTag.txt minikube docker.io/kubespark @@ -43,7 +43,7 @@ - N/A + resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh index 562d1d820cdd1..f79b1f82add67 100755 --- a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh +++ b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh @@ -23,7 +23,7 @@ IMAGE_TAG_OUTPUT_FILE="$TEST_ROOT_DIR/target/image-tag.txt" DEPLOY_MODE="minikube" IMAGE_REPO="docker.io/kubespark" IMAGE_TAG="N/A" -JAVA_IMAGE_TAG="8-jre-slim" +JAVA_IMAGE_TAG="N/A" SPARK_TGZ="N/A" MVN="$TEST_ROOT_DIR/build/mvn" DOCKER_FILE="N/A" From d150b7ee7f035650cd190058375dbef5df74201f Mon Sep 17 00:00:00 2001 From: PengLei Date: Fri, 28 Jan 2022 17:19:34 +0800 Subject: [PATCH 126/513] [SPARK-37931][SQL][FOLLOWUP] Quote the column name of view if needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Quote the column name of view when `SHOW CREATE TABLE` for view. ### Why are the changes needed? follow up the [#PR](https://github.com/apache/spark/pull/35227). Keep the consistent between table and view when `SHOW CREATE TABLE`. ### Does this PR introduce _any_ user-facing change? Yes,It will change the result of `SHOW CREATE TABLE` for view. eg: ``` "STRUCT<`_c0`, `_c1`>" => "STRUCT<_c0, _c1>" ``` ### How was this patch tested? existed testcase. Closes #35351 from Peng-Lei/view-quote-columns. 
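The user-visible effect comes entirely from swapping `quoteIdentifier` for `quoteIfNeeded` when printing view columns. A small illustration of the difference between the two catalyst helpers (expected behavior, as exercised by the updated golden files below; treat it as a sketch rather than an exhaustive spec):

```
import org.apache.spark.sql.catalyst.util.{quoteIdentifier, quoteIfNeeded}

// quoteIdentifier always wraps in backticks, quoteIfNeeded only when required:
assert(quoteIdentifier("_c0") == "`_c0`")
assert(quoteIfNeeded("_c0") == "_c0")
assert(quoteIfNeeded("a column") == "`a column`") // space forces quoting
```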
Authored-by: PengLei Signed-off-by: Wenchen Fan --- .../spark/sql/execution/command/tables.scala | 4 ++-- .../sql-tests/results/charvarchar.sql.out | 4 ++-- .../results/show-create-table.sql.out | 24 +++++++++---------- .../sql/execution/SQLViewTestSuite.scala | 6 ++--- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 7ae0e017b28c0..eceb9e6536d5d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap, CharVarcharUtils} +import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIfNeeded, CaseInsensitiveMap, CharVarcharUtils} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TableIdentifierHelper import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.datasources.DataSource @@ -1035,7 +1035,7 @@ trait ShowCreateTableCommandBase { .map(" COMMENT '" + _ + "'") // view columns shouldn't have data type info - s"${quoteIdentifier(f.name)}${comment.getOrElse("")}" + s"${quoteIfNeeded(f.name)}${comment.getOrElse("")}" } builder ++= concatByMultiLines(viewColumns) } diff --git a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out index de994d67c9f34..6345702e00ea2 100644 --- a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out @@ -219,8 +219,8 @@ show create table char_view struct -- !query output CREATE VIEW default.char_view ( - `c`, - `v`) + c, + v) AS select * from char_tbl diff --git a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out index 4c7f124e72fe1..ca1652b337a28 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out @@ -296,8 +296,8 @@ SHOW CREATE TABLE view_SPARK_30302 AS SERDE struct -- !query output CREATE VIEW default.view_SPARK_30302 ( - `aaa`, - `bbb`) + aaa, + bbb) AS SELECT a, b FROM tbl @@ -307,8 +307,8 @@ SHOW CREATE TABLE view_SPARK_30302 struct -- !query output CREATE VIEW default.view_SPARK_30302 ( - `aaa`, - `bbb`) + aaa, + bbb) AS SELECT a, b FROM tbl @@ -336,8 +336,8 @@ SHOW CREATE TABLE view_SPARK_30302 AS SERDE struct -- !query output CREATE VIEW default.view_SPARK_30302 ( - `aaa` COMMENT 'comment with \'quoted text\' for aaa', - `bbb`) + aaa COMMENT 'comment with \'quoted text\' for aaa', + bbb) COMMENT 'This is a comment with \'quoted text\' for view' AS SELECT a, b FROM tbl @@ -348,8 +348,8 @@ SHOW CREATE TABLE view_SPARK_30302 struct -- !query output CREATE VIEW default.view_SPARK_30302 ( - `aaa` COMMENT 'comment with \'quoted text\' for aaa', - `bbb`) + aaa COMMENT 'comment with \'quoted text\' for aaa', + bbb) COMMENT 'This is a comment with 
\'quoted text\' for view' AS SELECT a, b FROM tbl @@ -378,8 +378,8 @@ SHOW CREATE TABLE view_SPARK_30302 AS SERDE struct -- !query output CREATE VIEW default.view_SPARK_30302 ( - `aaa`, - `bbb`) + aaa, + bbb) TBLPROPERTIES ( 'a' = '1', 'b' = '2') @@ -392,8 +392,8 @@ SHOW CREATE TABLE view_SPARK_30302 struct -- !query output CREATE VIEW default.view_SPARK_30302 ( - `aaa`, - `bbb`) + aaa, + bbb) TBLPROPERTIES ( 'a' = '1', 'b' = '2') diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index da6826c7808aa..94855f2c42143 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -611,7 +611,7 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { Seq(true, false).foreach { serde => withView(viewName) { createView(viewName, "SELECT 1 AS a") - val expected = s"CREATE VIEW ${formattedViewName(viewName)} ( `a`) AS SELECT 1 AS a" + val expected = s"CREATE VIEW ${formattedViewName(viewName)} ( a) AS SELECT 1 AS a" assert(getShowCreateDDL(formattedViewName(viewName), serde) == expected) } } @@ -623,7 +623,7 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { withView(viewName) { createView(viewName, "SELECT 1 AS a, 2 AS b", Seq("a", "b COMMENT 'b column'")) val expected = s"CREATE VIEW ${formattedViewName(viewName)}" + - s" ( `a`, `b` COMMENT 'b column') AS SELECT 1 AS a, 2 AS b" + s" ( a, b COMMENT 'b column') AS SELECT 1 AS a, 2 AS b" assert(getShowCreateDDL(formattedViewName(viewName), serde) == expected) } } @@ -636,7 +636,7 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { createView(viewName, "SELECT 1 AS c1, '2' AS c2", Seq("c1 COMMENT 'bla'", "c2"), Seq("COMMENT 'table comment'", "TBLPROPERTIES ( 'prop2' = 'value2', 'prop1' = 'value1')")) - val expected = s"CREATE VIEW ${formattedViewName(viewName)} ( `c1` COMMENT 'bla', `c2`)" + + val expected = s"CREATE VIEW ${formattedViewName(viewName)} ( c1 COMMENT 'bla', c2)" + " COMMENT 'table comment'" + " TBLPROPERTIES ( 'prop1' = 'value1', 'prop2' = 'value2')" + " AS SELECT 1 AS c1, '2' AS c2" From c027fb69bb5fe0382b48187a67399eb141f13505 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Fri, 28 Jan 2022 17:39:55 +0800 Subject: [PATCH 127/513] [SPARK-38035][SQL] Add docker tests for build-in JDBC dialect ### What changes were proposed in this pull request? Currently, Spark only have `PostgresNamespaceSuite` to test DS V2 namespace in docker environment. But missing tests for other build-in JDBC dialect (e.g. Oracle, MySQL). This PR also found some compatible issue. For example, the JDBC api `conn.getMetaData.getSchemas` works bad for MySQL. ### Why are the changes needed? We need add tests for other build-in JDBC dialect. ### Does this PR introduce _any_ user-facing change? 'No'. Just add tests which face developers. ### How was this patch tested? New tests. Closes #35333 from beliefer/SPARK-38035. 
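Context for the `conn.getMetaData.getSchemas` remark above: the namespace tests ultimately rely on plain JDBC metadata to enumerate schemas, roughly as sketched below (connection URL and credentials are placeholders, not values from the test suites):

```
import java.sql.DriverManager
import scala.collection.mutable.ArrayBuffer

val conn = DriverManager.getConnection("jdbc:postgresql://host:5432/db", "user", "pass")
try {
  val rs = conn.getMetaData.getSchemas()
  val namespaces = ArrayBuffer.empty[Array[String]]
  while (rs.next()) {
    namespaces += Array(rs.getString("TABLE_SCHEM")) // one row per schema/namespace
  }
  namespaces.foreach(ns => println(ns.mkString(".")))
} finally {
  conn.close()
}
```

Because this call does not behave well on MySQL, the new MySQL suite leaves its list/drop namespace tests as TODOs, as the diff below shows.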
Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- external/docker-integration-tests/pom.xml | 5 + .../spark/sql/jdbc/v2/DB2NamespaceSuite.scala | 74 ++++++++++++ .../jdbc/v2/MsSqlServerNamespaceSuite.scala | 76 ++++++++++++ .../sql/jdbc/v2/MySQLIntegrationSuite.scala | 3 - .../sql/jdbc/v2/MySQLNamespaceSuite.scala | 65 +++++++++++ .../sql/jdbc/v2/OracleNamespaceSuite.scala | 86 ++++++++++++++ .../sql/jdbc/v2/PostgresNamespaceSuite.scala | 6 +- .../sql/jdbc/v2/V2JDBCNamespaceTest.scala | 110 +++++++++++------- .../datasources/jdbc/JdbcUtils.scala | 7 +- .../apache/spark/sql/jdbc/DB2Dialect.scala | 28 ++++- .../apache/spark/sql/jdbc/JdbcDialects.scala | 8 ++ .../spark/sql/jdbc/MsSqlServerDialect.scala | 14 +++ 12 files changed, 428 insertions(+), 54 deletions(-) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2NamespaceSuite.scala create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerNamespaceSuite.scala create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleNamespaceSuite.scala diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index bb39e5fde6d08..e3070f462c1ff 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -162,5 +162,10 @@ mssql-jdbc test + + mysql + mysql-connector-java + test + diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2NamespaceSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2NamespaceSuite.scala new file mode 100644 index 0000000000000..f0e98fc2722b0 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2NamespaceSuite.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.tags.DockerTest + +/** + * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.6.0a): + * {{{ + * ENABLE_DOCKER_INTEGRATION_TESTS=1 DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.6.0a + * ./build/sbt -Pdocker-integration-tests "testOnly *v2.DB2NamespaceSuite" + * }}} + */ +@DockerTest +class DB2NamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespaceTest { + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("DB2_DOCKER_IMAGE_NAME", "ibmcom/db2:11.5.6.0a") + override val env = Map( + "DB2INST1_PASSWORD" -> "rootpass", + "LICENSE" -> "accept", + "DBNAME" -> "db2foo", + "ARCHIVE_LOGS" -> "false", + "AUTOCONFIG" -> "false" + ) + override val usesIpc = false + override val jdbcPort: Int = 50000 + override val privileged = true + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:db2://$ip:$port/db2foo:user=db2inst1;password=rootpass;retrieveMessagesFromServerOnGetMessage=true;" //scalastyle:ignore + } + + val map = new CaseInsensitiveStringMap( + Map("url" -> db.getJdbcUrl(dockerIp, externalPort), + "driver" -> "com.ibm.db2.jcc.DB2Driver").asJava) + + catalog.initialize("db2", map) + + override def dataPreparation(conn: Connection): Unit = {} + + override def builtinNamespaces: Array[Array[String]] = + Array(Array("NULLID"), Array("SQLJ"), Array("SYSCAT"), Array("SYSFUN"), + Array("SYSIBM"), Array("SYSIBMADM"), Array("SYSIBMINTERNAL"), Array("SYSIBMTS"), + Array("SYSPROC"), Array("SYSPUBLIC"), Array("SYSSTAT"), Array("SYSTOOLS")) + + override def listNamespaces(namespace: Array[String]): Array[Array[String]] = { + builtinNamespaces ++ Array(namespace) + } + + override val supportsDropSchemaCascade: Boolean = false + + testListNamespaces() + testDropNamespaces() +} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerNamespaceSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerNamespaceSuite.scala new file mode 100644 index 0000000000000..aa8dac266380a --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerNamespaceSuite.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.tags.DockerTest + +/** + * To run this test suite for a specific version (e.g., 2019-CU13-ubuntu-20.04): + * {{{ + * ENABLE_DOCKER_INTEGRATION_TESTS=1 + * MSSQLSERVER_DOCKER_IMAGE_NAME=mcr.microsoft.com/mssql/server:2019-CU13-ubuntu-20.04 + * ./build/sbt -Pdocker-integration-tests "testOnly *v2.MsSqlServerNamespaceSuite" + * }}} + */ +@DockerTest +class MsSqlServerNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespaceTest { + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("MSSQLSERVER_DOCKER_IMAGE_NAME", + "mcr.microsoft.com/mssql/server:2019-CU13-ubuntu-20.04") + override val env = Map( + "SA_PASSWORD" -> "Sapass123", + "ACCEPT_EULA" -> "Y" + ) + override val usesIpc = false + override val jdbcPort: Int = 1433 + + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:sqlserver://$ip:$port;user=sa;password=Sapass123;" + } + + val map = new CaseInsensitiveStringMap( + Map("url" -> db.getJdbcUrl(dockerIp, externalPort), + "driver" -> "com.microsoft.sqlserver.jdbc.SQLServerDriver").asJava) + + catalog.initialize("mssql", map) + + override def dataPreparation(conn: Connection): Unit = {} + + override def builtinNamespaces: Array[Array[String]] = + Array(Array("db_accessadmin"), Array("db_backupoperator"), Array("db_datareader"), + Array("db_datawriter"), Array("db_ddladmin"), Array("db_denydatareader"), + Array("db_denydatawriter"), Array("db_owner"), Array("db_securityadmin"), Array("dbo"), + Array("guest"), Array("INFORMATION_SCHEMA"), Array("sys")) + + override def listNamespaces(namespace: Array[String]): Array[Array[String]] = { + builtinNamespaces ++ Array(namespace) + } + + override val supportsSchemaComment: Boolean = false + + override val supportsDropSchemaCascade: Boolean = false + + testListNamespaces() + testDropNamespaces() +} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala index bc4bf54324ee5..97f521a378eb7 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -29,14 +29,11 @@ import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest /** - * * To run this test suite for a specific version (e.g., mysql:5.7.36): * {{{ * ENABLE_DOCKER_INTEGRATION_TESTS=1 MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.36 * ./build/sbt -Pdocker-integration-tests "testOnly *v2*MySQLIntegrationSuite" - * * }}} - * */ @DockerTest class MySQLIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala new file mode 100644 index 0000000000000..d3230155b8923 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.tags.DockerTest + +/** + * To run this test suite for a specific version (e.g., mysql:5.7.36): + * {{{ + * ENABLE_DOCKER_INTEGRATION_TESTS=1 MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.36 + * ./build/sbt -Pdocker-integration-tests "testOnly *v2*MySQLNamespaceSuite" + * }}} + */ +@DockerTest +class MySQLNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespaceTest { + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:5.7.36") + override val env = Map( + "MYSQL_ROOT_PASSWORD" -> "rootpass" + ) + override val usesIpc = false + override val jdbcPort: Int = 3306 + + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:mysql://$ip:$port/" + + s"mysql?user=root&password=rootpass&allowPublicKeyRetrieval=true&useSSL=false" + } + + val map = new CaseInsensitiveStringMap( + Map("url" -> db.getJdbcUrl(dockerIp, externalPort), + "driver" -> "com.mysql.jdbc.Driver").asJava) + + catalog.initialize("mysql", map) + + override def dataPreparation(conn: Connection): Unit = {} + + override def builtinNamespaces: Array[Array[String]] = Array() + + override val supportsSchemaComment: Boolean = false + + // Cannot get namespaces with conn.getMetaData.getSchemas + // TODO testListNamespaces() + // TODO testDropNamespaces() +} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleNamespaceSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleNamespaceSuite.scala new file mode 100644 index 0000000000000..31f26d2990666 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleNamespaceSuite.scala @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.tags.DockerTest + +/** + * The following are the steps to test this: + * + * 1. Choose to use a prebuilt image or build Oracle database in a container + * - The documentation on how to build Oracle RDBMS in a container is at + * https://github.com/oracle/docker-images/blob/master/OracleDatabase/SingleInstance/README.md + * - Official Oracle container images can be found at https://container-registry.oracle.com + * - A trustable and streamlined Oracle XE database image can be found on Docker Hub at + * https://hub.docker.com/r/gvenzl/oracle-xe see also https://github.com/gvenzl/oci-oracle-xe + * 2. Run: export ORACLE_DOCKER_IMAGE_NAME=image_you_want_to_use_for_testing + * - Example: export ORACLE_DOCKER_IMAGE_NAME=gvenzl/oracle-xe:latest + * 3. Run: export ENABLE_DOCKER_INTEGRATION_TESTS=1 + * 4. Start docker: sudo service docker start + * - Optionally, docker pull $ORACLE_DOCKER_IMAGE_NAME + * 5. Run Spark integration tests for Oracle with: ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.v2.OracleNamespaceSuite" + * + * A sequence of commands to build the Oracle XE database container image: + * $ git clone https://github.com/oracle/docker-images.git + * $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles + * $ ./buildContainerImage.sh -v 18.4.0 -x + * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe + * + * This procedure has been validated with Oracle 18.4.0 Express Edition. + */ +@DockerTest +class OracleNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespaceTest { + override val db = new DatabaseOnDocker { + lazy override val imageName = + sys.env.getOrElse("ORACLE_DOCKER_IMAGE_NAME", "gvenzl/oracle-xe:18.4.0") + val oracle_password = "Th1s1sThe0racle#Pass" + override val env = Map( + "ORACLE_PWD" -> oracle_password, // oracle images uses this + "ORACLE_PASSWORD" -> oracle_password // gvenzl/oracle-xe uses this + ) + override val usesIpc = false + override val jdbcPort: Int = 1521 + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:oracle:thin:system/$oracle_password@//$ip:$port/xe" + } + + val map = new CaseInsensitiveStringMap( + Map("url" -> db.getJdbcUrl(dockerIp, externalPort), + "driver" -> "oracle.jdbc.OracleDriver").asJava) + + catalog.initialize("system", map) + + override def dataPreparation(conn: Connection): Unit = {} + + override def builtinNamespaces: Array[Array[String]] = + Array(Array("ANONYMOUS"), Array("APEX_030200"), Array("APEX_PUBLIC_USER"), Array("APPQOSSYS"), + Array("BI"), Array("DIP"), Array("FLOWS_FILES"), Array("HR"), Array("OE"), Array("PM"), + Array("SCOTT"), Array("SH"), Array("SPATIAL_CSW_ADMIN_USR"), Array("SPATIAL_WFS_ADMIN_USR"), + Array("XS$NULL")) + + // Cannot create schema dynamically + // TODO testListNamespaces() + // TODO testDropNamespaces() +} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala index a7744d18433f1..33190103d6a9a 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala +++ 
b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala @@ -53,7 +53,9 @@ class PostgresNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNames override def dataPreparation(conn: Connection): Unit = {} - override def builtinNamespaces: Array[Array[String]] = { + override def builtinNamespaces: Array[Array[String]] = Array(Array("information_schema"), Array("pg_catalog"), Array("public")) - } + + testListNamespaces() + testDropNamespaces() } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala index 4f56f1f4ea1e7..8d97ac45568e3 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala @@ -44,52 +44,78 @@ private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession with DockerInte def builtinNamespaces: Array[Array[String]] - test("listNamespaces: basic behavior") { - catalog.createNamespace(Array("foo"), Map("comment" -> "test comment").asJava) - assert(catalog.listNamespaces() === Array(Array("foo")) ++ builtinNamespaces) - assert(catalog.listNamespaces(Array("foo")) === Array()) - assert(catalog.namespaceExists(Array("foo")) === true) - - val logAppender = new LogAppender("catalog comment") - withLogAppender(logAppender) { - catalog.alterNamespace(Array("foo"), NamespaceChange - .setProperty("comment", "comment for foo")) - catalog.alterNamespace(Array("foo"), NamespaceChange.removeProperty("comment")) - } - val createCommentWarning = logAppender.loggingEvents - .filter(_.getLevel == Level.WARN) - .map(_.getMessage.getFormattedMessage) - .exists(_.contains("catalog comment")) - assert(createCommentWarning === false) - - catalog.dropNamespace(Array("foo"), cascade = false) - assert(catalog.namespaceExists(Array("foo")) === false) - assert(catalog.listNamespaces() === builtinNamespaces) - val msg = intercept[AnalysisException] { - catalog.listNamespaces(Array("foo")) - }.getMessage - assert(msg.contains("Namespace 'foo' not found")) + def listNamespaces(namespace: Array[String]): Array[Array[String]] = { + Array(namespace) ++ builtinNamespaces } - test("Drop namespace") { - val ident1 = Identifier.of(Array("foo"), "tab") - // Drop empty namespace without cascade - catalog.createNamespace(Array("foo"), Map("comment" -> "test comment").asJava) - assert(catalog.namespaceExists(Array("foo")) === true) - catalog.dropNamespace(Array("foo"), cascade = false) - assert(catalog.namespaceExists(Array("foo")) === false) - - // Drop non empty namespace without cascade - catalog.createNamespace(Array("foo"), Map("comment" -> "test comment").asJava) - assert(catalog.namespaceExists(Array("foo")) === true) - catalog.createTable(ident1, schema, Array.empty, emptyProps) - intercept[NonEmptyNamespaceException] { + def supportsSchemaComment: Boolean = true + + def supportsDropSchemaCascade: Boolean = true + + def testListNamespaces(): Unit = { + test("listNamespaces: basic behavior") { + val commentMap = if (supportsSchemaComment) { + Map("comment" -> "test comment") + } else { + Map.empty[String, String] + } + catalog.createNamespace(Array("foo"), commentMap.asJava) + assert(catalog.listNamespaces() === listNamespaces(Array("foo"))) + assert(catalog.listNamespaces(Array("foo")) === Array()) + 
assert(catalog.namespaceExists(Array("foo")) === true) + + if (supportsSchemaComment) { + val logAppender = new LogAppender("catalog comment") + withLogAppender(logAppender) { + catalog.alterNamespace(Array("foo"), NamespaceChange + .setProperty("comment", "comment for foo")) + catalog.alterNamespace(Array("foo"), NamespaceChange.removeProperty("comment")) + } + val createCommentWarning = logAppender.loggingEvents + .filter(_.getLevel == Level.WARN) + .map(_.getMessage.getFormattedMessage) + .exists(_.contains("catalog comment")) + assert(createCommentWarning === false) + } + catalog.dropNamespace(Array("foo"), cascade = false) + assert(catalog.namespaceExists(Array("foo")) === false) + assert(catalog.listNamespaces() === builtinNamespaces) + val msg = intercept[AnalysisException] { + catalog.listNamespaces(Array("foo")) + }.getMessage + assert(msg.contains("Namespace 'foo' not found")) } + } + + def testDropNamespaces(): Unit = { + test("Drop namespace") { + val ident1 = Identifier.of(Array("foo"), "tab") + // Drop empty namespace without cascade + val commentMap = if (supportsSchemaComment) { + Map("comment" -> "test comment") + } else { + Map.empty[String, String] + } + catalog.createNamespace(Array("foo"), commentMap.asJava) + assert(catalog.namespaceExists(Array("foo")) === true) + catalog.dropNamespace(Array("foo"), cascade = false) + assert(catalog.namespaceExists(Array("foo")) === false) - // Drop non empty namespace with cascade - assert(catalog.namespaceExists(Array("foo")) === true) - catalog.dropNamespace(Array("foo"), cascade = true) - assert(catalog.namespaceExists(Array("foo")) === false) + // Drop non empty namespace without cascade + catalog.createNamespace(Array("foo"), Map("comment" -> "test comment").asJava) + assert(catalog.namespaceExists(Array("foo")) === true) + catalog.createTable(ident1, schema, Array.empty, emptyProps) + intercept[NonEmptyNamespaceException] { + catalog.dropNamespace(Array("foo"), cascade = false) + } + + // Drop non empty namespace with cascade + if (supportsDropSchemaCascade) { + assert(catalog.namespaceExists(Array("foo")) === true) + catalog.dropNamespace(Array("foo"), cascade = true) + assert(catalog.namespaceExists(Array("foo")) === false) + } + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index cc40d19693b4d..1a4e4aaf16da8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -1017,12 +1017,7 @@ object JdbcUtils extends Logging with SQLConfHelper { def dropNamespace( conn: Connection, options: JDBCOptions, namespace: String, cascade: Boolean): Unit = { val dialect = JdbcDialects.get(options.url) - val dropCmd = if (cascade) { - s"DROP SCHEMA ${dialect.quoteIdentifier(namespace)} CASCADE" - } else { - s"DROP SCHEMA ${dialect.quoteIdentifier(namespace)}" - } - executeStatement(conn, options, dropCmd) + executeStatement(conn, options, dialect.dropSchema(namespace, cascade)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala index 307aa511cc152..baa772f4546a4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala @@ -17,9 +17,11 @@ package 
org.apache.spark.sql.jdbc -import java.sql.Types +import java.sql.{SQLException, Types} import java.util.Locale +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.NonEmptyNamespaceException import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, GeneralAggregateFunc} import org.apache.spark.sql.types._ @@ -101,4 +103,28 @@ private object DB2Dialect extends JdbcDialect { val nullable = if (isNullable) "DROP NOT NULL" else "SET NOT NULL" s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)} $nullable" } + + override def removeSchemaCommentQuery(schema: String): String = { + s"COMMENT ON SCHEMA ${quoteIdentifier(schema)} IS ''" + } + + override def classifyException(message: String, e: Throwable): AnalysisException = { + e match { + case sqlException: SQLException => + sqlException.getSQLState match { + // https://www.ibm.com/docs/en/db2/11.5?topic=messages-sqlstate + case "42893" => throw NonEmptyNamespaceException(message, cause = Some(e)) + case _ => super.classifyException(message, e) + } + case _ => super.classifyException(message, e) + } + } + + override def dropSchema(schema: String, cascade: Boolean): String = { + if (cascade) { + s"DROP SCHEMA ${quoteIdentifier(schema)} CASCADE" + } else { + s"DROP SCHEMA ${quoteIdentifier(schema)} RESTRICT" + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 7b8b362e64c6d..7dd987e0a44b3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -327,6 +327,14 @@ abstract class JdbcDialect extends Serializable with Logging{ s"COMMENT ON SCHEMA ${quoteIdentifier(schema)} IS NULL" } + def dropSchema(schema: String, cascade: Boolean): String = { + if (cascade) { + s"DROP SCHEMA ${quoteIdentifier(schema)} CASCADE" + } else { + s"DROP SCHEMA ${quoteIdentifier(schema)}" + } + } + /** * Build a create index SQL statement. 
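As a quick illustration of the new dialect hook above, the following sketch shows the SQL the default and DB2 implementations of `dropSchema` would emit. It is a minimal sketch, not part of the patch, assuming the default double-quote identifier quoting and a hypothetical `jdbc:db2` connection URL.

```
import org.apache.spark.sql.jdbc.JdbcDialects

// Resolve the DB2 dialect from a hypothetical connection URL.
val db2 = JdbcDialects.get("jdbc:db2://localhost:50000/testdb")

// DB2 requires an explicit keyword either way, so the override emits RESTRICT
// where the base dialect would emit a bare DROP SCHEMA statement.
db2.dropSchema("foo", cascade = true)   // DROP SCHEMA "foo" CASCADE
db2.dropSchema("foo", cascade = false)  // DROP SCHEMA "foo" RESTRICT
```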
* diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index 442c5599b3ab3..3d8a48a66ea8f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -17,8 +17,11 @@ package org.apache.spark.sql.jdbc +import java.sql.SQLException import java.util.Locale +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.NonEmptyNamespaceException import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, GeneralAggregateFunc} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf @@ -147,4 +150,15 @@ private object MsSqlServerDialect extends JdbcDialect { override def getLimitClause(limit: Integer): String = { "" } + + override def classifyException(message: String, e: Throwable): AnalysisException = { + e match { + case sqlException: SQLException => + sqlException.getErrorCode match { + case 3729 => throw NonEmptyNamespaceException(message, cause = Some(e)) + case _ => super.classifyException(message, e) + } + case _ => super.classifyException(message, e) + } + } } From 4920014cc3a14019a2584c1059bc3d7b75426635 Mon Sep 17 00:00:00 2001 From: huaxingao Date: Fri, 28 Jan 2022 21:56:44 +0800 Subject: [PATCH 128/513] [SPARK-37923][SQL] Generate partition transforms for BucketSpec inside parser ### What changes were proposed in this pull request? We currently generate partition transforms for BucketSpec in Analyzer. It's cleaner to do this inside Parser. ### Why are the changes needed? code simplification ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing tests Closes #35221 from huaxingao/partition_transform. Authored-by: huaxingao Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 11 +++-- .../catalyst/plans/logical/v2Commands.scala | 3 +- .../catalog/CatalogV2Implicits.scala | 39 +++++++++------ .../sql/errors/QueryCompilationErrors.scala | 7 +-- .../sql/errors/QueryExecutionErrors.scala | 6 ++- ...eateTablePartitioningValidationSuite.scala | 14 +++--- .../sql/catalyst/parser/DDLParserSuite.scala | 30 ++---------- .../catalog/InMemoryPartitionTable.scala | 2 +- .../apache/spark/sql/DataFrameWriter.scala | 3 -- .../apache/spark/sql/DataFrameWriterV2.scala | 2 - .../analysis/ResolveSessionCatalog.scala | 47 +++++++++---------- .../datasources/v2/V2SessionCatalog.scala | 34 ++------------ .../sql/streaming/DataStreamWriter.scala | 1 - .../V2CommandsCaseSensitivitySuite.scala | 8 ++-- .../command/PlanResolutionSuite.scala | 28 ++++++++--- 15 files changed, 100 insertions(+), 135 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 35fe084e1a4ed..ed2623ebf420d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -55,6 +55,7 @@ import org.apache.spark.util.random.RandomSampler * TableIdentifier. 
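The conversion that moves into the parser is essentially the existing `BucketSpecHelper.asTransform` from `CatalogV2Implicits`. A rough sketch of what it produces for a bucketed, sorted table follows; the column names are illustrative, and the shape of the result is an approximation rather than Spark source.

```
import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.BucketSpecHelper

// CLUSTERED BY (a) SORTED BY (b) INTO 5 BUCKETS, as captured while parsing DDL
val spec = BucketSpec(5, Seq("a"), Seq("b"))

// The parser now appends this transform to the PARTITIONED BY transforms,
// so TableSpec no longer carries a separate bucketSpec field.
val bucketTransform = spec.asTransform
// roughly equivalent to bucket(5, Array(FieldReference.column("a")),
//                              Array(FieldReference.column("b")))
```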
*/ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logging { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import ParserUtils._ protected def typedVisit[T](ctx: ParseTree): T = { @@ -3472,8 +3473,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg s"CREATE TEMPORARY TABLE ...$asSelect, use CREATE TEMPORARY VIEW instead", ctx) } - val partitioning = partitionExpressions(partTransforms, partCols, ctx) - val tableSpec = TableSpec(bucketSpec, properties, provider, options, location, comment, + val partitioning = + partitionExpressions(partTransforms, partCols, ctx) ++ bucketSpec.map(_.asTransform) + val tableSpec = TableSpec(properties, provider, options, location, comment, serdeInfo, external) Option(ctx.query).map(plan) match { @@ -3556,8 +3558,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg operationNotAllowed(s"CREATE TABLE ... USING ... ${serdeInfo.get.describe}", ctx) } - val partitioning = partitionExpressions(partTransforms, partCols, ctx) - val tableSpec = TableSpec(bucketSpec, properties, provider, options, location, comment, + val partitioning = + partitionExpressions(partTransforms, partCols, ctx) ++ bucketSpec.map(_.asTransform) + val tableSpec = TableSpec(properties, provider, options, location, comment, serdeInfo, false) Option(ctx.query).map(plan) match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index edf3abfacbb72..45465b0f99d3b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -18,8 +18,8 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, FieldName, NamedRelation, PartitionSpec, ResolvedDBObjectName, UnresolvedException} -import org.apache.spark.sql.catalyst.catalog.{BucketSpec, FunctionResource} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.catalyst.catalog.FunctionResource import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, Unevaluable} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.trees.BinaryLike @@ -1129,7 +1129,6 @@ case class DropIndex( } case class TableSpec( - bucketSpec: Option[BucketSpec], properties: Map[String, String], provider: Option[String], options: Map[String, String], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index f4890cc3058d8..07098eed4f9c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -17,12 +17,14 @@ package org.apache.spark.sql.connector.catalog +import scala.collection.mutable + import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.quoteIfNeeded -import 
org.apache.spark.sql.connector.expressions.{IdentityTransform, LogicalExpressions, Transform} -import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.connector.expressions.{BucketTransform, FieldReference, IdentityTransform, LogicalExpressions, Transform} +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} /** * Conversion helpers for working with v2 [[CatalogPlugin]]. @@ -49,21 +51,28 @@ private[sql] object CatalogV2Implicits { } implicit class TransformHelper(transforms: Seq[Transform]) { - def asPartitionColumns: Seq[String] = { - val (idTransforms, nonIdTransforms) = transforms.partition(_.isInstanceOf[IdentityTransform]) - - if (nonIdTransforms.nonEmpty) { - throw QueryCompilationErrors.cannotConvertTransformsToPartitionColumnsError(nonIdTransforms) + def convertTransforms: (Seq[String], Option[BucketSpec]) = { + val identityCols = new mutable.ArrayBuffer[String] + var bucketSpec = Option.empty[BucketSpec] + + transforms.map { + case IdentityTransform(FieldReference(Seq(col))) => + identityCols += col + + case BucketTransform(numBuckets, col, sortCol) => + if (bucketSpec.nonEmpty) throw QueryExecutionErrors.MultipleBucketTransformsError + if (sortCol.isEmpty) { + bucketSpec = Some(BucketSpec(numBuckets, col.map(_.fieldNames.mkString(".")), Nil)) + } else { + bucketSpec = Some(BucketSpec(numBuckets, col.map(_.fieldNames.mkString(".")), + sortCol.map(_.fieldNames.mkString(".")))) + } + + case transform => + throw QueryExecutionErrors.unsupportedPartitionTransformError(transform) } - idTransforms.map(_.asInstanceOf[IdentityTransform]).map(_.reference).map { ref => - val parts = ref.fieldNames - if (parts.size > 1) { - throw QueryCompilationErrors.cannotPartitionByNestedColumnError(ref) - } else { - parts(0) - } - } + (identityCols.toSeq, bucketSpec) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 6c84aa6a592c0..3250098e65063 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.util.{toPrettySQL, FailFastMode, ParseMode, import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.functions.{BoundFunction, UnboundFunction} -import org.apache.spark.sql.connector.expressions.{NamedReference, Transform} +import org.apache.spark.sql.connector.expressions.NamedReference import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED, LEGACY_CTE_PRECEDENCE_POLICY} import org.apache.spark.sql.sources.Filter @@ -1369,11 +1369,6 @@ object QueryCompilationErrors { new AnalysisException("Cannot use interval type in the table schema.") } - def cannotConvertTransformsToPartitionColumnsError(nonIdTransforms: Seq[Transform]): Throwable = { - new AnalysisException("Transforms cannot be converted to partition columns: " + - nonIdTransforms.map(_.describe).mkString(", ")) - } - def cannotPartitionByNestedColumnError(reference: NamedReference): Throwable = { new AnalysisException(s"Cannot partition by nested column: $reference") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 6fdb728bca249..76eb4311e41bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -655,7 +655,7 @@ object QueryExecutionErrors { def unsupportedPartitionTransformError(transform: Transform): Throwable = { new UnsupportedOperationException( - s"SessionCatalog does not support partition transform: $transform") + s"Unsupported partition transform: $transform") } def missingDatabaseLocationError(): Throwable = { @@ -1940,4 +1940,8 @@ object QueryExecutionErrors { new IllegalArgumentException( s"The input string '$input' does not match the given number format: '$format'") } + + def MultipleBucketTransformsError(): Throwable = { + new UnsupportedOperationException("Multiple bucket transforms are not supported.") + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala index 41b22bc019014..ced83b31c7f04 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap class CreateTablePartitioningValidationSuite extends AnalysisTest { test("CreateTableAsSelect: fail missing top-level column") { - val tableSpec = TableSpec(None, Map.empty, None, Map.empty, + val tableSpec = TableSpec(Map.empty, None, Map.empty, None, None, None, false) val plan = CreateTableAsSelect( UnresolvedDBObjectName(Array("table_name"), isNamespace = false), @@ -46,7 +46,7 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest { } test("CreateTableAsSelect: fail missing top-level column nested reference") { - val tableSpec = TableSpec(None, Map.empty, None, Map.empty, + val tableSpec = TableSpec(Map.empty, None, Map.empty, None, None, None, false) val plan = CreateTableAsSelect( UnresolvedDBObjectName(Array("table_name"), isNamespace = false), @@ -63,7 +63,7 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest { } test("CreateTableAsSelect: fail missing nested column") { - val tableSpec = TableSpec(None, Map.empty, None, Map.empty, + val tableSpec = TableSpec(Map.empty, None, Map.empty, None, None, None, false) val plan = CreateTableAsSelect( UnresolvedDBObjectName(Array("table_name"), isNamespace = false), @@ -80,7 +80,7 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest { } test("CreateTableAsSelect: fail with multiple errors") { - val tableSpec = TableSpec(None, Map.empty, None, Map.empty, + val tableSpec = TableSpec(Map.empty, None, Map.empty, None, None, None, false) val plan = CreateTableAsSelect( UnresolvedDBObjectName(Array("table_name"), isNamespace = false), @@ -98,7 +98,7 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest { } test("CreateTableAsSelect: success with top-level column") { - val tableSpec = TableSpec(None, Map.empty, None, Map.empty, + val tableSpec = TableSpec(Map.empty, None, Map.empty, None, None, None, false) val plan = CreateTableAsSelect( UnresolvedDBObjectName(Array("table_name"), isNamespace = false), @@ -112,7 +112,7 @@ class CreateTablePartitioningValidationSuite extends 
AnalysisTest { } test("CreateTableAsSelect: success using nested column") { - val tableSpec = TableSpec(None, Map.empty, None, Map.empty, + val tableSpec = TableSpec(Map.empty, None, Map.empty, None, None, None, false) val plan = CreateTableAsSelect( UnresolvedDBObjectName(Array("table_name"), isNamespace = false), @@ -126,7 +126,7 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest { } test("CreateTableAsSelect: success using complex column") { - val tableSpec = TableSpec(None, Map.empty, None, Map.empty, + val tableSpec = TableSpec(Map.empty, None, Map.empty, None, None, None, false) val plan = CreateTableAsSelect( UnresolvedDBObjectName(Array("table_name"), isNamespace = false), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 56fdffdd82bc9..956b70a6e0351 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -21,11 +21,11 @@ import java.util.Locale import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis._ -import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions.{EqualTo, Hex, Literal} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition.{after, first} import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform} +import org.apache.spark.sql.connector.expressions.LogicalExpressions.bucket import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType, TimestampType} import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} @@ -59,7 +59,6 @@ class DDLParserSuite extends AnalysisTest { .add("a", IntegerType, nullable = true, "test") .add("b", StringType, nullable = false)), Seq.empty[Transform], - None, Map.empty[String, String], Some("parquet"), Map.empty[String, String], @@ -83,7 +82,6 @@ class DDLParserSuite extends AnalysisTest { Seq("my_tab"), Some(new StructType().add("a", IntegerType).add("b", StringType)), Seq.empty[Transform], - None, Map.empty[String, String], Some("parquet"), Map.empty[String, String], @@ -104,7 +102,6 @@ class DDLParserSuite extends AnalysisTest { .add("a", IntegerType, nullable = true, "test") .add("b", StringType)), Seq(IdentityTransform(FieldReference("a"))), - None, Map.empty[String, String], Some("parquet"), Map.empty[String, String], @@ -159,7 +156,6 @@ class DDLParserSuite extends AnalysisTest { FieldReference("a"), LiteralValue(UTF8String.fromString("bar"), StringType), LiteralValue(34, IntegerType)))), - None, Map.empty[String, String], Some("parquet"), Map.empty[String, String], @@ -181,14 +177,14 @@ class DDLParserSuite extends AnalysisTest { val expectedTableSpec = TableSpec( Seq("my_tab"), Some(new StructType().add("a", IntegerType).add("b", StringType)), - Seq.empty[Transform], - Some(BucketSpec(5, Seq("a"), Seq("b"))), + List(bucket(5, Array(FieldReference.column("a")), Array(FieldReference.column("b")))), Map.empty[String, String], Some("parquet"), Map.empty[String, String], None, None, None) + Seq(createSql, replaceSql).foreach { sql => testCreateOrReplaceDdl(sql, 
expectedTableSpec, expectedIfNotExists = false) } @@ -201,7 +197,6 @@ class DDLParserSuite extends AnalysisTest { Seq("my_tab"), Some(new StructType().add("a", IntegerType).add("b", StringType)), Seq.empty[Transform], - None, Map.empty[String, String], Some("parquet"), Map.empty[String, String], @@ -222,7 +217,6 @@ class DDLParserSuite extends AnalysisTest { Seq("my_tab"), Some(new StructType().add("a", IntegerType).add("b", StringType)), Seq.empty[Transform], - None, Map("test" -> "test"), Some("parquet"), Map.empty[String, String], @@ -241,7 +235,6 @@ class DDLParserSuite extends AnalysisTest { Seq("my_tab"), Some(new StructType().add("a", IntegerType).add("b", StringType)), Seq.empty[Transform], - None, Map.empty[String, String], Some("parquet"), Map.empty[String, String], @@ -260,7 +253,6 @@ class DDLParserSuite extends AnalysisTest { Seq("1m", "2g"), Some(new StructType().add("a", IntegerType)), Seq.empty[Transform], - None, Map.empty[String, String], Some("parquet"), Map.empty[String, String], @@ -279,7 +271,6 @@ class DDLParserSuite extends AnalysisTest { Seq("my_tab"), Some(new StructType().add("id", LongType).add("part", StringType)), Seq(IdentityTransform(FieldReference("part"))), - None, Map.empty[String, String], None, Map.empty[String, String], @@ -298,7 +289,6 @@ class DDLParserSuite extends AnalysisTest { Seq("my_tab"), Some(new StructType().add("part", StringType)), Seq(IdentityTransform(FieldReference("part"))), - None, Map.empty[String, String], None, Map.empty[String, String], @@ -317,7 +307,6 @@ class DDLParserSuite extends AnalysisTest { Seq("my_tab"), Some(new StructType().add("id", LongType).add("part", StringType)), Seq(IdentityTransform(FieldReference("part"))), - None, Map.empty[String, String], Some("parquet"), Map.empty[String, String], @@ -361,7 +350,6 @@ class DDLParserSuite extends AnalysisTest { Seq("my_tab"), Some(new StructType().add("id", LongType).add("part", StringType)), Seq(IdentityTransform(FieldReference("part"))), - None, Map.empty[String, String], None, Map.empty[String, String], @@ -387,7 +375,6 @@ class DDLParserSuite extends AnalysisTest { Seq("my_tab"), Some(new StructType().add("id", LongType).add("part", StringType)), Seq(IdentityTransform(FieldReference("part"))), - None, Map.empty[String, String], None, Map.empty[String, String], @@ -430,7 +417,6 @@ class DDLParserSuite extends AnalysisTest { Seq("my_tab"), Some(new StructType().add("id", LongType).add("part", StringType)), Seq(IdentityTransform(FieldReference("part"))), - None, Map.empty[String, String], None, Map.empty[String, String], @@ -469,7 +455,6 @@ class DDLParserSuite extends AnalysisTest { Seq("my_tab"), Some(new StructType().add("id", LongType).add("part", StringType)), Seq(IdentityTransform(FieldReference("part"))), - None, Map.empty[String, String], None, Map.empty[String, String], @@ -493,7 +478,6 @@ class DDLParserSuite extends AnalysisTest { Seq("my_tab"), Some(new StructType().add("id", LongType).add("part", StringType)), Seq(IdentityTransform(FieldReference("part"))), - None, Map.empty[String, String], None, Map.empty[String, String], @@ -627,7 +611,6 @@ class DDLParserSuite extends AnalysisTest { Seq("table_name"), Some(new StructType), Seq.empty[Transform], - Option.empty[BucketSpec], Map.empty[String, String], Some("json"), Map("a" -> "1", "b" -> "0.1", "c" -> "true"), @@ -683,7 +666,6 @@ class DDLParserSuite extends AnalysisTest { Seq("mydb", "page_view"), None, Seq.empty[Transform], - None, Map("p1" -> "v1", "p2" -> "v2"), Some("parquet"), Map.empty[String, String], 
@@ -2124,7 +2106,6 @@ class DDLParserSuite extends AnalysisTest { name: Seq[String], schema: Option[StructType], partitioning: Seq[Transform], - bucketSpec: Option[BucketSpec], properties: Map[String, String], provider: Option[String], options: Map[String, String], @@ -2141,7 +2122,6 @@ class DDLParserSuite extends AnalysisTest { create.name.asInstanceOf[UnresolvedDBObjectName].nameParts, Some(create.tableSchema), create.partitioning, - create.tableSpec.bucketSpec, create.tableSpec.properties, create.tableSpec.provider, create.tableSpec.options, @@ -2154,7 +2134,6 @@ class DDLParserSuite extends AnalysisTest { replace.name.asInstanceOf[UnresolvedDBObjectName].nameParts, Some(replace.tableSchema), replace.partitioning, - replace.tableSpec.bucketSpec, replace.tableSpec.properties, replace.tableSpec.provider, replace.tableSpec.options, @@ -2166,7 +2145,6 @@ class DDLParserSuite extends AnalysisTest { ctas.name.asInstanceOf[UnresolvedDBObjectName].nameParts, Some(ctas.query).filter(_.resolved).map(_.schema), ctas.partitioning, - ctas.tableSpec.bucketSpec, ctas.tableSpec.properties, ctas.tableSpec.provider, ctas.tableSpec.options, @@ -2179,7 +2157,6 @@ class DDLParserSuite extends AnalysisTest { rtas.name.asInstanceOf[UnresolvedDBObjectName].nameParts, Some(rtas.query).filter(_.resolved).map(_.schema), rtas.partitioning, - rtas.tableSpec.bucketSpec, rtas.tableSpec.properties, rtas.tableSpec.provider, rtas.tableSpec.options, @@ -2217,7 +2194,6 @@ class DDLParserSuite extends AnalysisTest { Seq("1m", "2g"), Some(new StructType().add("a", IntegerType)), Seq.empty[Transform], - None, Map.empty[String, String], None, Map.empty[String, String], diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryPartitionTable.scala index 58dc4847111e2..671d22040e169 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryPartitionTable.scala @@ -43,7 +43,7 @@ class InMemoryPartitionTable( new ConcurrentHashMap[InternalRow, util.Map[String, String]]() def partitionSchema: StructType = { - val partitionColumnNames = partitioning.toSeq.asPartitionColumns + val partitionColumnNames = partitioning.toSeq.convertTransforms._1 new StructType(schema.filter(p => partitionColumnNames.contains(p.name)).toArray) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 396cb26259f68..e4d6dd2297f9b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -327,7 +327,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { supportsExtract, catalogManager, dsOptions) val tableSpec = TableSpec( - bucketSpec = None, properties = Map.empty, provider = Some(source), options = Map.empty, @@ -596,7 +595,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { case (SaveMode.Overwrite, _) => val tableSpec = TableSpec( - bucketSpec = None, properties = Map.empty, provider = Some(source), options = Map.empty, @@ -617,7 +615,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { // created between our existence check and physical execution, but this can't be helped // in any case. 
val tableSpec = TableSpec( - bucketSpec = None, properties = Map.empty, provider = Some(source), options = Map.empty, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala index 22b2eb978d917..93127e6288a3b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala @@ -108,7 +108,6 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) override def create(): Unit = { val tableSpec = TableSpec( - bucketSpec = None, properties = properties.toMap, provider = provider, options = Map.empty, @@ -198,7 +197,6 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) private def internalReplace(orCreate: Boolean): Unit = { val tableSpec = TableSpec( - bucketSpec = None, properties = properties.toMap, provider = provider, options = Map.empty, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 6df94f3864552..13237eb75c9a9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule @@ -146,30 +146,28 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) // For CREATE TABLE [AS SELECT], we should use the v1 command if the catalog is resolved to the // session catalog and the table provider is not v2. 
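On the v1 path handled below, the table's transforms (now including any bucket transform appended by the parser) are turned back into partition columns plus an optional `BucketSpec` via the shared `TransformHelper.convertTransforms` added in `CatalogV2Implicits`. A minimal sketch of that round trip, with illustrative column names and the result shown as comments:

```
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TransformHelper
import org.apache.spark.sql.connector.expressions.{Expressions, Transform}

val transforms: Seq[Transform] =
  Seq(Expressions.identity("part"), Expressions.bucket(16, "id"))

// Identity transforms become partition columns and a single bucket transform
// becomes the BucketSpec; a second bucket transform raises
// MultipleBucketTransformsError, and any other transform is rejected.
val (partitionCols, bucketSpec) = transforms.convertTransforms
// partitionCols == Seq("part"), bucketSpec == Some(BucketSpec(16, Seq("id"), Nil))
```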
- case c @ CreateTable(ResolvedDBObjectName(catalog, name), _, _, _, _) => + case c @ CreateTable(ResolvedDBObjectName(catalog, name), _, _, _, _) + if isSessionCatalog(catalog) => val (storageFormat, provider) = getStorageFormatAndProvider( c.tableSpec.provider, c.tableSpec.options, c.tableSpec.location, c.tableSpec.serde, ctas = false) - if (isSessionCatalog(catalog) && !isV2Provider(provider)) { + if (!isV2Provider(provider)) { constructV1TableCmd(None, c.tableSpec, name, c.tableSchema, c.partitioning, c.ignoreIfExists, storageFormat, provider) } else { - val newTableSpec = c.tableSpec.copy(bucketSpec = None) - c.copy(partitioning = c.partitioning ++ c.tableSpec.bucketSpec.map(_.asTransform), - tableSpec = newTableSpec) + c } - case c @ CreateTableAsSelect(ResolvedDBObjectName(catalog, name), _, _, _, _, _) => + case c @ CreateTableAsSelect(ResolvedDBObjectName(catalog, name), _, _, _, _, _) + if isSessionCatalog(catalog) => val (storageFormat, provider) = getStorageFormatAndProvider( c.tableSpec.provider, c.tableSpec.options, c.tableSpec.location, c.tableSpec.serde, ctas = true) - if (isSessionCatalog(catalog) && !isV2Provider(provider)) { + if (!isV2Provider(provider)) { constructV1TableCmd(Some(c.query), c.tableSpec, name, new StructType, c.partitioning, c.ignoreIfExists, storageFormat, provider) } else { - val newTableSpec = c.tableSpec.copy(bucketSpec = None) - c.copy(partitioning = c.partitioning ++ c.tableSpec.bucketSpec.map(_.asTransform), - tableSpec = newTableSpec) + c } case RefreshTable(ResolvedV1TableIdentifier(ident)) => @@ -180,26 +178,23 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) // For REPLACE TABLE [AS SELECT], we should fail if the catalog is resolved to the // session catalog and the table provider is not v2. 
- case c @ ReplaceTable( - ResolvedDBObjectName(catalog, _), _, _, _, _) => + case c @ ReplaceTable(ResolvedDBObjectName(catalog, _), _, _, _, _) + if isSessionCatalog(catalog) => val provider = c.tableSpec.provider.getOrElse(conf.defaultDataSourceName) - if (isSessionCatalog(catalog) && !isV2Provider(provider)) { + if (!isV2Provider(provider)) { throw QueryCompilationErrors.operationOnlySupportedWithV2TableError("REPLACE TABLE") } else { - val newTableSpec = c.tableSpec.copy(bucketSpec = None) - c.copy(partitioning = c.partitioning ++ c.tableSpec.bucketSpec.map(_.asTransform), - tableSpec = newTableSpec) + c } - case c @ ReplaceTableAsSelect(ResolvedDBObjectName(catalog, _), _, _, _, _, _) => + case c @ ReplaceTableAsSelect(ResolvedDBObjectName(catalog, _), _, _, _, _, _) + if isSessionCatalog(catalog) => val provider = c.tableSpec.provider.getOrElse(conf.defaultDataSourceName) - if (isSessionCatalog(catalog) && !isV2Provider(provider)) { + if (!isV2Provider(provider)) { throw QueryCompilationErrors .operationOnlySupportedWithV2TableError("REPLACE TABLE AS SELECT") } else { - val newTableSpec = c.tableSpec.copy(bucketSpec = None) - c.copy(partitioning = c.partitioning ++ c.tableSpec.bucketSpec.map(_.asTransform), - tableSpec = newTableSpec) + c } case DropTable(ResolvedV1TableIdentifier(ident), ifExists, purge) => @@ -453,7 +448,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) storageFormat: CatalogStorageFormat, provider: String): CreateTableV1 = { val tableDesc = buildCatalogTable(name.asTableIdentifier, tableSchema, - partitioning, tableSpec.bucketSpec, tableSpec.properties, provider, + partitioning, tableSpec.properties, provider, tableSpec.location, tableSpec.comment, storageFormat, tableSpec.external) val mode = if (ignoreIfExists) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTableV1(tableDesc, mode, query) @@ -525,7 +520,6 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) table: TableIdentifier, schema: StructType, partitioning: Seq[Transform], - bucketSpec: Option[BucketSpec], properties: Map[String, String], provider: String, location: Option[String], @@ -537,6 +531,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) } else { CatalogTableType.MANAGED } + val (partitionColumns, maybeBucketSpec) = partitioning.toSeq.convertTransforms CatalogTable( identifier = table, @@ -544,8 +539,8 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) storage = storageFormat, schema = schema, provider = Some(provider), - partitionColumnNames = partitioning.asPartitionColumns, - bucketSpec = bucketSpec, + partitionColumnNames = partitionColumns, + bucketSpec = maybeBucketSpec, properties = properties, comment = comment) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala index d9cfe0aa04dc8..b9a4e0e6ba30b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala @@ -25,11 +25,11 @@ import scala.collection.mutable import org.apache.spark.sql.catalyst.{FunctionIdentifier, SQLConfHelper, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} -import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogDatabase, CatalogTable, CatalogTableType, CatalogUtils, SessionCatalog} 
+import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTable, CatalogTableType, CatalogUtils, SessionCatalog} import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogV2Util, FunctionCatalog, Identifier, NamespaceChange, SupportsNamespaces, Table, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.catalog.NamespaceChange.RemoveProperty import org.apache.spark.sql.connector.catalog.functions.UnboundFunction -import org.apache.spark.sql.connector.expressions.{BucketTransform, FieldReference, IdentityTransform, Transform} +import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.internal.connector.V1Function @@ -96,8 +96,8 @@ class V2SessionCatalog(catalog: SessionCatalog) schema: StructType, partitions: Array[Transform], properties: util.Map[String, String]): Table = { - - val (partitionColumns, maybeBucketSpec) = V2SessionCatalog.convertTransforms(partitions) + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TransformHelper + val (partitionColumns, maybeBucketSpec) = partitions.toSeq.convertTransforms val provider = properties.getOrDefault(TableCatalog.PROP_PROVIDER, conf.defaultDataSourceName) val tableProperties = properties.asScala val location = Option(properties.get(TableCatalog.PROP_LOCATION)) @@ -330,32 +330,6 @@ class V2SessionCatalog(catalog: SessionCatalog) private[sql] object V2SessionCatalog { - /** - * Convert v2 Transforms to v1 partition columns and an optional bucket spec. - */ - private def convertTransforms(partitions: Seq[Transform]): (Seq[String], Option[BucketSpec]) = { - val identityCols = new mutable.ArrayBuffer[String] - var bucketSpec = Option.empty[BucketSpec] - - partitions.map { - case IdentityTransform(FieldReference(Seq(col))) => - identityCols += col - - case BucketTransform(numBuckets, col, sortCol) => - if (sortCol.isEmpty) { - bucketSpec = Some(BucketSpec(numBuckets, col.map(_.fieldNames.mkString(".")), Nil)) - } else { - bucketSpec = Some(BucketSpec(numBuckets, col.map(_.fieldNames.mkString(".")), - sortCol.map(_.fieldNames.mkString(".")))) - } - - case transform => - throw QueryExecutionErrors.unsupportedPartitionTransformError(transform) - } - - (identityCols.toSeq, bucketSpec) - } - private def toCatalogDatabase( db: String, metadata: util.Map[String, String], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index f72d03ecc62be..af058315f7caf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -290,7 +290,6 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { * TODO (SPARK-33638): Full support of v2 table creation */ val tableSpec = TableSpec( - None, Map.empty[String, String], Some(source), Map.empty[String, String], diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala index 15a25c2680722..fcb25751db8d6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala @@ -46,7 
+46,7 @@ class V2CommandsCaseSensitivitySuite extends SharedSparkSession with AnalysisTes Seq(true, false).foreach { caseSensitive => withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { Seq("ID", "iD").foreach { ref => - val tableSpec = TableSpec(None, Map.empty, None, Map.empty, + val tableSpec = TableSpec(Map.empty, None, Map.empty, None, None, None, false) val plan = CreateTableAsSelect( UnresolvedDBObjectName(Array("table_name"), isNamespace = false), @@ -70,7 +70,7 @@ class V2CommandsCaseSensitivitySuite extends SharedSparkSession with AnalysisTes Seq(true, false).foreach { caseSensitive => withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { Seq("POINT.X", "point.X", "poInt.x", "poInt.X").foreach { ref => - val tableSpec = TableSpec(None, Map.empty, None, Map.empty, + val tableSpec = TableSpec(Map.empty, None, Map.empty, None, None, None, false) val plan = CreateTableAsSelect( UnresolvedDBObjectName(Array("table_name"), isNamespace = false), @@ -95,7 +95,7 @@ class V2CommandsCaseSensitivitySuite extends SharedSparkSession with AnalysisTes Seq(true, false).foreach { caseSensitive => withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { Seq("ID", "iD").foreach { ref => - val tableSpec = TableSpec(None, Map.empty, None, Map.empty, + val tableSpec = TableSpec(Map.empty, None, Map.empty, None, None, None, false) val plan = ReplaceTableAsSelect( UnresolvedDBObjectName(Array("table_name"), isNamespace = false), @@ -119,7 +119,7 @@ class V2CommandsCaseSensitivitySuite extends SharedSparkSession with AnalysisTes Seq(true, false).foreach { caseSensitive => withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { Seq("POINT.X", "point.X", "poInt.x", "poInt.X").foreach { ref => - val tableSpec = TableSpec(None, Map.empty, None, Map.empty, + val tableSpec = TableSpec(Map.empty, None, Map.empty, None, None, None, false) val plan = ReplaceTableAsSelect( UnresolvedDBObjectName(Array("table_name"), isNamespace = false), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 5862acff70ab1..4a8defdad4105 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -249,9 +249,7 @@ class PlanResolutionSuite extends AnalysisTest { } test("create table - partitioned by transforms") { - val transforms = Seq( - "bucket(16, b)", "years(ts)", "months(ts)", "days(ts)", "hours(ts)", "foo(a, 'bar', 34)", - "bucket(32, b), days(ts)") + val transforms = Seq("years(ts)", "months(ts)", "days(ts)", "hours(ts)", "foo(a, 'bar', 34)") transforms.foreach { transform => val query = s""" @@ -259,12 +257,30 @@ class PlanResolutionSuite extends AnalysisTest { |PARTITIONED BY ($transform) """.stripMargin - val ae = intercept[AnalysisException] { + val ae = intercept[UnsupportedOperationException] { parseAndResolve(query) } - assert(ae.message - .contains(s"Transforms cannot be converted to partition columns: $transform")) + assert(ae.getMessage + .contains(s"Unsupported partition transform: $transform")) + } + } + + test("create table - partitioned by multiple bucket transforms") { + val transforms = Seq("bucket(32, b), sorted_bucket(b, 32, c)") + transforms.foreach { transform => + val query = + s""" + |CREATE TABLE my_tab(a INT, b STRING, c String) USING parquet + |PARTITIONED BY 
($transform) + """.stripMargin + + val ae = intercept[UnsupportedOperationException] { + parseAndResolve(query) + } + + assert(ae.getMessage + .contains("Multiple bucket transforms are not supported.")) } } From 9afb407fa7aaf2f0961661b5d8cfbec549e591ee Mon Sep 17 00:00:00 2001 From: Chandni Singh Date: Fri, 28 Jan 2022 21:12:28 -0600 Subject: [PATCH 129/513] [SPARK-37675][SPARK-37793] Prevent overwriting of push shuffle merged files once the shuffle is finalized ### What changes were proposed in this pull request? This fixes the bugs that were reported in SPARK-37675 and SPARK-37793. - Empty merged partitions were reported by the shuffle server to the driver. - The push merged files were getting overwritten after a shuffle merge is finalized. - Throwing exception in the finalization of a shuffle for which the shuffle server didn't receive any blocks. ### Why are the changes needed? Changes are need to fix the bug. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Have added unit test. Closes #35325 from otterc/SPARK-37675. Authored-by: Chandni Singh Signed-off-by: Mridul Muralidharan gmail.com> --- .../shuffle/RemoteBlockPushResolver.java | 171 +++++++++--------- .../shuffle/RemoteBlockPushResolverSuite.java | 77 +++++++- 2 files changed, 158 insertions(+), 90 deletions(-) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java index d626cc3efaf07..b823076e57f71 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java @@ -83,20 +83,11 @@ public class RemoteBlockPushResolver implements MergedShuffleFileManager { public static final String MERGE_DIR_KEY = "mergeDir"; public static final String ATTEMPT_ID_KEY = "attemptId"; private static final int UNDEFINED_ATTEMPT_ID = -1; - // Shuffles of determinate stages will have shuffleMergeId set to 0 - private static final int DETERMINATE_SHUFFLE_MERGE_ID = 0; private static final ErrorHandler.BlockPushErrorHandler ERROR_HANDLER = createErrorHandler(); // ByteBuffer to respond to client upon a successful merge of a pushed block private static final ByteBuffer SUCCESS_RESPONSE = new BlockPushReturnCode(ReturnCode.SUCCESS.id(), "").toByteBuffer().asReadOnlyBuffer(); - // ConcurrentHashMap doesn't allow null for keys or values which is why this is required. - // Marker to identify finalized indeterminate shuffle partitions in the case of indeterminate - // stage retries. - @VisibleForTesting - public static final Map INDETERMINATE_SHUFFLE_FINALIZED = - Collections.emptyMap(); - /** * A concurrent hashmap where the key is the applicationId, and the value includes * all the merged shuffle information for this application. 
AppShuffleInfo stores @@ -169,59 +160,45 @@ private AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( String blockId) throws BlockPushNonFatalFailure { ConcurrentMap shuffles = appShuffleInfo.shuffles; AppShuffleMergePartitionsInfo shufflePartitionsWithMergeId = - shuffles.compute(shuffleId, (id, appShuffleMergePartitionsInfo) -> { - if (appShuffleMergePartitionsInfo == null) { - File dataFile = - appShuffleInfo.getMergedShuffleDataFile(shuffleId, shuffleMergeId, reduceId); - // If this partition is already finalized then the partitions map will not contain the - // shuffleId for determinate stages but the data file would exist. - // In that case the block is considered late. In the case of indeterminate stages, most - // recent shuffleMergeId finalized would be pointing to INDETERMINATE_SHUFFLE_FINALIZED - if (dataFile.exists()) { - throw new BlockPushNonFatalFailure(new BlockPushReturnCode( - ReturnCode.TOO_LATE_BLOCK_PUSH.id(), blockId).toByteBuffer(), - BlockPushNonFatalFailure.getErrorMsg(blockId, ReturnCode.TOO_LATE_BLOCK_PUSH)); - } else { - logger.info("Creating a new attempt for shuffle blocks push request for shuffle {}" - + " with shuffleMergeId {} for application {}_{}", shuffleId, shuffleMergeId, - appShuffleInfo.appId, appShuffleInfo.attemptId); - return new AppShuffleMergePartitionsInfo(shuffleMergeId, false); - } + shuffles.compute(shuffleId, (id, mergePartitionsInfo) -> { + if (mergePartitionsInfo == null) { + logger.info("{} attempt {} shuffle {} shuffleMerge {}: creating a new shuffle " + + "merge metadata", appShuffleInfo.appId, appShuffleInfo.attemptId, shuffleId, + shuffleMergeId); + return new AppShuffleMergePartitionsInfo(shuffleMergeId, false); } else { - // Reject the request as we have already seen a higher shuffleMergeId than the - // current incoming one - int latestShuffleMergeId = appShuffleMergePartitionsInfo.shuffleMergeId; + int latestShuffleMergeId = mergePartitionsInfo.shuffleMergeId; if (latestShuffleMergeId > shuffleMergeId) { + // Reject the request as we have already seen a higher shuffleMergeId than the one + // in the current request. throw new BlockPushNonFatalFailure( new BlockPushReturnCode(ReturnCode.STALE_BLOCK_PUSH.id(), blockId).toByteBuffer(), BlockPushNonFatalFailure.getErrorMsg(blockId, ReturnCode.STALE_BLOCK_PUSH)); - } else if (latestShuffleMergeId == shuffleMergeId) { - return appShuffleMergePartitionsInfo; - } else { + } else if (latestShuffleMergeId < shuffleMergeId){ // Higher shuffleMergeId seen for the shuffle ID meaning new stage attempt is being // run for the shuffle ID. 
Close and clean up old shuffleMergeId files, // happens in the indeterminate stage retries - logger.info("Creating a new attempt for shuffle blocks push request for shuffle {}" - + " with shuffleMergeId {} for application {}_{} since it is higher than the" - + " latest shuffleMergeId {} already seen", shuffleId, shuffleMergeId, - appShuffleInfo.appId, appShuffleInfo.attemptId, latestShuffleMergeId); + logger.info("{} attempt {} shuffle {} shuffleMerge {}: creating a new shuffle " + + "merge metadata since received shuffleMergeId is higher than latest " + + "shuffleMergeId {}", appShuffleInfo.appId, appShuffleInfo.attemptId, shuffleId, + shuffleMergeId, latestShuffleMergeId); mergedShuffleCleaner.execute(() -> - closeAndDeletePartitionFiles(appShuffleMergePartitionsInfo.shuffleMergePartitions)); + closeAndDeletePartitionFiles(mergePartitionsInfo.shuffleMergePartitions)); return new AppShuffleMergePartitionsInfo(shuffleMergeId, false); + } else { + // The request is for block with same shuffleMergeId as the latest shuffleMergeId + if (mergePartitionsInfo.isFinalized()) { + throw new BlockPushNonFatalFailure( + new BlockPushReturnCode( + ReturnCode.TOO_LATE_BLOCK_PUSH.id(), blockId).toByteBuffer(), + BlockPushNonFatalFailure.getErrorMsg(blockId, ReturnCode.TOO_LATE_BLOCK_PUSH)); + } + return mergePartitionsInfo; } } }); - - // It only gets here when the shuffle is already finalized. - if (null == shufflePartitionsWithMergeId || - INDETERMINATE_SHUFFLE_FINALIZED == shufflePartitionsWithMergeId.shuffleMergePartitions) { - throw new BlockPushNonFatalFailure( - new BlockPushReturnCode(ReturnCode.TOO_LATE_BLOCK_PUSH.id(), blockId).toByteBuffer(), - BlockPushNonFatalFailure.getErrorMsg(blockId, ReturnCode.TOO_LATE_BLOCK_PUSH)); - } - Map shuffleMergePartitions = - shufflePartitionsWithMergeId.shuffleMergePartitions; + shufflePartitionsWithMergeId.shuffleMergePartitions; return shuffleMergePartitions.computeIfAbsent(reduceId, key -> { // It only gets here when the key is not present in the map. The first time the merge // manager receives a pushed block for a given application shuffle partition. @@ -235,9 +212,9 @@ private AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( return newAppShufflePartitionInfo(appShuffleInfo.appId, shuffleId, shuffleMergeId, reduceId, dataFile, indexFile, metaFile); } catch (IOException e) { - logger.error( - "Cannot create merged shuffle partition with data file {}, index file {}, and " - + "meta file {}", dataFile.getAbsolutePath(), + logger.error("{} attempt {} shuffle {} shuffleMerge {}: cannot create merged shuffle " + + "partition with data file {}, index file {}, and meta file {}", appShuffleInfo.appId, + appShuffleInfo.attemptId, shuffleId, shuffleMergeId, dataFile.getAbsolutePath(), indexFile.getAbsolutePath(), metaFile.getAbsolutePath()); throw new RuntimeException( String.format("Cannot initialize merged shuffle partition for appId %s shuffleId %s " @@ -350,6 +327,7 @@ public void applicationRemoved(String appId, boolean cleanupLocalDirs) { * If cleanupLocalDirs is true, the merged shuffle files will also be deleted. * The cleanup will be executed in a separate thread. 
*/ + @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter") @VisibleForTesting void closeAndDeletePartitionFilesIfNeeded( AppShuffleInfo appShuffleInfo, @@ -512,10 +490,11 @@ public ByteBuffer getCompletionResponse() { } } + @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter") @Override public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) { - logger.info("Finalizing shuffle {} with shuffleMergeId {} from Application {}_{}.", - msg.shuffleId, msg.shuffleMergeId, msg.appId, msg.appAttemptId); + logger.info("{} attempt {} shuffle {} shuffleMerge {}: finalize shuffle merge", + msg.appId, msg.appAttemptId, msg.shuffleId, msg.shuffleMergeId); AppShuffleInfo appShuffleInfo = validateAndGetAppShuffleInfo(msg.appId); if (appShuffleInfo.attemptId != msg.appAttemptId) { // If finalizeShuffleMerge from a former application attempt, it is considered late, @@ -534,35 +513,33 @@ public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) { } AtomicReference> shuffleMergePartitionsRef = new AtomicReference<>(null); - // Metadata of the determinate stage shuffle can be safely removed as part of finalizing - // shuffle merge. Currently once the shuffle is finalized for a determinate stages, retry - // stages of the same shuffle will have shuffle push disabled. - if (msg.shuffleMergeId == DETERMINATE_SHUFFLE_MERGE_ID) { - AppShuffleMergePartitionsInfo appShuffleMergePartitionsInfo = - appShuffleInfo.shuffles.remove(msg.shuffleId); - if (appShuffleMergePartitionsInfo != null) { - shuffleMergePartitionsRef.set(appShuffleMergePartitionsInfo.shuffleMergePartitions); - } - } else { - appShuffleInfo.shuffles.compute(msg.shuffleId, (id, value) -> { - if (null == value || msg.shuffleMergeId < value.shuffleMergeId || - INDETERMINATE_SHUFFLE_FINALIZED == value.shuffleMergePartitions) { + appShuffleInfo.shuffles.compute(msg.shuffleId, (shuffleId, mergePartitionsInfo) -> { + if (null != mergePartitionsInfo) { + if (msg.shuffleMergeId < mergePartitionsInfo.shuffleMergeId || + mergePartitionsInfo.isFinalized()) { throw new RuntimeException(String.format( - "Shuffle merge finalize request for shuffle %s with" + " shuffleMergeId %s is %s", - msg.shuffleId, msg.shuffleMergeId, - ErrorHandler.BlockPushErrorHandler.STALE_SHUFFLE_FINALIZE_SUFFIX)); - } else if (msg.shuffleMergeId > value.shuffleMergeId) { + "Shuffle merge finalize request for shuffle %s with" + " shuffleMergeId %s is %s", + msg.shuffleId, msg.shuffleMergeId, + ErrorHandler.BlockPushErrorHandler.STALE_SHUFFLE_FINALIZE_SUFFIX)); + } else if (msg.shuffleMergeId > mergePartitionsInfo.shuffleMergeId) { // If no blocks pushed for the finalizeShuffleMerge shuffleMergeId then return // empty MergeStatuses but cleanup the older shuffleMergeId files. mergedShuffleCleaner.execute(() -> - closeAndDeletePartitionFiles(value.shuffleMergePartitions)); - return new AppShuffleMergePartitionsInfo(msg.shuffleMergeId, true); + closeAndDeletePartitionFiles(mergePartitionsInfo.shuffleMergePartitions)); } else { - shuffleMergePartitionsRef.set(value.shuffleMergePartitions); - return new AppShuffleMergePartitionsInfo(msg.shuffleMergeId, true); + // This block covers: + // 1. finalization of determinate stage + // 2. finalization of indeterminate stage if the shuffleMergeId related to it is the one + // for which the message is received. 
+ shuffleMergePartitionsRef.set(mergePartitionsInfo.shuffleMergePartitions); } - }); - } + } + // Even when the mergePartitionsInfo is null, we mark the shuffle as finalized but the results + // sent to the driver will be empty. This cam happen when the service didn't receive any + // blocks for the shuffle yet and the driver didn't wait for enough time to finalize the + // shuffle. + return new AppShuffleMergePartitionsInfo(msg.shuffleMergeId, true); + }); Map shuffleMergePartitions = shuffleMergePartitionsRef.get(); MergeStatuses mergeStatuses; if (null == shuffleMergePartitions || shuffleMergePartitions.isEmpty()) { @@ -576,14 +553,25 @@ public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) { for (AppShufflePartitionInfo partition: shuffleMergePartitions.values()) { synchronized (partition) { try { + logger.debug("{} attempt {} shuffle {} shuffleMerge {}: finalizing shuffle " + + "partition {} ", msg.appId, msg.appAttemptId, msg.shuffleId, + msg.shuffleMergeId, partition.reduceId); // This can throw IOException which will marks this shuffle partition as not merged. partition.finalizePartition(); - bitmaps.add(partition.mapTracker); - reduceIds.add(partition.reduceId); - sizes.add(partition.getLastChunkOffset()); + if (partition.mapTracker.getCardinality() > 0) { + bitmaps.add(partition.mapTracker); + reduceIds.add(partition.reduceId); + sizes.add(partition.getLastChunkOffset()); + logger.debug("{} attempt {} shuffle {} shuffleMerge {}: finalization results " + + "added for partition {} data size {} index size {} meta size {}", + msg.appId, msg.appAttemptId, msg.shuffleId, + msg.shuffleMergeId, partition.reduceId, partition.getLastChunkOffset(), + partition.indexFile.getPos(), partition.metaFile.getPos()); + } } catch (IOException ioe) { - logger.warn("Exception while finalizing shuffle partition {}_{} {} {}", msg.appId, - msg.appAttemptId, msg.shuffleId, partition.reduceId, ioe); + logger.warn("{} attempt {} shuffle {} shuffleMerge {}: exception while " + + "finalizing shuffle partition {}", msg.appId, msg.appAttemptId, msg.shuffleId, + msg.shuffleMergeId, partition.reduceId); } finally { partition.closeAllFilesAndDeleteIfNeeded(false); } @@ -593,8 +581,8 @@ public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) { bitmaps.toArray(new RoaringBitmap[bitmaps.size()]), Ints.toArray(reduceIds), Longs.toArray(sizes)); } - logger.info("Finalized shuffle {} with shuffleMergeId {} from Application {}_{}.", - msg.shuffleId, msg.shuffleMergeId, msg.appId, msg.appAttemptId); + logger.info("{} attempt {} shuffle {} shuffleMerge {}: finalization of shuffle merge completed", + msg.appId, msg.appAttemptId, msg.shuffleId, msg.shuffleMergeId); return mergeStatuses; } @@ -808,7 +796,7 @@ private boolean isTooLate( AppShuffleMergePartitionsInfo appShuffleMergePartitionsInfo, int reduceId) { return null == appShuffleMergePartitionsInfo || - INDETERMINATE_SHUFFLE_FINALIZED == appShuffleMergePartitionsInfo.shuffleMergePartitions || + appShuffleMergePartitionsInfo.isFinalized() || !appShuffleMergePartitionsInfo.shuffleMergePartitions.containsKey(reduceId); } @@ -1008,20 +996,27 @@ AppShufflePartitionInfo getPartitionInfo() { * required for the shuffles of indeterminate stages. */ public static class AppShuffleMergePartitionsInfo { + // ConcurrentHashMap doesn't allow null for keys or values which is why this is required. + // Marker to identify finalized shuffle partitions. 
+ private static final Map SHUFFLE_FINALIZED_MARKER = + Collections.emptyMap(); private final int shuffleMergeId; private final Map shuffleMergePartitions; - public AppShuffleMergePartitionsInfo( - int shuffleMergeId, boolean shuffleFinalized) { + public AppShuffleMergePartitionsInfo(int shuffleMergeId, boolean shuffleFinalized) { this.shuffleMergeId = shuffleMergeId; - this.shuffleMergePartitions = shuffleFinalized ? - INDETERMINATE_SHUFFLE_FINALIZED : new ConcurrentHashMap<>(); + this.shuffleMergePartitions = shuffleFinalized ? SHUFFLE_FINALIZED_MARKER : + new ConcurrentHashMap<>(); } @VisibleForTesting public Map getShuffleMergePartitions() { return shuffleMergePartitions; } + + public boolean isFinalized() { + return shuffleMergePartitions == SHUFFLE_FINALIZED_MARKER; + } } /** Metadata tracked for an actively merged shuffle partition */ diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java index f4a29aaac19f7..595473376cfcd 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java @@ -1161,8 +1161,8 @@ public void testFinalizeOfDeterminateShuffle() throws IOException { RemoteBlockPushResolver.AppShuffleInfo appShuffleInfo = pushResolver.validateAndGetAppShuffleInfo(TEST_APP); - assertTrue("Metadata of determinate shuffle should be removed after finalize shuffle" - + " merge", appShuffleInfo.getShuffles().get(0) == null); + assertTrue("Determinate shuffle should be marked finalized", + appShuffleInfo.getShuffles().get(0).isFinalized()); validateMergeStatuses(statuses, new int[] {0}, new long[] {9}); MergedBlockMeta blockMeta = pushResolver.getMergedBlockMeta(TEST_APP, 0, 0, 0); validateChunks(TEST_APP, 0, 0, 0, blockMeta, new int[]{4, 5}, new int[][]{{0}, {1}}); @@ -1287,6 +1287,79 @@ void closeAndDeletePartitionFiles(Map partitio + " up", appShuffleInfo.getMergedShuffleDataFile(0, 4, 0).exists()); } + @Test + public void testFinalizationResultIsEmptyWhenTheServerDidNotReceiveAnyBlocks() { + //shuffle 1 0 is finalized even though the server didn't receive any blocks for it. 
+ MergeStatuses statuses = pushResolver.finalizeShuffleMerge( + new FinalizeShuffleMerge(TEST_APP, NO_ATTEMPT_ID, 1, 0)); + assertEquals("no partitions were merged", 0, statuses.reduceIds.length); + RemoteBlockPushResolver.AppShuffleInfo appShuffleInfo = + pushResolver.validateAndGetAppShuffleInfo(TEST_APP); + assertTrue("shuffle 1 should be marked finalized", + appShuffleInfo.getShuffles().get(1).isFinalized()); + removeApplication(TEST_APP); + } + + // Test for SPARK-37675 and SPARK-37793 + @Test + public void testEmptyMergePartitionsAreNotReported() throws IOException { + //shufflePush_1_0_0_100 is received by the server + StreamCallbackWithID stream1 = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, NO_ATTEMPT_ID, 1, 0, 0, 100, 0)); + stream1.onData(stream1.getID(), ByteBuffer.wrap(new byte[4])); + //shuffle 1 0 is finalized + MergeStatuses statuses = pushResolver.finalizeShuffleMerge( + new FinalizeShuffleMerge(TEST_APP, NO_ATTEMPT_ID, 1, 0)); + assertEquals("no partitions were merged", 0, statuses.reduceIds.length); + removeApplication(TEST_APP); + } + + // Test for SPARK-37675 and SPARK-37793 + @Test + public void testAllBlocksAreRejectedWhenReceivedAfterFinalization() throws IOException { + //shufflePush_1_0_0_100 is received by the server + StreamCallbackWithID stream1 = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, NO_ATTEMPT_ID, 1, 0, 0, 100, 0)); + stream1.onData(stream1.getID(), ByteBuffer.wrap(new byte[4])); + stream1.onComplete(stream1.getID()); + //shuffle 1 0 is finalized + pushResolver.finalizeShuffleMerge(new FinalizeShuffleMerge(TEST_APP, NO_ATTEMPT_ID, 1, 0)); + BlockPushNonFatalFailure errorToValidate = null; + try { + //shufflePush_1_0_0_200 is received by the server after finalization of shuffle 1 0 which + //should be rejected + StreamCallbackWithID failureCallback = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, NO_ATTEMPT_ID, 1, 0, 0, 200, 0)); + failureCallback.onComplete(failureCallback.getID()); + } catch (BlockPushNonFatalFailure e) { + BlockPushReturnCode errorCode = + (BlockPushReturnCode) BlockTransferMessage.Decoder.fromByteBuffer(e.getResponse()); + assertEquals(BlockPushNonFatalFailure.ReturnCode.TOO_LATE_BLOCK_PUSH.id(), + errorCode.returnCode); + errorToValidate = e; + assertEquals(errorCode.failureBlockId, "shufflePush_1_0_0_200"); + } + assertNotNull("shufflePush_1_0_0_200 should be rejected", errorToValidate); + try { + //shufflePush_1_0_1_100 is received by the server after finalization of shuffle 1 0 which + //should also be rejected + StreamCallbackWithID failureCallback = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, NO_ATTEMPT_ID, 1, 0, 1, 100, 0)); + failureCallback.onComplete(failureCallback.getID()); + } catch (BlockPushNonFatalFailure e) { + BlockPushReturnCode errorCode = + (BlockPushReturnCode) BlockTransferMessage.Decoder.fromByteBuffer(e.getResponse()); + assertEquals(BlockPushNonFatalFailure.ReturnCode.TOO_LATE_BLOCK_PUSH.id(), + errorCode.returnCode); + errorToValidate = e; + assertEquals(errorCode.failureBlockId, "shufflePush_1_0_1_100"); + } + assertNotNull("shufflePush_1_0_1_100 should be rejected", errorToValidate); + MergedBlockMeta blockMeta = pushResolver.getMergedBlockMeta(TEST_APP, 1, 0, 100); + validateChunks(TEST_APP, 1, 0, 100, blockMeta, new int[]{4}, new int[][]{{0}}); + removeApplication(TEST_APP); + } + private void useTestFiles(boolean useTestIndexFile, boolean useTestMetaFile) throws IOException { pushResolver = new 
RemoteBlockPushResolver(conf) { @Override From ed3ea989f97634374789d173f1cc932230bf3aa1 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 29 Jan 2022 12:55:55 -0800 Subject: [PATCH 130/513] [SPARK-37713][K8S][FOLLOWUP] Fix insufficient namespace propagation ### What changes were proposed in this pull request? This is a follow-up of SPARK-37713 (#34983) to fix insufficient namespace propagation at configMap creation. ### Why are the changes needed? This will recover the behavior on non-`default` namespace Spark jobs like the following. ``` NAMESPACE NAME DATA AGE 10e32ae0f2304236be9c687edf8a06a4 kube-root-ca.crt 1 8s 10e32ae0f2304236be9c687edf8a06a4 spark-drv-fa3f387ea3871bbd-conf-map 2 5s 10e32ae0f2304236be9c687edf8a06a4 spark-exec-da5ea37ea38727f1-conf-map 2 3s ``` ### Does this PR introduce _any_ user-facing change? No. This is a bug fix of the unreleased patch. ### How was this patch tested? Pass the CIs and manually run the integration tests which was broken since the original PR. ``` $ mvn package -Pkubernetes -DskipTests $ resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh --deploy-mode docker-for-desktop --exclude-tags minikube,r ... KubernetesSuite: - Run SparkPi with no resources - Run SparkPi with no resources & statefulset allocation - Run SparkPi with a very long application name. - Use SparkLauncher.NO_RESOURCE - Run SparkPi with a master URL without a scheme. - Run SparkPi with an argument. - Run SparkPi with custom labels, annotations, and environment variables. - All pods have the same service account by default - Run extraJVMOptions check on driver - Run SparkRemoteFileTest using a remote data file - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties - Run SparkPi with env and mount secrets. - Run PySpark on simple pi.py example - Run PySpark to test a pyfiles example - Run PySpark with memory customization - Run in client mode. - Start pod creation from template - Test basic decommissioning - Test basic decommissioning with shuffle cleanup - Test decommissioning with dynamic allocation & shuffle cleanups - Test decommissioning timeouts - SPARK-37576: Rolling decommissioning Run completed in 10 minutes, 34 seconds. Total number of tests run: 22 Suites: completed 2, aborted 0 Tests: succeeded 22, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` Closes #35299 Closes #35359 from dongjoon-hyun/SPARK-37713. 
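For reference, a listing like the one above can be reproduced with plain `kubectl`, e.g.:

```
kubectl get configmaps --all-namespaces
```

and checking that the `spark-drv-*-conf-map` and `spark-exec-*-conf-map` entries appear under the job's namespace instead of `default`.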
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/deploy/k8s/submit/KubernetesClientApplication.scala | 3 ++- .../cluster/k8s/KubernetesClusterSchedulerBackend.scala | 3 ++- .../scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala index 96c19bbb3da69..3a3ab081fe843 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala @@ -105,7 +105,8 @@ private[spark] class Client( val configMapName = KubernetesClientUtils.configMapNameDriver val confFilesMap = KubernetesClientUtils.buildSparkConfDirFilesMap(configMapName, conf.sparkConf, resolvedDriverSpec.systemProperties) - val configMap = KubernetesClientUtils.buildConfigMap(configMapName, confFilesMap) + val configMap = KubernetesClientUtils.buildConfigMap(configMapName, confFilesMap + + (KUBERNETES_NAMESPACE.key -> conf.namespace)) // The include of the ENV_VAR for "SPARK_CONF_DIR" is to allow for the // Spark command builder to pickup on the Java Options present in the ConfigMap diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala index 110225e17473b..1bda9cc67d247 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala @@ -79,7 +79,8 @@ private[spark] class KubernetesClusterSchedulerBackend( val resolvedExecutorProperties = Map(KUBERNETES_NAMESPACE.key -> conf.get(KUBERNETES_NAMESPACE)) val confFilesMap = KubernetesClientUtils - .buildSparkConfDirFilesMap(configMapName, conf, resolvedExecutorProperties) + .buildSparkConfDirFilesMap(configMapName, conf, resolvedExecutorProperties) ++ + resolvedExecutorProperties val labels = Map(SPARK_APP_ID_LABEL -> applicationId(), SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) val configMap = KubernetesClientUtils.buildConfigMap(configMapName, confFilesMap, labels) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala index bd4a78b3bdf97..12a5202b9d067 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala @@ -321,7 +321,8 @@ class ClientSuite extends SparkFunSuite with BeforeAndAfter { val configMapName = KubernetesClientUtils.configMapNameDriver val configMap: ConfigMap = configMaps.head assert(configMap.getMetadata.getName == configMapName) - val configMapLoadedFiles = configMap.getData.keySet().asScala.toSet + val configMapLoadedFiles = configMap.getData.keySet().asScala.toSet - + Config.KUBERNETES_NAMESPACE.key 
assert(configMapLoadedFiles === expectedConfFiles.toSet ++ Set(SPARK_CONF_FILE_NAME)) for (f <- configMapLoadedFiles) { assert(configMap.getData.get(f).contains("conf1key=conf1value")) From 71c34b42f392be78d8e40749e3e23c9ec58e6718 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sat, 29 Jan 2022 20:26:54 -0800 Subject: [PATCH 131/513] [SPARK-38071][K8S][TESTS] Support K8s namespace parameter in SBT K8s IT ### What changes were proposed in this pull request? This PR aims to support K8s namespace parameter in SBT K8s integration test. ### Why are the changes needed? - This allows the users to set the test namespace name - When there is no given namespace, it will generate a random namespace and use it like `Maven` test ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually using the following command ``` build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube -Dspark.kubernetes.test.deployMode=docker-for-desktop -Dspark.kubernetes.test.namespace=spark-it-test "kubernetes-integration-tests/test" ``` Closes #35364 from williamhyun/sbtnamespace. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 8a56bef3e351b..53ea7ecf9e151 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -624,7 +624,7 @@ object KubernetesIntegrationTests { val dockerBuild = TaskKey[Unit]("docker-imgs", "Build the docker images for ITs.") val runITs = TaskKey[Unit]("run-its", "Only run ITs, skip image build.") val imageTag = settingKey[String]("Tag to use for images built during the test.") - val namespace = settingKey[String]("Namespace where to run pods.") + val namespace = sys.props.get("spark.kubernetes.test.namespace") val deployMode = sys.props.get("spark.kubernetes.test.deployMode") // Hack: this variable is used to control whether to build docker images. It's updated by @@ -634,7 +634,6 @@ object KubernetesIntegrationTests { lazy val settings = Seq( imageTag := "dev", - namespace := "default", dockerBuild := { if (shouldBuildImage) { val dockerTool = s"$sparkHome/bin/docker-image-tool.sh" @@ -671,9 +670,9 @@ object KubernetesIntegrationTests { (Test / javaOptions) ++= Seq( s"-Dspark.kubernetes.test.deployMode=${deployMode.getOrElse("minikube")}", s"-Dspark.kubernetes.test.imageTag=${imageTag.value}", - s"-Dspark.kubernetes.test.namespace=${namespace.value}", s"-Dspark.kubernetes.test.unpackSparkDir=$sparkHome" ), + (Test / javaOptions) ++= namespace.map("-Dspark.kubernetes.test.namespace=" + _), // Force packaging before building images, so that the latest code is tested. dockerBuild := dockerBuild .dependsOn(assembly / Compile / packageBin) From 419d17378c7fe4d9715eca3f84bcc86354c71941 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sun, 30 Jan 2022 10:11:03 -0800 Subject: [PATCH 132/513] [SPARK-38072][K8S][TESTS] Support K8s imageTag parameter in SBT K8s IT ### What changes were proposed in this pull request? This PR aims to support K8s `imageTag` parameter in SBT K8s integration test. ### Why are the changes needed? To make maven and SBT consistent. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually. Closes #35365 from williamhyun/imagetag. 
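Mirroring the namespace example above, a manual run that exercises the new property would presumably look like the following (the tag value `my-tag` is only a placeholder; when `spark.kubernetes.test.imageTag` is not given, the build falls back to `dev`, see the `getOrElse("dev")` calls in the diff below):

```
build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube -Dspark.kubernetes.test.deployMode=docker-for-desktop -Dspark.kubernetes.test.imageTag=my-tag "kubernetes-integration-tests/test"
```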
Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 53ea7ecf9e151..9d4034d403924 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -623,7 +623,7 @@ object KubernetesIntegrationTests { val dockerBuild = TaskKey[Unit]("docker-imgs", "Build the docker images for ITs.") val runITs = TaskKey[Unit]("run-its", "Only run ITs, skip image build.") - val imageTag = settingKey[String]("Tag to use for images built during the test.") + val imageTag = sys.props.get("spark.kubernetes.test.imageTag") val namespace = sys.props.get("spark.kubernetes.test.namespace") val deployMode = sys.props.get("spark.kubernetes.test.deployMode") @@ -633,7 +633,6 @@ object KubernetesIntegrationTests { private var shouldBuildImage = true lazy val settings = Seq( - imageTag := "dev", dockerBuild := { if (shouldBuildImage) { val dockerTool = s"$sparkHome/bin/docker-image-tool.sh" @@ -647,7 +646,7 @@ object KubernetesIntegrationTests { Seq("-f", s"$dockerFile") } val cmd = Seq(dockerTool, - "-t", imageTag.value, + "-t", imageTag.getOrElse("dev"), "-p", s"$bindingsDir/python/Dockerfile", "-R", s"$bindingsDir/R/Dockerfile") ++ (if (deployMode == Some("docker-for-desktop")) Seq.empty else Seq("-m")) ++ @@ -669,7 +668,7 @@ object KubernetesIntegrationTests { (Test / test) := (Test / test).dependsOn(dockerBuild).value, (Test / javaOptions) ++= Seq( s"-Dspark.kubernetes.test.deployMode=${deployMode.getOrElse("minikube")}", - s"-Dspark.kubernetes.test.imageTag=${imageTag.value}", + s"-Dspark.kubernetes.test.imageTag=${imageTag.getOrElse("dev")}", s"-Dspark.kubernetes.test.unpackSparkDir=$sparkHome" ), (Test / javaOptions) ++= namespace.map("-Dspark.kubernetes.test.namespace=" + _), From 46885bef1a1254853ce9165862e3bd8f3a15071f Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Mon, 31 Jan 2022 10:44:53 -0800 Subject: [PATCH 133/513] [SPARK-38075][SQL] Fix `hasNext` in `HiveScriptTransformationExec`'s process output iterator ### What changes were proposed in this pull request? Fix hasNext in HiveScriptTransformationExec's process output iterator to always return false if it had previously returned false. ### Why are the changes needed? When hasNext on the process output iterator returns false, it leaves the iterator in a state (i.e., scriptOutputWritable is not null) such that the next call returns true. The Guava Ordering used in TakeOrderedAndProjectExec will call hasNext on the process output iterator even after an earlier call had returned false. This results in fake rows when script transform is used with `order by` and `limit`. For example: ``` create or replace temp view t as select * from values (1), (2), (3) as t(a); select transform(a) USING 'cat' AS (a int) FROM t order by a limit 10; ``` This returns: ``` NULL NULL NULL 1 2 3 ``` ### Does this PR introduce _any_ user-facing change? No, other than removing the correctness issue. ### How was this patch tested? New unit test. Closes #35368 from bersprockets/script_transformation_issue. 
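The underlying contract is that once `Iterator.hasNext` returns false it must keep returning false, no matter how many more times the consumer (here Guava's `Ordering` used by `TakeOrderedAndProjectExec`) calls it. A stripped-down Scala sketch of that guard pattern, using a hypothetical `readNextLine` helper rather than the real Hive writable plumbing (illustrative only, not the actual Spark code):

```scala
// Illustrative sketch: `readNextLine` is a stand-in for reading one record of
// process output; it is a hypothetical helper, not part of Spark.
class ProcessOutputIterator(readNextLine: () => Option[String]) extends Iterator[String] {
  private var buffered: Option[String] = None
  private var completed = false  // once true, hasNext stays false forever

  override def hasNext: Boolean = {
    if (completed) {
      false
    } else if (buffered.nonEmpty) {
      true
    } else {
      buffered = readNextLine()
      if (buffered.isEmpty) {
        completed = true  // remember exhaustion so repeated hasNext calls stay false
      }
      buffered.nonEmpty
    }
  }

  override def next(): String = {
    if (!hasNext) throw new NoSuchElementException("end of process output")
    val line = buffered.get
    buffered = None
    line
  }
}
```

In the real code the analogous stale state is `scriptOutputWritable`, which stays non-null after the end of the process output, so a subsequent `hasNext` skipped the read and reported a phantom row; the `completed` flag in the patch below closes that hole.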
Authored-by: Bruce Robbins Signed-off-by: Dongjoon Hyun --- .../HiveScriptTransformationExec.scala | 7 ++++++- .../HiveScriptTransformationSuite.scala | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala index 219b1a27f70a2..beb5583d81a60 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala @@ -64,7 +64,7 @@ private[hive] case class HiveScriptTransformationExec( outputSoi: StructObjectInspector, hadoopConf: Configuration): Iterator[InternalRow] = { new Iterator[InternalRow] with HiveInspectors { - var curLine: String = null + private var completed = false val scriptOutputStream = new DataInputStream(inputStream) val scriptOutputReader = @@ -78,6 +78,9 @@ private[hive] case class HiveScriptTransformationExec( lazy val unwrappers = outputSoi.getAllStructFieldRefs.asScala.map(unwrapperFor) override def hasNext: Boolean = { + if (completed) { + return false + } try { if (scriptOutputWritable == null) { scriptOutputWritable = reusedWritableObject @@ -85,6 +88,7 @@ private[hive] case class HiveScriptTransformationExec( if (scriptOutputReader != null) { if (scriptOutputReader.next(scriptOutputWritable) <= 0) { checkFailureAndPropagate(writerThread, null, proc, stderrBuffer) + completed = true return false } } else { @@ -97,6 +101,7 @@ private[hive] case class HiveScriptTransformationExec( // there can be a lag between EOF being written out and the process // being terminated. So explicitly waiting for the process to be done. checkFailureAndPropagate(writerThread, null, proc, stderrBuffer) + completed = true return false } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index d6185ac487d65..d54265e53c126 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -621,4 +621,21 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T assert(e.contains("java.lang.ArithmeticException: long overflow")) } } + + test("SPARK-38075: ORDER BY with LIMIT should not add fake rows") { + withTempView("v") { + val df = Seq((1), (2), (3)).toDF("a") + df.createTempView("v") + checkAnswer(sql( + """ + |SELECT TRANSFORM(a) + | USING 'cat' AS (a) + |FROM v + |ORDER BY a + |LIMIT 10 + |""".stripMargin), + identity, + Row("1") :: Row("2") :: Row("3") :: Nil) + } + } } From 66b9087233bc0cb90cf3af07ec34ba74b9c32d5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20J=C3=B8rgensen?= <47577197+bjornjorgensen@users.noreply.github.com> Date: Tue, 1 Feb 2022 12:57:07 +0100 Subject: [PATCH 134/513] [SPARK-38067][PYTHON] Preserve `None` values when saved to JSON MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR preserves columns with all values with `NaN`, `Null` or `None` to omit when saved to JSON files. 
Changes default behavior for `pyspark.pandas` JSON writer, from missing values are omitted, to missing values are preserved. This impacts output to file if there is any missing value in any field in any row. It can have significant (proportional to N * M, for N rows and M columns) impact in case of datasets (performance, storage cost). Add an option to delete columns with all values with `NaN`, `Null` or `None` to omit when saved to JSON file. ### Why are the changes needed? Pandas on spark deletes columns with all `None` values as default. Pandas writes all columns to JSON even if the values are all `None`. This is the same behavior as pandas users are used to. ### Does this PR introduce _any_ user-facing change? The document for the `to_json` function is changed. The `ignoreNullFields` option has been set to `False` as default to prevent columns with only `Null` to omit during saving to JSON files. ### How was this patch tested? Tested manually. ```bash data = {'col_1': [3, 2, 1, 0], 'col_2': [None, None, None, None]} test = ps.DataFrame.from_dict(data) test.to_json("test.json") test2 = ps.read_json("test.json/*") test2 col_1 col_2 0 3 None 1 2 None 2 1 None 3 0 None test2.to_json("test2.json", ignoreNullFields=True) test3 = ps.read_json("test2.json/*") test3 col_1 0 3 1 2 2 1 3 0 ``` Closes #35296 from bjornjorgensen/SPARK-37981-Add-note-for-deleting-Null-and-NaN. Lead-authored-by: Bjørn Jørgensen <47577197+bjornjorgensen@users.noreply.github.com> Co-authored-by: bjornjorgensen Signed-off-by: zero323 --- python/pyspark/pandas/generic.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py index 2dac5b056aba0..63ce25ec5f2b2 100644 --- a/python/pyspark/pandas/generic.py +++ b/python/pyspark/pandas/generic.py @@ -24,6 +24,7 @@ from typing import ( Any, Callable, + Dict, Iterable, IO, List, @@ -905,6 +906,9 @@ def to_json( .. note:: output JSON format is different from pandas'. It always use `orient='records'` for its output. This behaviour might have to change in the near future. + .. note:: Set `ignoreNullFields` keyword argument to `True` to omit `None` or `NaN` values + when writing JSON objects. It works only when `path` is provided. + Note NaN's and None will be converted to null and datetime objects will be converted to UNIX timestamps. @@ -981,6 +985,9 @@ def to_json( if "options" in options and isinstance(options.get("options"), dict) and len(options) == 1: options = options.get("options") + default_options: Dict[str, Any] = {"ignoreNullFields": False} + options = {**default_options, **options} + if not lines: raise NotImplementedError("lines=False is not implemented yet.") From c37c6c393dc0302abd41b0c4c41002710ba52270 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 1 Feb 2022 08:30:39 -0800 Subject: [PATCH 135/513] [SPARK-38081][K8S][TESTS] Support `cloud`-backend in K8s IT with SBT ### What changes were proposed in this pull request? This PR aims to - Support `cloud` backend in K8s IT with SBT (Image building/pushing/testing) - Add a new K8s test tag, `local`, and apply it to a test case using local HTTP server. ### Why are the changes needed? To run K8s IT in the cloud environment more easily. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually test like the following. 
``` $ build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube,local -Dspark.kubernetes.test.deployMode=cloud -Dspark.kubernetes.test.master=k8s://....eks.amazonaws.com -Dspark.kubernetes.test.namespace=spark-cloud-test -Dspark.kubernetes.test.imageRepo=... -Dspark.kubernetes.test.imageTag=2022-02-01 "kubernetes-integration-tests/test" ... [info] KubernetesSuite: [info] - Run SparkPi with no resources (26 seconds, 678 milliseconds) [info] - Run SparkPi with no resources & statefulset allocation (18 seconds, 617 milliseconds) [info] - Run SparkPi with a very long application name. (17 seconds, 205 milliseconds) [info] - Use SparkLauncher.NO_RESOURCE (17 seconds, 555 milliseconds) [info] - Run SparkPi with a master URL without a scheme. (17 seconds, 478 milliseconds) [info] - Run SparkPi with an argument. (17 seconds, 518 milliseconds) [info] - Run SparkPi with custom labels, annotations, and environment variables. (17 seconds, 648 milliseconds) [info] - All pods have the same service account by default (17 seconds, 800 milliseconds) [info] - Run extraJVMOptions check on driver (11 seconds, 141 milliseconds) [info] - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties (25 seconds, 608 milliseconds) [info] - Run SparkPi with env and mount secrets. (27 seconds, 114 milliseconds) [info] - Run PySpark on simple pi.py example (42 seconds, 929 milliseconds) [info] - Run PySpark to test a pyfiles example (19 seconds, 914 milliseconds) [info] - Run PySpark with memory customization (16 seconds, 985 milliseconds) [info] - Run in client mode. (10 seconds, 42 milliseconds) [info] - Start pod creation from template (16 seconds, 207 milliseconds) [info] - Test basic decommissioning (49 seconds, 519 milliseconds) [info] - Test basic decommissioning with shuffle cleanup (49 seconds, 472 milliseconds) [info] - Test decommissioning with dynamic allocation & shuffle cleanups (2 minutes, 49 seconds) [info] - Test decommissioning timeouts (50 seconds, 423 milliseconds) [info] - SPARK-37576: Rolling decommissioning (1 minute, 13 seconds) [info] - Run SparkR on simple dataframe.R example (51 seconds, 712 milliseconds) [info] Run completed in 15 minutes, 50 seconds. [info] Total number of tests run: 22 [info] Suites: completed 1, aborted 0 [info] Tests: succeeded 22, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. [success] Total time: 1920 s (32:00), completed Jan 31, 2022 11:56:38 PM ``` Closes #35376 from dongjoon-hyun/SPARK-38081. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 12 +++++++++++- .../kubernetes/integration-tests/README.md | 2 +- .../deploy/k8s/integrationtest/BasicTestsSuite.scala | 4 ++-- .../deploy/k8s/integrationtest/KubernetesSuite.scala | 1 + 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 9d4034d403924..3d3a65f3d2333 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -623,6 +623,7 @@ object KubernetesIntegrationTests { val dockerBuild = TaskKey[Unit]("docker-imgs", "Build the docker images for ITs.") val runITs = TaskKey[Unit]("run-its", "Only run ITs, skip image build.") + val imageRepo = sys.props.getOrElse("spark.kubernetes.test.imageRepo", "docker.io/kubespark") val imageTag = sys.props.get("spark.kubernetes.test.imageTag") val namespace = sys.props.get("spark.kubernetes.test.namespace") val deployMode = sys.props.get("spark.kubernetes.test.deployMode") @@ -646,16 +647,24 @@ object KubernetesIntegrationTests { Seq("-f", s"$dockerFile") } val cmd = Seq(dockerTool, + "-r", imageRepo, "-t", imageTag.getOrElse("dev"), "-p", s"$bindingsDir/python/Dockerfile", "-R", s"$bindingsDir/R/Dockerfile") ++ - (if (deployMode == Some("docker-for-desktop")) Seq.empty else Seq("-m")) ++ + (if (deployMode != Some("minikube")) Seq.empty else Seq("-m")) ++ extraOptions :+ "build" val ec = Process(cmd).! if (ec != 0) { throw new IllegalStateException(s"Process '${cmd.mkString(" ")}' exited with $ec.") } + if (deployMode == Some("cloud")) { + val cmd = Seq(dockerTool, "-r", imageRepo, "-t", imageTag.getOrElse("dev"), "push") + val ret = Process(cmd).! + if (ret != 0) { + throw new IllegalStateException(s"Process '${cmd.mkString(" ")}' exited with $ret.") + } + } } shouldBuildImage = true }, @@ -668,6 +677,7 @@ object KubernetesIntegrationTests { (Test / test) := (Test / test).dependsOn(dockerBuild).value, (Test / javaOptions) ++= Seq( s"-Dspark.kubernetes.test.deployMode=${deployMode.getOrElse("minikube")}", + s"-Dspark.kubernetes.test.imageRepo=${imageRepo}", s"-Dspark.kubernetes.test.imageTag=${imageTag.getOrElse("dev")}", s"-Dspark.kubernetes.test.unpackSparkDir=$sparkHome" ), diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md index 2c759d9095ce4..eb32e81d8f75a 100644 --- a/resource-managers/kubernetes/integration-tests/README.md +++ b/resource-managers/kubernetes/integration-tests/README.md @@ -187,7 +187,7 @@ to the wrapper scripts and using the wrapper scripts will simply set these appro spark.kubernetes.test.master - When using the cloud-url backend must be specified to indicate the K8S master URL to communicate + When using the cloud backend must be specified to indicate the K8S master URL to communicate with. 
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala index 217359b3da1bf..0e79f6c554403 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.launcher.SparkLauncher private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite => import BasicTestsSuite._ - import KubernetesSuite.k8sTestTag + import KubernetesSuite.{k8sTestTag, localTestTag} import KubernetesSuite.{TIMEOUT, INTERVAL} test("Run SparkPi with no resources", k8sTestTag) { @@ -116,7 +116,7 @@ private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite => expectedJVMValue = Seq("(spark.test.foo,spark.test.bar)")) } - test("Run SparkRemoteFileTest using a remote data file", k8sTestTag) { + test("Run SparkRemoteFileTest using a remote data file", k8sTestTag, localTestTag) { assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!") TestUtils.withHttpServer(sys.props("spark.test.home")) { baseURL => sparkAppConf.set("spark.files", baseURL.toString + diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 90f666ae54e38..c1237e3eb9df4 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -595,6 +595,7 @@ class KubernetesSuite extends SparkFunSuite private[spark] object KubernetesSuite { val k8sTestTag = Tag("k8s") + val localTestTag = Tag("local") val rTestTag = Tag("r") val MinikubeTag = Tag("minikube") val SPARK_PI_MAIN_CLASS: String = "org.apache.spark.examples.SparkPi" From 6e7000af0012b80a300acc4ed8dee15c47904504 Mon Sep 17 00:00:00 2001 From: alexander_holmes Date: Tue, 1 Feb 2022 11:10:25 -0800 Subject: [PATCH 136/513] [SPARK-38047][K8S] Add `OUTLIER_NO_FALLBACK` executor roll policy ### What changes were proposed in this pull request? This PR aims to add a new executor roll policy which allows users to skip rolling in cases where there are no outlier executors. ### Why are the changes needed? As currently implemented an executor is always rolled every `spark.kubernetes.executor.rollInterval` interval. In environments where starting of executors can introduce latencies it may be desirable for users to have the option to determine if rolling should only happen when outliers are found. ### Does this PR introduce _any_ user-facing change? No, this is an additional option being added to a new feature in Apache Spark 3.3. ### How was this patch tested? Pass the CIs with the newly added test cases. Closes #35373 from alexholmes/SPARK-38047. 
Authored-by: alexander_holmes Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/deploy/k8s/Config.scala | 7 ++- .../cluster/k8s/ExecutorRollPlugin.scala | 25 +++++++---- .../cluster/k8s/ExecutorRollPluginSuite.scala | 45 ++++++++++++++++++- 3 files changed, 65 insertions(+), 12 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index a2ad0d0a52a7f..385463c443275 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -147,7 +147,8 @@ private[spark] object Config extends Logging { .createWithDefault(0) object ExecutorRollPolicy extends Enumeration { - val ID, ADD_TIME, TOTAL_GC_TIME, TOTAL_DURATION, AVERAGE_DURATION, FAILED_TASKS, OUTLIER = Value + val ID, ADD_TIME, TOTAL_GC_TIME, TOTAL_DURATION, AVERAGE_DURATION, FAILED_TASKS, + OUTLIER, OUTLIER_NO_FALLBACK = Value } val EXECUTOR_ROLL_POLICY = @@ -165,7 +166,9 @@ private[spark] object Config extends Logging { "OUTLIER policy chooses an executor with outstanding statistics which is bigger than" + "at least two standard deviation from the mean in average task time, " + "total task time, total task GC time, and the number of failed tasks if exists. " + - "If there is no outlier, it works like TOTAL_DURATION policy.") + "If there is no outlier it works like TOTAL_DURATION policy. " + + "OUTLIER_NO_FALLBACK policy picks an outlier using the OUTLIER policy above. " + + "If there is no outlier then no executor will be rolled.") .version("3.3.0") .stringConf .transform(_.toUpperCase(Locale.ROOT)) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala index 2a4d96596f0c2..5da4510d2cc86 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala @@ -118,20 +118,27 @@ class ExecutorRollDriverPlugin extends DriverPlugin with Logging { case ExecutorRollPolicy.FAILED_TASKS => listWithoutDriver.sortBy(_.failedTasks).reverse case ExecutorRollPolicy.OUTLIER => - // We build multiple outlier lists and concat in the following importance order to find - // outliers in various perspective: - // AVERAGE_DURATION > TOTAL_DURATION > TOTAL_GC_TIME > FAILED_TASKS - // Since we will choose only first item, the duplication is okay. If there is no outlier, - // We fallback to TOTAL_DURATION policy. - outliers(listWithoutDriver.filter(_.totalTasks > 0), e => e.totalDuration / e.totalTasks) ++ - outliers(listWithoutDriver, e => e.totalDuration) ++ - outliers(listWithoutDriver, e => e.totalGCTime) ++ - outliers(listWithoutDriver, e => e.failedTasks) ++ + // If there is no outlier we fallback to TOTAL_DURATION policy. 
+ outliersFromMultipleDimensions(listWithoutDriver) ++ listWithoutDriver.sortBy(_.totalDuration).reverse + case ExecutorRollPolicy.OUTLIER_NO_FALLBACK => + outliersFromMultipleDimensions(listWithoutDriver) } sortedList.headOption.map(_.id) } + /** + * We build multiple outlier lists and concat in the following importance order to find + * outliers in various perspective: + * AVERAGE_DURATION > TOTAL_DURATION > TOTAL_GC_TIME > FAILED_TASKS + * Since we will choose only first item, the duplication is okay. + */ + private def outliersFromMultipleDimensions(listWithoutDriver: Seq[v1.ExecutorSummary]) = + outliers(listWithoutDriver.filter(_.totalTasks > 0), e => e.totalDuration / e.totalTasks) ++ + outliers(listWithoutDriver, e => e.totalDuration) ++ + outliers(listWithoutDriver, e => e.totalGCTime) ++ + outliers(listWithoutDriver, e => e.failedTasks) + /** * Return executors whose metrics is outstanding, '(value - mean) > 2-sigma'. This is * a best-effort approach because the snapshot of ExecutorSummary is not a normal distribution. diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPluginSuite.scala index 9a6836bee93f7..886abc033893d 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPluginSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPluginSuite.scala @@ -132,7 +132,7 @@ class ExecutorRollPluginSuite extends SparkFunSuite with PrivateMethodTester { } test("A one-item executor list") { - ExecutorRollPolicy.values.foreach { value => + ExecutorRollPolicy.values.filter(_ != ExecutorRollPolicy.OUTLIER_NO_FALLBACK).foreach { value => assertEquals( Some(execWithSmallestID.id), plugin.invokePrivate(_choose(Seq(execWithSmallestID), value))) @@ -216,4 +216,47 @@ class ExecutorRollPluginSuite extends SparkFunSuite with PrivateMethodTester { plugin.invokePrivate(_choose(list :+ outlier, ExecutorRollPolicy.TOTAL_GC_TIME)), plugin.invokePrivate(_choose(list :+ outlier, ExecutorRollPolicy.OUTLIER))) } + + test("Policy: OUTLIER_NO_FALLBACK - Return None if there are no outliers") { + assertEquals(None, plugin.invokePrivate(_choose(list, ExecutorRollPolicy.OUTLIER_NO_FALLBACK))) + } + + test("Policy: OUTLIER_NO_FALLBACK - Detect an average task duration outlier") { + val outlier = new ExecutorSummary("9999", "host:port", true, 1, + 0, 0, 1, 0, 0, + 3, 0, 1, 300, + 20, 0, 0, + 0, false, 0, new Date(1639300001000L), + Option.empty, Option.empty, Map(), Option.empty, Set(), Option.empty, Map(), Map(), 1, + false, Set()) + assertEquals( + plugin.invokePrivate(_choose(list :+ outlier, ExecutorRollPolicy.AVERAGE_DURATION)), + plugin.invokePrivate(_choose(list :+ outlier, ExecutorRollPolicy.OUTLIER_NO_FALLBACK))) + } + + test("Policy: OUTLIER_NO_FALLBACK - Detect a total task duration outlier") { + val outlier = new ExecutorSummary("9999", "host:port", true, 1, + 0, 0, 1, 0, 0, + 3, 0, 1000, 1000, + 0, 0, 0, + 0, false, 0, new Date(1639300001000L), + Option.empty, Option.empty, Map(), Option.empty, Set(), Option.empty, Map(), Map(), 1, + false, Set()) + assertEquals( + plugin.invokePrivate(_choose(list :+ outlier, ExecutorRollPolicy.TOTAL_DURATION)), + plugin.invokePrivate(_choose(list :+ outlier, ExecutorRollPolicy.OUTLIER_NO_FALLBACK))) + } + + test("Policy: OUTLIER_NO_FALLBACK - Detect a total GC time 
outlier") { + val outlier = new ExecutorSummary("9999", "host:port", true, 1, + 0, 0, 1, 0, 0, + 3, 0, 1, 100, + 1000, 0, 0, + 0, false, 0, new Date(1639300001000L), + Option.empty, Option.empty, Map(), Option.empty, Set(), Option.empty, Map(), Map(), 1, + false, Set()) + assertEquals( + plugin.invokePrivate(_choose(list :+ outlier, ExecutorRollPolicy.TOTAL_GC_TIME)), + plugin.invokePrivate(_choose(list :+ outlier, ExecutorRollPolicy.OUTLIER_NO_FALLBACK))) + } } From 6347b9dc04576ef844a941460bb4a1814dc40dbb Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 1 Feb 2022 13:10:07 -0800 Subject: [PATCH 137/513] [SPARK-38084][TESTS] Support `SKIP_PYTHON` and `SKIP_R` in `run-tests.py` ### What changes were proposed in this pull request? This PR aims to support `SKIP_PYTHON` and `SKIP_R` in `run-tests.py` like `SKIP_MIMA` and `SKIP_UNIDOC`. ### Why are the changes needed? This is helpful to setup CIs. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually. ``` SKIP_PYTHON=1 SKIP_R=1 dev/run-tests.py ``` Closes #35381 from dongjoon-hyun/SPARK-38084. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/run-tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index d943277e1d516..570ee4c8169cf 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -653,14 +653,14 @@ def main(): run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags) modules_with_python_tests = [m for m in test_modules if m.python_test_goals] - if modules_with_python_tests: + if modules_with_python_tests and not os.environ.get("SKIP_PYTHON"): run_python_tests( modules_with_python_tests, opts.parallelism, with_coverage=os.environ.get("PYSPARK_CODECOV", "false") == "true", ) run_python_packaging_tests() - if any(m.should_run_r_tests for m in test_modules): + if any(m.should_run_r_tests for m in test_modules) and not os.environ.get("SKIP_R"): run_sparkr_tests() From dc2fd57352a18ebf55c1ffe33898d51f8f408597 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Tue, 1 Feb 2022 23:12:27 -0800 Subject: [PATCH 138/513] [SPARK-38013][SQL][TEST] AQE can change bhj to smj if no extra shuffle introduce ### What changes were proposed in this pull request? Add a test case in `AdaptiveQueryExecSuite`. ### Why are the changes needed? AQE can change bhj to smj, and it requires two conditions: - no extra shuffle introduce, otherwise the built-in cost evaluator will ban it - AQE does not think the join can be planned as broadcast join. That says the cost statistics in normal planner is not accurate. It's counterintuitive, but it's an expected behavior as AQE designed. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? Pass CI Closes #35353 from ulysses-you/bhj-smj. 
Authored-by: ulysses-you Signed-off-by: Dongjoon Hyun --- .../adaptive/AdaptiveQueryExecSuite.scala | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index de41b88ebde9c..1bd8ad90f83da 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -187,6 +187,29 @@ class AdaptiveQueryExecSuite } } + test("Change broadcast join to merge join") { + withTable("t1", "t2") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10000", + SQLConf.ADAPTIVE_AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.SHUFFLE_PARTITIONS.key -> "1") { + sql("CREATE TABLE t1 USING PARQUET AS SELECT 1 c1") + sql("CREATE TABLE t2 USING PARQUET AS SELECT 1 c1") + val (plan, adaptivePlan) = runAdaptiveAndVerifyResult( + """ + |SELECT * FROM ( + | SELECT distinct c1 from t1 + | ) tmp1 JOIN ( + | SELECT distinct c1 from t2 + | ) tmp2 ON tmp1.c1 = tmp2.c1 + |""".stripMargin) + assert(findTopLevelBroadcastHashJoin(plan).size == 1) + assert(findTopLevelBroadcastHashJoin(adaptivePlan).isEmpty) + assert(findTopLevelSortMergeJoin(adaptivePlan).size == 1) + } + } + } + test("Reuse the parallelism of coalesced shuffle in local shuffle read") { withSQLConf( SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", From 8559e94389fb9b2a3f453f568b3e11c79d2b4e2c Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 1 Feb 2022 23:22:36 -0800 Subject: [PATCH 139/513] [SPARK-37397][PYTHON] Inline annotations for pyspark.ml.base ### What changes were proposed in this pull request? Migration of type annotation for `pyspark.ml.base` from stub file to inline hints. ### Why are the changes needed? As a part of ongoing type hints migration. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests + new data tests. Closes #35289 from zero323/SPARK-37397. 
Authored-by: zero323 Signed-off-by: Dongjoon Hyun --- python/pyspark/ml/base.py | 105 ++++++++++++------ python/pyspark/ml/base.pyi | 103 ----------------- python/pyspark/ml/classification.pyi | 2 + python/pyspark/ml/param/__init__.py | 9 +- python/pyspark/ml/pipeline.pyi | 3 + .../pyspark/ml/tests/typing/test_feature.yml | 13 ++- .../ml/tests/typing/test_regression.yml | 15 +++ python/pyspark/ml/tuning.pyi | 5 + python/pyspark/ml/wrapper.pyi | 10 +- 9 files changed, 122 insertions(+), 143 deletions(-) delete mode 100644 python/pyspark/ml/base.pyi diff --git a/python/pyspark/ml/base.py b/python/pyspark/ml/base.py index d984209685167..4f4ddef2468f8 100644 --- a/python/pyspark/ml/base.py +++ b/python/pyspark/ml/base.py @@ -20,7 +20,25 @@ import copy import threading +from typing import ( + Any, + Callable, + Generic, + Iterable, + Iterator, + List, + Optional, + Sequence, + Tuple, + TypeVar, + Union, + cast, + overload, + TYPE_CHECKING, +) + from pyspark import since +from pyspark.ml.param import P from pyspark.ml.common import inherit_doc from pyspark.ml.param.shared import ( HasInputCol, @@ -30,11 +48,18 @@ HasPredictionCol, Params, ) +from pyspark.sql.dataframe import DataFrame from pyspark.sql.functions import udf -from pyspark.sql.types import StructField, StructType +from pyspark.sql.types import DataType, StructField, StructType + +if TYPE_CHECKING: + from pyspark.ml._typing import ParamMap +T = TypeVar("T") +M = TypeVar("M", bound="Transformer") -class _FitMultipleIterator: + +class _FitMultipleIterator(Generic[M]): """ Used by default implementation of Estimator.fitMultiple to produce models in a thread safe iterator. This class handles the simple case of fitMultiple where each param map should be @@ -55,17 +80,17 @@ class _FitMultipleIterator: See :py:meth:`Estimator.fitMultiple` for more info. """ - def __init__(self, fitSingleModel, numModels): + def __init__(self, fitSingleModel: Callable[[int], M], numModels: int): """ """ self.fitSingleModel = fitSingleModel self.numModel = numModels self.counter = 0 self.lock = threading.Lock() - def __iter__(self): + def __iter__(self) -> Iterator[Tuple[int, M]]: return self - def __next__(self): + def __next__(self) -> Tuple[int, M]: with self.lock: index = self.counter if index >= self.numModel: @@ -73,13 +98,13 @@ def __next__(self): self.counter += 1 return index, self.fitSingleModel(index) - def next(self): + def next(self) -> Tuple[int, M]: """For python2 compatibility.""" return self.__next__() @inherit_doc -class Estimator(Params, metaclass=ABCMeta): +class Estimator(Generic[M], Params, metaclass=ABCMeta): """ Abstract class for estimators that fit models to data. @@ -89,7 +114,7 @@ class Estimator(Params, metaclass=ABCMeta): pass @abstractmethod - def _fit(self, dataset): + def _fit(self, dataset: DataFrame) -> M: """ Fits a model to the input dataset. This is called by the default implementation of fit. @@ -106,7 +131,9 @@ def _fit(self, dataset): """ raise NotImplementedError() - def fitMultiple(self, dataset, paramMaps): + def fitMultiple( + self, dataset: DataFrame, paramMaps: Sequence["ParamMap"] + ) -> Iterable[Tuple[int, M]]: """ Fits a model to the input dataset for each param map in `paramMaps`. 
@@ -128,12 +155,26 @@ def fitMultiple(self, dataset, paramMaps): """ estimator = self.copy() - def fitSingleModel(index): + def fitSingleModel(index: int) -> M: return estimator.fit(dataset, paramMaps[index]) return _FitMultipleIterator(fitSingleModel, len(paramMaps)) - def fit(self, dataset, params=None): + @overload + def fit(self, dataset: DataFrame, params: Optional["ParamMap"] = ...) -> M: + ... + + @overload + def fit( + self, dataset: DataFrame, params: Union[List["ParamMap"], Tuple["ParamMap"]] + ) -> List[M]: + ... + + def fit( + self, + dataset: DataFrame, + params: Optional[Union["ParamMap", List["ParamMap"], Tuple["ParamMap"]]] = None, + ) -> Union[M, List[M]]: """ Fits a model to the input dataset with optional parameters. @@ -156,10 +197,10 @@ def fit(self, dataset, params=None): if params is None: params = dict() if isinstance(params, (list, tuple)): - models = [None] * len(params) + models: List[Optional[M]] = [None] * len(params) for index, model in self.fitMultiple(dataset, params): models[index] = model - return models + return cast(List[M], models) elif isinstance(params, dict): if params: return self.copy(params)._fit(dataset) @@ -183,7 +224,7 @@ class Transformer(Params, metaclass=ABCMeta): pass @abstractmethod - def _transform(self, dataset): + def _transform(self, dataset: DataFrame) -> DataFrame: """ Transforms the input dataset. @@ -199,7 +240,7 @@ def _transform(self, dataset): """ raise NotImplementedError() - def transform(self, dataset, params=None): + def transform(self, dataset: DataFrame, params: Optional["ParamMap"] = None) -> DataFrame: """ Transforms the input dataset with optional parameters. @@ -248,20 +289,20 @@ class UnaryTransformer(HasInputCol, HasOutputCol, Transformer): .. versionadded:: 2.3.0 """ - def setInputCol(self, value): + def setInputCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) @abstractmethod - def createTransformFunc(self): + def createTransformFunc(self) -> Callable[..., Any]: """ Creates the transform function using the given param map. The input param map already takes account of the embedded param map. So the param values should be determined @@ -270,20 +311,20 @@ def createTransformFunc(self): raise NotImplementedError() @abstractmethod - def outputDataType(self): + def outputDataType(self) -> DataType: """ Returns the data type of the output column. """ raise NotImplementedError() @abstractmethod - def validateInputType(self, inputType): + def validateInputType(self, inputType: DataType) -> None: """ Validates the input type. Throw an exception if it is invalid. 
""" raise NotImplementedError() - def transformSchema(self, schema): + def transformSchema(self, schema: StructType) -> StructType: inputType = schema[self.getInputCol()].dataType self.validateInputType(inputType) if self.getOutputCol() in schema.names: @@ -292,7 +333,7 @@ def transformSchema(self, schema): outputFields.append(StructField(self.getOutputCol(), self.outputDataType(), nullable=False)) return StructType(outputFields) - def _transform(self, dataset): + def _transform(self, dataset: DataFrame) -> DataFrame: self.transformSchema(dataset.schema) transformUDF = udf(self.createTransformFunc(), self.outputDataType()) transformedDataset = dataset.withColumn( @@ -313,27 +354,27 @@ class _PredictorParams(HasLabelCol, HasFeaturesCol, HasPredictionCol): @inherit_doc -class Predictor(Estimator, _PredictorParams, metaclass=ABCMeta): +class Predictor(Estimator[M], _PredictorParams, metaclass=ABCMeta): """ Estimator for prediction tasks (regression and classification). """ @since("3.0.0") - def setLabelCol(self, value): + def setLabelCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`labelCol`. """ return self._set(labelCol=value) @since("3.0.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("3.0.0") - def setPredictionCol(self, value): + def setPredictionCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`predictionCol`. """ @@ -341,29 +382,29 @@ def setPredictionCol(self, value): @inherit_doc -class PredictionModel(Model, _PredictorParams, metaclass=ABCMeta): +class PredictionModel(Generic[T], Transformer, _PredictorParams, metaclass=ABCMeta): """ Model for prediction tasks (regression and classification). """ @since("3.0.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("3.0.0") - def setPredictionCol(self, value): + def setPredictionCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) - @property + @property # type: ignore[misc] @abstractmethod @since("2.1.0") - def numFeatures(self): + def numFeatures(self) -> int: """ Returns the number of features the model was trained on. If unknown, returns -1 """ @@ -371,7 +412,7 @@ def numFeatures(self): @abstractmethod @since("3.0.0") - def predict(self, value): + def predict(self, value: T) -> float: """ Predict label for the given features. """ diff --git a/python/pyspark/ml/base.pyi b/python/pyspark/ml/base.pyi deleted file mode 100644 index 37ae6de7ed9a5..0000000000000 --- a/python/pyspark/ml/base.pyi +++ /dev/null @@ -1,103 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import overload -from typing import ( - Callable, - Generic, - Iterable, - List, - Optional, - Sequence, - Tuple, - Union, -) -from pyspark.ml._typing import M, P, T, ParamMap - -import _thread - -import abc -from abc import abstractmethod -from pyspark import since as since # noqa: F401 -from pyspark.ml.common import inherit_doc as inherit_doc # noqa: F401 -from pyspark.ml.param.shared import ( - HasFeaturesCol as HasFeaturesCol, - HasInputCol as HasInputCol, - HasLabelCol as HasLabelCol, - HasOutputCol as HasOutputCol, - HasPredictionCol as HasPredictionCol, - Params as Params, -) -from pyspark.sql.functions import udf as udf # noqa: F401 -from pyspark.sql.types import ( # noqa: F401 - DataType, - StructField as StructField, - StructType as StructType, -) - -from pyspark.sql.dataframe import DataFrame - -class _FitMultipleIterator: - fitSingleModel: Callable[[int], Transformer] - numModel: int - counter: int = ... - lock: _thread.LockType - def __init__(self, fitSingleModel: Callable[[int], Transformer], numModels: int) -> None: ... - def __iter__(self) -> _FitMultipleIterator: ... - def __next__(self) -> Tuple[int, Transformer]: ... - def next(self) -> Tuple[int, Transformer]: ... - -class Estimator(Generic[M], Params, metaclass=abc.ABCMeta): - @overload - def fit(self, dataset: DataFrame, params: Optional[ParamMap] = ...) -> M: ... - @overload - def fit( - self, dataset: DataFrame, params: Union[List[ParamMap], Tuple[ParamMap]] - ) -> List[M]: ... - def fitMultiple( - self, dataset: DataFrame, params: Sequence[ParamMap] - ) -> Iterable[Tuple[int, M]]: ... - -class Transformer(Params, metaclass=abc.ABCMeta): - def transform(self, dataset: DataFrame, params: Optional[ParamMap] = ...) -> DataFrame: ... - -class Model(Transformer, metaclass=abc.ABCMeta): ... - -class UnaryTransformer(HasInputCol, HasOutputCol, Transformer, metaclass=abc.ABCMeta): - def createTransformFunc(self) -> Callable: ... - def outputDataType(self) -> DataType: ... - def validateInputType(self, inputType: DataType) -> None: ... - def transformSchema(self, schema: StructType) -> StructType: ... - def setInputCol(self: M, value: str) -> M: ... - def setOutputCol(self: M, value: str) -> M: ... - -class _PredictorParams(HasLabelCol, HasFeaturesCol, HasPredictionCol): ... - -class Predictor(Estimator[M], _PredictorParams, metaclass=abc.ABCMeta): - def setLabelCol(self: P, value: str) -> P: ... - def setFeaturesCol(self: P, value: str) -> P: ... - def setPredictionCol(self: P, value: str) -> P: ... - -class PredictionModel(Generic[T], Model, _PredictorParams, metaclass=abc.ABCMeta): - def setFeaturesCol(self: M, value: str) -> M: ... - def setPredictionCol(self: M, value: str) -> M: ... - @property - @abc.abstractmethod - def numFeatures(self) -> int: ... - @abstractmethod - def predict(self, value: T) -> float: ... diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi index bb4fb056a95d0..4170a8ca3db0c 100644 --- a/python/pyspark/ml/classification.pyi +++ b/python/pyspark/ml/classification.pyi @@ -820,6 +820,7 @@ class OneVsRest( weightCol: Optional[str] = ..., parallelism: int = ..., ) -> OneVsRest: ... + def _fit(self, dataset: DataFrame) -> OneVsRestModel: ... def setClassifier(self, value: Estimator[M]) -> OneVsRest: ... def setLabelCol(self, value: str) -> OneVsRest: ... def setFeaturesCol(self, value: str) -> OneVsRest: ... 
@@ -832,6 +833,7 @@ class OneVsRest( class OneVsRestModel(Model, _OneVsRestParams, MLReadable[OneVsRestModel], MLWritable): models: List[Transformer] def __init__(self, models: List[Transformer]) -> None: ... + def _transform(self, dataset: DataFrame) -> DataFrame: ... def setFeaturesCol(self, value: str) -> OneVsRestModel: ... def setPredictionCol(self, value: str) -> OneVsRestModel: ... def setRawPredictionCol(self, value: str) -> OneVsRestModel: ... diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 092f79f50f4d2..fd5ed63ca944a 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -43,6 +43,7 @@ __all__ = ["Param", "Params", "TypeConverters"] T = TypeVar("T") +P = TypeVar("P", bound="Params") class Param(Generic[T]): @@ -409,7 +410,7 @@ def extractParamMap(self, extra: Optional["ParamMap"] = None) -> "ParamMap": paramMap.update(extra) return paramMap - def copy(self, extra: Optional["ParamMap"] = None) -> "Params": + def copy(self: P, extra: Optional["ParamMap"] = None) -> P: """ Creates a copy of this instance with the same uid and some extra params. The default implementation creates a @@ -492,7 +493,7 @@ def _dummy() -> "Params": dummy.uid = "undefined" return dummy - def _set(self, **kwargs: Any) -> "Params": + def _set(self: P, **kwargs: Any) -> P: """ Sets user-supplied params. """ @@ -513,7 +514,7 @@ def clear(self, param: Param) -> None: if self.isSet(param): del self._paramMap[param] - def _setDefault(self, **kwargs: Any) -> "Params": + def _setDefault(self: P, **kwargs: Any) -> P: """ Sets default params. """ @@ -529,7 +530,7 @@ def _setDefault(self, **kwargs: Any) -> "Params": self._defaultParamMap[p] = value return self - def _copyValues(self, to: "Params", extra: Optional["ParamMap"] = None) -> "Params": + def _copyValues(self, to: P, extra: Optional["ParamMap"] = None) -> P: """ Copies param values from this instance to another instance for params shared by them. diff --git a/python/pyspark/ml/pipeline.pyi b/python/pyspark/ml/pipeline.pyi index f55b1e3e1ea47..7b3890058294f 100644 --- a/python/pyspark/ml/pipeline.pyi +++ b/python/pyspark/ml/pipeline.pyi @@ -33,10 +33,12 @@ from pyspark.ml.util import ( # noqa: F401 MLWritable as MLWritable, MLWriter as MLWriter, ) +from pyspark.sql.dataframe import DataFrame class Pipeline(Estimator[PipelineModel], MLReadable[Pipeline], MLWritable): stages: List[PipelineStage] def __init__(self, *, stages: Optional[List[PipelineStage]] = ...) -> None: ... + def _fit(self, dataset: DataFrame) -> PipelineModel: ... def setStages(self, stages: List[PipelineStage]) -> Pipeline: ... def getStages(self) -> List[PipelineStage]: ... def setParams(self, *, stages: Optional[List[PipelineStage]] = ...) -> Pipeline: ... @@ -69,6 +71,7 @@ class PipelineModelReader(MLReader[PipelineModel]): class PipelineModel(Model, MLReadable[PipelineModel], MLWritable): stages: List[PipelineStage] def __init__(self, stages: List[Transformer]) -> None: ... + def _transform(self, dataset: DataFrame) -> DataFrame: ... def copy(self, extra: Optional[Dict[Param, Any]] = ...) -> PipelineModel: ... def write(self) -> JavaMLWriter: ... def save(self, path: str) -> None: ... diff --git a/python/pyspark/ml/tests/typing/test_feature.yml b/python/pyspark/ml/tests/typing/test_feature.yml index 3d6b09038ab50..0d1034a44df66 100644 --- a/python/pyspark/ml/tests/typing/test_feature.yml +++ b/python/pyspark/ml/tests/typing/test_feature.yml @@ -15,6 +15,17 @@ # limitations under the License. 
# + +- case: featureMethodChaining + main: | + from pyspark.ml.feature import NGram + + reveal_type(NGram().setInputCol("foo").setOutputCol("bar")) + + out: | + main:3: note: Revealed type is "pyspark.ml.feature.NGram" + + - case: stringIndexerOverloads main: | from pyspark.ml.feature import StringIndexer @@ -41,4 +52,4 @@ main:15: error: No overload variant of "StringIndexer" matches argument types "List[str]", "str" [call-overload] main:15: note: Possible overload variants: main:15: note: def StringIndexer(self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ..., handleInvalid: str = ..., stringOrderType: str = ...) -> StringIndexer - main:15: note: def StringIndexer(self, *, inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ..., handleInvalid: str = ..., stringOrderType: str = ...) -> StringIndexer \ No newline at end of file + main:15: note: def StringIndexer(self, *, inputCols: Optional[List[str]] = ..., outputCols: Optional[List[str]] = ..., handleInvalid: str = ..., stringOrderType: str = ...) -> StringIndexer diff --git a/python/pyspark/ml/tests/typing/test_regression.yml b/python/pyspark/ml/tests/typing/test_regression.yml index b045bec0d9891..4a54a565e626d 100644 --- a/python/pyspark/ml/tests/typing/test_regression.yml +++ b/python/pyspark/ml/tests/typing/test_regression.yml @@ -15,6 +15,21 @@ # limitations under the License. # +- case: linearRegressionMethodChaining + main: | + from pyspark.ml.regression import LinearRegression, LinearRegressionModel + + lr = LinearRegression() + reveal_type(lr.setFeaturesCol("foo").setLabelCol("bar")) + + lrm = LinearRegressionModel.load("/foo") + reveal_type(lrm.setPredictionCol("baz")) + + out: | + main:4: note: Revealed type is "pyspark.ml.regression.LinearRegression" + main:7: note: Revealed type is "pyspark.ml.regression.LinearRegressionModel" + + - case: loadFMRegressor main: | from pyspark.ml.regression import FMRegressor, FMRegressionModel diff --git a/python/pyspark/ml/tuning.pyi b/python/pyspark/ml/tuning.pyi index 75da80bec83c6..25380591cba46 100644 --- a/python/pyspark/ml/tuning.pyi +++ b/python/pyspark/ml/tuning.pyi @@ -25,6 +25,7 @@ from pyspark.ml.evaluation import Evaluator from pyspark.ml.param import Param from pyspark.ml.param.shared import HasCollectSubModels, HasParallelism, HasSeed from pyspark.ml.util import MLReader, MLReadable, MLWriter, MLWritable +from pyspark.sql import DataFrame class ParamGridBuilder: def __init__(self) -> None: ... @@ -82,6 +83,7 @@ class CrossValidator( collectSubModels: bool = ..., foldCol: str = ..., ) -> CrossValidator: ... + def _fit(self, dataset: DataFrame) -> CrossValidatorModel: ... def setEstimator(self, value: Estimator) -> CrossValidator: ... def setEstimatorParamMaps(self, value: List[ParamMap]) -> CrossValidator: ... def setEvaluator(self, value: Evaluator) -> CrossValidator: ... @@ -107,6 +109,7 @@ class CrossValidatorModel( avgMetrics: Optional[List[float]] = ..., subModels: Optional[List[List[Model]]] = ..., ) -> None: ... + def _transform(self, dataset: DataFrame) -> DataFrame: ... def copy(self, extra: Optional[ParamMap] = ...) -> CrossValidatorModel: ... def write(self) -> MLWriter: ... @classmethod @@ -147,6 +150,7 @@ class TrainValidationSplit( collectSubModels: bool = ..., seed: Optional[int] = ..., ) -> TrainValidationSplit: ... + def _fit(self, dataset: DataFrame) -> TrainValidationSplitModel: ... def setEstimator(self, value: Estimator) -> TrainValidationSplit: ... 
def setEstimatorParamMaps(self, value: List[ParamMap]) -> TrainValidationSplit: ... def setEvaluator(self, value: Evaluator) -> TrainValidationSplit: ... @@ -174,6 +178,7 @@ class TrainValidationSplitModel( validationMetrics: Optional[List[float]] = ..., subModels: Optional[List[Model]] = ..., ) -> None: ... + def _transform(self, dataset: DataFrame) -> DataFrame: ... def setEstimator(self, value: Estimator) -> TrainValidationSplitModel: ... def setEstimatorParamMaps(self, value: List[ParamMap]) -> TrainValidationSplitModel: ... def setEvaluator(self, value: Evaluator) -> TrainValidationSplitModel: ... diff --git a/python/pyspark/ml/wrapper.pyi b/python/pyspark/ml/wrapper.pyi index 7c3406a6d3438..a238436eb17ec 100644 --- a/python/pyspark/ml/wrapper.pyi +++ b/python/pyspark/ml/wrapper.pyi @@ -17,12 +17,13 @@ # under the License. import abc -from typing import Any, Optional +from typing import Any, Optional, Generic from pyspark.ml._typing import P, T, JM, ParamMap from pyspark.ml import Estimator, Predictor, PredictionModel, Transformer, Model from pyspark.ml.base import _PredictorParams from pyspark.ml.param import Param, Params +from pyspark.sql.dataframe import DataFrame class JavaWrapper: def __init__(self, java_obj: Optional[Any] = ...) -> None: ... @@ -32,8 +33,11 @@ class JavaParams(JavaWrapper, Params, metaclass=abc.ABCMeta): def copy(self: P, extra: Optional[ParamMap] = ...) -> P: ... def clear(self, param: Param) -> None: ... -class JavaEstimator(JavaParams, Estimator[JM], metaclass=abc.ABCMeta): ... -class JavaTransformer(JavaParams, Transformer, metaclass=abc.ABCMeta): ... +class JavaEstimator(Generic[JM], JavaParams, Estimator[JM], metaclass=abc.ABCMeta): + def _fit(self, dataset: DataFrame) -> JM: ... + +class JavaTransformer(JavaParams, Transformer, metaclass=abc.ABCMeta): + def _transform(self, dataset: DataFrame) -> DataFrame: ... class JavaModel(JavaTransformer, Model, metaclass=abc.ABCMeta): def __init__(self, java_model: Optional[Any] = ...) -> None: ... From 811b7e35f0c0d88c83d1ab3f2fef86463b3ae714 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Tue, 1 Feb 2022 23:27:20 -0800 Subject: [PATCH 140/513] [SPARK-38080][TESTS][SS] Flaky test: StreamingQueryManagerSuite: 'awaitAnyTermination with timeout and resetTerminated' ### What changes were proposed in this pull request? Fix a flaky test. ### Why are the changes needed? `StreamingQueryManagerSuite: 'awaitAnyTermination with timeout and resetTerminated'` is a flaky test. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - The flaky test can be reproduced by adding a `Thread.sleep(100)` in https://github.com/apache/spark/blob/v3.2.1/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala#L346 - Using the above reproduction to verify the PR. Closes #35372 from zsxwing/SPARK-38080. 
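A compact sketch of the race the added `stop()` calls close (names `q` and `spark` are assumptions for this sketch: a streaming query that has just failed and the active session; the real suite uses its own helpers):

```scala
import scala.util.Try
import org.scalatest.concurrent.Eventually._
import org.scalatest.time.SpanSugar._

// The stream thread flips isActive to false *before* StreamingQueryManager
// records the terminating error, so this check alone is not enough:
eventually(timeout(60.seconds)) { require(!q.isActive) }

// stop() joins the stream thread; once it returns, the manager has received
// the error, so awaitAnyTermination deterministically reports the failure
// instead of sometimes timing out without one.
q.stop()
assert(Try(spark.streams.awaitAnyTermination(100L)).isFailure)
```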
Authored-by: Shixiong Zhu Signed-off-by: Dongjoon Hyun --- .../spark/sql/streaming/StreamingQueryManagerSuite.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala index 91d6d77ce5b88..cc66ce856732a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala @@ -201,6 +201,10 @@ class StreamingQueryManagerSuite extends StreamTest { // After that query is stopped, awaitAnyTerm should throw exception eventually(Timeout(streamingTimeout)) { require(!q3.isActive) } // wait for query to stop + // When `isActive` becomes `false`, `StreamingQueryManager` may not receive the error yet. + // Hence, call `stop` to wait until the thread of `q3` exits so that we can ensure + // `StreamingQueryManager` has already received the error. + q3.stop() testAwaitAnyTermination( ExpectException[SparkException], awaitTimeout = 100.milliseconds, @@ -217,6 +221,10 @@ class StreamingQueryManagerSuite extends StreamTest { require(!q4.isActive) val q5 = stopRandomQueryAsync(10.milliseconds, withError = true) eventually(Timeout(streamingTimeout)) { require(!q5.isActive) } + // When `isActive` becomes `false`, `StreamingQueryManager` may not receive the error yet. + // Hence, call `stop` to wait until the thread of `q5` exits so that we can ensure + // `StreamingQueryManager` has already received the error. + q5.stop() // After q5 terminates with exception, awaitAnyTerm should start throwing exception testAwaitAnyTermination(ExpectException[SparkException], awaitTimeout = 2.seconds) } From 0a7941e369fe46f526eb0675f5e97d2ee7c121ff Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 1 Feb 2022 23:32:35 -0800 Subject: [PATCH 141/513] [SPARK-38076][CORE] Remove redundant null-check is covered by further condition ### What changes were proposed in this pull request? There are many code pattern in Spark Java code as follows: ```java obj != null && obj instanceof SomeClass ``` the null-check is redundant as `instanceof` operator implies non-nullity, so this pr remove the redundant `null-check`. ### Why are the changes needed? Code simplification ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35369 from LuciferYang/SPARK-38076. 
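The same reasoning carries over to Scala code; the sketch below is an illustration of why the guard is redundant (the patch itself only touches Java sources): `isInstanceOf`, like Java's `instanceof`, already evaluates to `false` for `null`.

```scala
// Illustration only: the null guard never changes the outcome.
val obj: AnyRef = null

val guarded   = obj != null && obj.isInstanceOf[String] // false
val unguarded = obj.isInstanceOf[String]                // also false, no NPE

assert(guarded == unguarded)
```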
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../spark/network/shuffle/protocol/BlockPushReturnCode.java | 2 +- .../apache/spark/network/shuffle/protocol/BlocksRemoved.java | 2 +- .../spark/network/shuffle/protocol/ExecutorShuffleInfo.java | 2 +- .../spark/network/shuffle/protocol/FinalizeShuffleMerge.java | 2 +- .../apache/spark/network/shuffle/protocol/MergeStatuses.java | 2 +- .../org/apache/spark/network/shuffle/protocol/OpenBlocks.java | 2 +- .../apache/spark/network/shuffle/protocol/PushBlockStream.java | 2 +- .../apache/spark/network/shuffle/protocol/RegisterExecutor.java | 2 +- .../org/apache/spark/network/shuffle/protocol/RemoveBlocks.java | 2 +- .../org/apache/spark/network/shuffle/protocol/StreamHandle.java | 2 +- .../org/apache/spark/network/shuffle/protocol/UploadBlock.java | 2 +- .../spark/network/shuffle/protocol/UploadBlockStream.java | 2 +- .../src/main/java/org/apache/spark/util/sketch/BitArray.java | 2 +- .../main/java/org/apache/spark/util/sketch/BloomFilterImpl.java | 2 +- .../java/org/apache/spark/util/sketch/CountMinSketchImpl.java | 2 +- 15 files changed, 15 insertions(+), 15 deletions(-) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockPushReturnCode.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockPushReturnCode.java index 0455d67c5ace2..d3f170f91507f 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockPushReturnCode.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockPushReturnCode.java @@ -68,7 +68,7 @@ public String toString() { @Override public boolean equals(Object other) { - if (other != null && other instanceof BlockPushReturnCode) { + if (other instanceof BlockPushReturnCode) { BlockPushReturnCode o = (BlockPushReturnCode) other; return returnCode == o.returnCode && Objects.equals(failureBlockId, o.failureBlockId); } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java index a4d6035df807c..452f70c6cd221 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java @@ -51,7 +51,7 @@ public String toString() { @Override public boolean equals(Object other) { - if (other != null && other instanceof BlocksRemoved) { + if (other instanceof BlocksRemoved) { BlocksRemoved o = (BlocksRemoved) other; return numRemovedBlocks == o.numRemovedBlocks; } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java index f123ccb663377..ead13f5b14f1a 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java @@ -69,7 +69,7 @@ public String toString() { @Override public boolean equals(Object other) { - if (other != null && other instanceof ExecutorShuffleInfo) { + if (other instanceof ExecutorShuffleInfo) { ExecutorShuffleInfo o = (ExecutorShuffleInfo) other; return Arrays.equals(localDirs, o.localDirs) && subDirsPerLocalDir == o.subDirsPerLocalDir diff --git 
a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java index 675739a41e817..e99fe1707092b 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java @@ -69,7 +69,7 @@ public String toString() { @Override public boolean equals(Object other) { - if (other != null && other instanceof FinalizeShuffleMerge) { + if (other instanceof FinalizeShuffleMerge) { FinalizeShuffleMerge o = (FinalizeShuffleMerge) other; return Objects.equal(appId, o.appId) && appAttemptId == o.appAttemptId diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java index b2658d62b445b..b6bfc302d218b 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java @@ -95,7 +95,7 @@ public String toString() { @Override public boolean equals(Object other) { - if (other != null && other instanceof MergeStatuses) { + if (other instanceof MergeStatuses) { MergeStatuses o = (MergeStatuses) other; return Objects.equal(shuffleId, o.shuffleId) && Objects.equal(shuffleMergeId, o.shuffleMergeId) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java index 771e17b3233ec..91f203764ecd8 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java @@ -60,7 +60,7 @@ public String toString() { @Override public boolean equals(Object other) { - if (other != null && other instanceof OpenBlocks) { + if (other instanceof OpenBlocks) { OpenBlocks o = (OpenBlocks) other; return Objects.equals(appId, o.appId) && Objects.equals(execId, o.execId) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java index b868d7ccff568..fc9900bae1e8a 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java @@ -87,7 +87,7 @@ public String toString() { @Override public boolean equals(Object other) { - if (other != null && other instanceof PushBlockStream) { + if (other instanceof PushBlockStream) { PushBlockStream o = (PushBlockStream) other; return Objects.equal(appId, o.appId) && appAttemptId == o.appAttemptId diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java index f6af755cd9cd5..6189820726205 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java +++ 
b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java @@ -65,7 +65,7 @@ public String toString() { @Override public boolean equals(Object other) { - if (other != null && other instanceof RegisterExecutor) { + if (other instanceof RegisterExecutor) { RegisterExecutor o = (RegisterExecutor) other; return Objects.equals(appId, o.appId) && Objects.equals(execId, o.execId) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java index ade838bd4286c..6c194d1a14cf2 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java @@ -60,7 +60,7 @@ public String toString() { @Override public boolean equals(Object other) { - if (other != null && other instanceof RemoveBlocks) { + if (other instanceof RemoveBlocks) { RemoveBlocks o = (RemoveBlocks) other; return Objects.equals(appId, o.appId) && Objects.equals(execId, o.execId) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java index dd7715a4e82d4..20954914a7ced 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java @@ -57,7 +57,7 @@ public String toString() { @Override public boolean equals(Object other) { - if (other != null && other instanceof StreamHandle) { + if (other instanceof StreamHandle) { StreamHandle o = (StreamHandle) other; return Objects.equals(streamId, o.streamId) && Objects.equals(numChunks, o.numChunks); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java index a5bc3f7009b46..c5e07d0d991b7 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java @@ -79,7 +79,7 @@ public String toString() { @Override public boolean equals(Object other) { - if (other != null && other instanceof UploadBlock) { + if (other instanceof UploadBlock) { UploadBlock o = (UploadBlock) other; return Objects.equals(appId, o.appId) && Objects.equals(execId, o.execId) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java index 958a84e516c81..a1ac9da0956da 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java @@ -63,7 +63,7 @@ public String toString() { @Override public boolean equals(Object other) { - if (other != null && other instanceof UploadBlockStream) { + if (other instanceof UploadBlockStream) { UploadBlockStream o = (UploadBlockStream) other; return Objects.equals(blockId, o.blockId) && Arrays.equals(metadata, o.metadata); diff --git 
a/common/sketch/src/main/java/org/apache/spark/util/sketch/BitArray.java b/common/sketch/src/main/java/org/apache/spark/util/sketch/BitArray.java index c01c0470fa8c5..31857443e8c68 100644 --- a/common/sketch/src/main/java/org/apache/spark/util/sketch/BitArray.java +++ b/common/sketch/src/main/java/org/apache/spark/util/sketch/BitArray.java @@ -115,7 +115,7 @@ static BitArray readFrom(DataInputStream in) throws IOException { @Override public boolean equals(Object other) { if (this == other) return true; - if (other == null || !(other instanceof BitArray)) return false; + if (!(other instanceof BitArray)) return false; BitArray that = (BitArray) other; return Arrays.equals(data, that.data); } diff --git a/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilterImpl.java b/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilterImpl.java index 5afe5fe45b18d..e7766ee903480 100644 --- a/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilterImpl.java +++ b/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilterImpl.java @@ -42,7 +42,7 @@ public boolean equals(Object other) { return true; } - if (other == null || !(other instanceof BloomFilterImpl)) { + if (!(other instanceof BloomFilterImpl)) { return false; } diff --git a/common/sketch/src/main/java/org/apache/spark/util/sketch/CountMinSketchImpl.java b/common/sketch/src/main/java/org/apache/spark/util/sketch/CountMinSketchImpl.java index f6c1c39bbfd0a..80e71738198b2 100644 --- a/common/sketch/src/main/java/org/apache/spark/util/sketch/CountMinSketchImpl.java +++ b/common/sketch/src/main/java/org/apache/spark/util/sketch/CountMinSketchImpl.java @@ -70,7 +70,7 @@ public boolean equals(Object other) { return true; } - if (other == null || !(other instanceof CountMinSketchImpl)) { + if (!(other instanceof CountMinSketchImpl)) { return false; } From 7b6ba450f77656912143cb093825b324a3ec7d33 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Wed, 2 Feb 2022 00:20:57 -0800 Subject: [PATCH 142/513] [SPARK-37908][K8S][TESTS] Refactoring on pod label test in BasicDriver/ExecutorFeatureStepSuite ### What changes were proposed in this pull request? - Rename DRIVER_LABELS to CUSTOM_DRIVER_LABELS, LABELS to CUSTOM_EXECUTORS_LABELS, make their names more clear. - Refactoring on preset labels and add preset labels test. ### Why are the changes needed? There are two type Pod label in current implementations: [preset label](https://github.com/apache/spark/blob/068d53bd5d89c96bf0cdb05d3ec7f2f023cf3875/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala#L158-L165) set by spark and custom label set by user, but there are some mix up in testcase, so this PR just fix it. Also see realted: https://github.com/apache/spark/pull/34646#issuecomment-981371597 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #35209 from Yikun/SPARK-37908. 
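For context, the two label groups that the refactored assertions now check separately come from different places at submission time. A small sketch with illustrative values (not part of this patch):

```scala
import org.apache.spark.SparkConf

// Custom labels: supplied by the user, one conf entry per label.
val conf = new SparkConf()
  .set("spark.kubernetes.driver.label.team", "data-eng")
  .set("spark.kubernetes.executor.label.team", "data-eng")

// Preset labels such as spark-app-selector, spark-app-name and spark-role are
// derived from the application and added by KubernetesConf itself on top of the
// user-supplied ones; the updated tests assert on each group instead of mixing them.
```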
Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- .../features/BasicDriverFeatureStepSuite.scala | 15 ++++++++------- .../features/BasicExecutorFeatureStepSuite.scala | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala index 9e52c6ef6ccf1..83444e5518e32 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala @@ -21,7 +21,7 @@ import scala.collection.JavaConverters._ import io.fabric8.kubernetes.api.model.{ContainerPort, ContainerPortBuilder, LocalObjectReferenceBuilder, Quantity} import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesTestConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.features.KubernetesFeaturesTestUtils.TestResourceInformation @@ -34,7 +34,7 @@ import org.apache.spark.util.Utils class BasicDriverFeatureStepSuite extends SparkFunSuite { - private val DRIVER_LABELS = Map("labelkey" -> "labelvalue") + private val CUSTOM_DRIVER_LABELS = Map("labelkey" -> "labelvalue") private val CONTAINER_IMAGE_PULL_POLICY = "IfNotPresent" private val DRIVER_ANNOTATIONS = Map("customAnnotation" -> "customAnnotationValue") private val DRIVER_ENVS = Map( @@ -64,7 +64,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { } val kubernetesConf = KubernetesTestConf.createDriverConf( sparkConf = sparkConf, - labels = DRIVER_LABELS, + labels = CUSTOM_DRIVER_LABELS, environment = DRIVER_ENVS, annotations = DRIVER_ANNOTATIONS) @@ -116,12 +116,13 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { val driverPodMetadata = configuredPod.pod.getMetadata assert(driverPodMetadata.getName === "spark-driver-pod") - val DEFAULT_LABELS = Map( - SPARK_APP_NAME_LABEL-> KubernetesConf.getAppNameLabel(kubernetesConf.appName) - ) - (DRIVER_LABELS ++ DEFAULT_LABELS).foreach { case (k, v) => + + // Check custom and preset labels are as expected + CUSTOM_DRIVER_LABELS.foreach { case (k, v) => assert(driverPodMetadata.getLabels.get(k) === v) } + assert(driverPodMetadata.getLabels === kubernetesConf.labels.asJava) + assert(driverPodMetadata.getAnnotations.asScala === DRIVER_ANNOTATIONS) assert(configuredPod.pod.getSpec.getRestartPolicy === "Never") val expectedSparkConf = Map( diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala index b0e7a34a4732f..f5f2712481604 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -27,7 +27,7 @@ import io.fabric8.kubernetes.api.model._ import org.scalatest.BeforeAndAfter import org.apache.spark.{SecurityManager, SparkConf, SparkException, SparkFunSuite} -import 
org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesExecutorConf, KubernetesTestConf, SecretVolumeUtils, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesTestConf, SecretVolumeUtils, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.features.KubernetesFeaturesTestUtils.TestResourceInformation @@ -54,7 +54,7 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { private val DRIVER_POD_UID = "driver-uid" private val RESOURCE_NAME_PREFIX = "base" private val EXECUTOR_IMAGE = "executor-image" - private val LABELS = Map("label1key" -> "label1value") + private val CUSTOM_EXECUTOR_LABELS = Map("label1key" -> "label1value") private var defaultProfile: ResourceProfile = _ private val TEST_IMAGE_PULL_SECRETS = Seq("my-1secret-1", "my-secret-2") private val TEST_IMAGE_PULL_SECRET_OBJECTS = @@ -93,7 +93,7 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { KubernetesTestConf.createExecutorConf( sparkConf = baseConf, driverPod = Some(DRIVER_POD), - labels = LABELS, + labels = CUSTOM_EXECUTOR_LABELS, environment = environment) } @@ -156,12 +156,13 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { // The executor pod name and default labels. assert(executor.pod.getMetadata.getName === s"$RESOURCE_NAME_PREFIX-exec-1") - val DEFAULT_LABELS = Map( - SPARK_APP_NAME_LABEL-> KubernetesConf.getAppNameLabel(conf.appName) - ) - (LABELS ++ DEFAULT_LABELS).foreach { case (k, v) => + + // Check custom and preset labels are as expected + CUSTOM_EXECUTOR_LABELS.foreach { case (k, v) => assert(executor.pod.getMetadata.getLabels.get(k) === v) } + assert(executor.pod.getMetadata.getLabels === conf.labels.asJava) + assert(executor.pod.getSpec.getImagePullSecrets.asScala === TEST_IMAGE_PULL_SECRET_OBJECTS) // There is exactly 1 container with 1 volume mount and default memory limits. From d73d96e4b57363c0da03b9b70ac515d11210d136 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Wed, 2 Feb 2022 09:24:48 -0800 Subject: [PATCH 143/513] [SPARK-37145][K8S] Add KubernetesCustom[Driver/Executor]FeatureConfigStep developer api ### What changes were proposed in this pull request? This patch adds the support for extending user feature steps with configuration by adding 2 developer api: - `KubernetesDriverCustomFeatureConfigStep`: to help user extend custom feature step in executor side - `KubernetesExecutorCustomFeatureConfigStep`: to help user extend custom feature step in driver side Before this patch user can only add feature step like: - `class TestStep extends KubernetesFeatureConfigStep`: without any kubernetes conf After this patch user can add feature step with configuration like: - `class TestStepWithDriverConf extends KubernetesDriverCustomFeatureConfigStep`: only driver - `class TestStepWithExecConf extends KubernetesExecutorCustomFeatureConfigStep`: only executor - `class TestStepWithK8SConf extends KubernetesDriverCustomFeatureConfigStep with KubernetesExecutorCustomFeatureConfigStep`: both driver and executor ### Why are the changes needed? In https://github.com/apache/spark/pull/30206 , a developer API for custom feature steps has been added, but it didn't support initialize user feature step with kubernetes conf (like `KubernetesConf`/`KubernetesDriverConf`/`KubernetesExecutorConf`). In most of scenarios, users want to make corresponding changes in their feature steps according to the configuration. 
Such as, the customized scheduler scenario, user wants to configure pod according to passed job configuration. ### Does this PR introduce _any_ user-facing change? Improve the developer API for for custom feature steps. ### How was this patch tested? - Added UT - Runing k8s integration test manaully: `build/sbt -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube,r "kubernetes-integration-tests/test` Closes: https://github.com/apache/spark/pull/34924 Closes #35345 from Yikun/SPARK-37145-alt. Lead-authored-by: Yikun Jiang Co-authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 8 +- .../org/apache/spark/deploy/k8s/Config.scala | 8 +- ...ernetesDriverCustomFeatureConfigStep.scala | 39 +++++++++ ...netesExecutorCustomFeatureConfigStep.scala | 39 +++++++++ .../k8s/submit/KubernetesDriverBuilder.scala | 22 ++++- .../k8s/KubernetesExecutorBuilder.scala | 23 ++++- .../spark/deploy/k8s/PodBuilderSuite.scala | 87 ++++++++++++++++++- .../submit/KubernetesDriverBuilderSuite.scala | 37 +++++++- .../k8s/KubernetesExecutorBuilderSuite.scala | 34 ++++++++ 9 files changed, 288 insertions(+), 9 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesDriverCustomFeatureConfigStep.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesExecutorCustomFeatureConfigStep.scala diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index b9355c3a709d7..7cb90d8d20ccf 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1420,7 +1420,9 @@ See the [configuration page](configuration.html) for information on Spark config Class names of an extra driver pod feature step implementing `KubernetesFeatureConfigStep`. This is a developer API. Comma separated. - Runs after all of Spark internal feature steps. + Runs after all of Spark internal feature steps. Since 3.3.0, your driver feature step + can implement `KubernetesDriverCustomFeatureConfigStep` where the driver config + is also available. 3.2.0 @@ -1430,7 +1432,9 @@ See the [configuration page](configuration.html) for information on Spark config Class names of an extra executor pod feature step implementing `KubernetesFeatureConfigStep`. This is a developer API. Comma separated. - Runs after all of Spark internal feature steps. + Runs after all of Spark internal feature steps. Since 3.3.0, your executor feature step + can implement `KubernetesExecutorCustomFeatureConfigStep` where the executor config + is also available. 3.2.0 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 385463c443275..65a8f82699665 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -344,7 +344,9 @@ private[spark] object Config extends Logging { ConfigBuilder("spark.kubernetes.driver.pod.featureSteps") .doc("Class names of an extra driver pod feature step implementing " + "KubernetesFeatureConfigStep. This is a developer API. Comma separated. " + - "Runs after all of Spark internal feature steps.") + "Runs after all of Spark internal feature steps. 
Since 3.3.0, your driver feature " + + "step can implement `KubernetesDriverCustomFeatureConfigStep` where the driver " + + "config is also available.") .version("3.2.0") .stringConf .toSequence @@ -354,7 +356,9 @@ private[spark] object Config extends Logging { ConfigBuilder("spark.kubernetes.executor.pod.featureSteps") .doc("Class name of an extra executor pod feature step implementing " + "KubernetesFeatureConfigStep. This is a developer API. Comma separated. " + - "Runs after all of Spark internal feature steps.") + "Runs after all of Spark internal feature steps. Since 3.3.0, your executor feature " + + "step can implement `KubernetesExecutorCustomFeatureConfigStep` where the executor " + + "config is also available.") .version("3.2.0") .stringConf .toSequence diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesDriverCustomFeatureConfigStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesDriverCustomFeatureConfigStep.scala new file mode 100644 index 0000000000000..bbd05e9f67c51 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesDriverCustomFeatureConfigStep.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.features + +import org.apache.spark.annotation.{DeveloperApi, Unstable} +import org.apache.spark.deploy.k8s.KubernetesDriverConf + +/** + * :: DeveloperApi :: + * + * A base interface to help user extend custom feature step in driver side. + * Note: If your custom feature step would be used only in driver or both in driver and executor, + * please use this. + */ +@Unstable +@DeveloperApi +trait KubernetesDriverCustomFeatureConfigStep extends KubernetesFeatureConfigStep { + /** + * Initialize the configuration for driver user feature step, this only applicable when user + * specified `spark.kubernetes.driver.pod.featureSteps`, the init will be called after feature + * step loading. + */ + def init(config: KubernetesDriverConf): Unit +} + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesExecutorCustomFeatureConfigStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesExecutorCustomFeatureConfigStep.scala new file mode 100644 index 0000000000000..062fa7dbf1413 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesExecutorCustomFeatureConfigStep.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.features + +import org.apache.spark.annotation.{DeveloperApi, Unstable} +import org.apache.spark.deploy.k8s.KubernetesExecutorConf + +/** + * :: DeveloperApi :: + * + * A base interface to help user extend custom feature step in executor side. + * Note: If your custom feature step would be used only in driver or both in driver and executor, + * please use this. + */ +@Unstable +@DeveloperApi +trait KubernetesExecutorCustomFeatureConfigStep extends KubernetesFeatureConfigStep { + /** + * Initialize the configuration for executor user feature step, this only applicable when user + * specified `spark.kubernetes.executor.pod.featureSteps` the init will be called after feature + * step loading. + */ + def init(config: KubernetesExecutorConf): Unit +} + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala index f0c78f371d6d2..e89e52f1af201 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala @@ -18,6 +18,7 @@ package org.apache.spark.deploy.k8s.submit import io.fabric8.kubernetes.client.KubernetesClient +import org.apache.spark.SparkException import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.features._ import org.apache.spark.util.Utils @@ -39,7 +40,26 @@ private[spark] class KubernetesDriverBuilder { val userFeatures = conf.get(Config.KUBERNETES_DRIVER_POD_FEATURE_STEPS) .map { className => - Utils.classForName(className).newInstance().asInstanceOf[KubernetesFeatureConfigStep] + val feature = Utils.classForName[Any](className).newInstance() + val initializedFeature = feature match { + // Since 3.3, allow user to implement feature with KubernetesDriverConf + case d: KubernetesDriverCustomFeatureConfigStep => + d.init(conf) + Some(d) + // raise SparkException with wrong type feature step + case _: KubernetesExecutorCustomFeatureConfigStep => + None + // Since 3.2, allow user to implement feature without config + case f: KubernetesFeatureConfigStep => + Some(f) + case _ => None + } + initializedFeature.getOrElse { + throw new SparkException(s"Failed to initialize feature step: $className, " + + s"please make sure your driver side feature steps are implemented by " + + s"`${classOf[KubernetesDriverCustomFeatureConfigStep].getName}` or " + + s"`${classOf[KubernetesFeatureConfigStep].getName}`.") + } } val features = Seq( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala index 1a62d08a7b413..1f6d72cb7eee0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala @@ -18,7 +18,7 @@ package org.apache.spark.scheduler.cluster.k8s import io.fabric8.kubernetes.client.KubernetesClient -import org.apache.spark.SecurityManager +import org.apache.spark.{SecurityManager, SparkException} import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.features._ import org.apache.spark.resource.ResourceProfile @@ -43,7 +43,26 @@ private[spark] class KubernetesExecutorBuilder { val userFeatures = conf.get(Config.KUBERNETES_EXECUTOR_POD_FEATURE_STEPS) .map { className => - Utils.classForName(className).newInstance().asInstanceOf[KubernetesFeatureConfigStep] + val feature = Utils.classForName[Any](className).newInstance() + val initializedFeature = feature match { + // Since 3.3, allow user to implement feature with KubernetesExecutorConf + case e: KubernetesExecutorCustomFeatureConfigStep => + e.init(conf) + Some(e) + // raise SparkException with wrong type feature step + case _: KubernetesDriverCustomFeatureConfigStep => + None + // Since 3.2, allow user to implement feature without config + case f: KubernetesFeatureConfigStep => + Some(f) + case _ => None + } + initializedFeature.getOrElse { + throw new SparkException(s"Failed to initialize feature step: $className, " + + s"please make sure your executor side feature steps are implemented by " + + s"`${classOf[KubernetesExecutorCustomFeatureConfigStep].getSimpleName}` or " + + s"`${classOf[KubernetesFeatureConfigStep].getSimpleName}`.") + } } val features = Seq( diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala index a8a3ca4eea965..c076f22c7b141 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala @@ -26,15 +26,22 @@ import org.mockito.Mockito.{mock, never, verify, when} import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} -import org.apache.spark.deploy.k8s.features.KubernetesFeatureConfigStep +import org.apache.spark.deploy.k8s.features.{KubernetesDriverCustomFeatureConfigStep, KubernetesExecutorCustomFeatureConfigStep, KubernetesFeatureConfigStep} import org.apache.spark.internal.config.ConfigEntry abstract class PodBuilderSuite extends SparkFunSuite { + val POD_ROLE: String + val TEST_ANNOTATION_KEY: String + val TEST_ANNOTATION_VALUE: String protected def templateFileConf: ConfigEntry[_] protected def userFeatureStepsConf: ConfigEntry[_] + protected def userFeatureStepWithExpectedAnnotation: (String, String) + + protected def wrongTypeFeatureStep: String + protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod protected val baseConf = new SparkConf(false) @@ -66,6 +73,57 @@ abstract class PodBuilderSuite extends SparkFunSuite { assert(pod.container.getVolumeMounts.asScala.exists(_.getName == "so_long_two")) } + test("SPARK-37145: configure a custom test step with base config") { + val 
client = mockKubernetesClient() + val sparkConf = baseConf.clone() + .set(userFeatureStepsConf.key, + "org.apache.spark.deploy.k8s.TestStepWithConf") + .set(templateFileConf.key, "template-file.yaml") + .set("test-features-key", "test-features-value") + val pod = buildPod(sparkConf, client) + verifyPod(pod) + val metadata = pod.pod.getMetadata + assert(metadata.getAnnotations.containsKey("test-features-key")) + assert(metadata.getAnnotations.get("test-features-key") === "test-features-value") + } + + test("SPARK-37145: configure a custom test step with driver or executor config") { + val client = mockKubernetesClient() + val (featureSteps, annotation) = userFeatureStepWithExpectedAnnotation + val sparkConf = baseConf.clone() + .set(templateFileConf.key, "template-file.yaml") + .set(userFeatureStepsConf.key, featureSteps) + .set(TEST_ANNOTATION_KEY, annotation) + val pod = buildPod(sparkConf, client) + verifyPod(pod) + val metadata = pod.pod.getMetadata + assert(metadata.getAnnotations.containsKey(TEST_ANNOTATION_KEY)) + assert(metadata.getAnnotations.get(TEST_ANNOTATION_KEY) === annotation) + } + + test("SPARK-37145: configure a custom test step with wrong type config") { + val client = mockKubernetesClient() + val sparkConf = baseConf.clone() + .set(templateFileConf.key, "template-file.yaml") + .set(userFeatureStepsConf.key, wrongTypeFeatureStep) + val e = intercept[SparkException] { + buildPod(sparkConf, client) + } + assert(e.getMessage.contains(s"please make sure your $POD_ROLE side feature steps")) + } + + test("SPARK-37145: configure a custom test step with wrong name") { + val client = mockKubernetesClient() + val featureSteps = "unknow.class" + val sparkConf = baseConf.clone() + .set(templateFileConf.key, "template-file.yaml") + .set(userFeatureStepsConf.key, featureSteps) + val e = intercept[ClassNotFoundException] { + buildPod(sparkConf, client) + } + assert(e.getMessage.contains("unknow.class")) + } + test("complain about misconfigured pod template") { val client = mockKubernetesClient( new PodBuilder() @@ -249,3 +307,30 @@ class TestStepTwo extends KubernetesFeatureConfigStep { SparkPod(podWithLocalDirVolumes, containerWithLocalDirVolumeMounts) } } + +/** + * A test user feature step would be used in driver and executor. 
+ */ +class TestStepWithConf extends KubernetesDriverCustomFeatureConfigStep + with KubernetesExecutorCustomFeatureConfigStep { + import io.fabric8.kubernetes.api.model._ + + private var kubernetesConf: KubernetesConf = _ + + override def init(conf: KubernetesDriverConf): Unit = { + kubernetesConf = conf + } + + override def init(conf: KubernetesExecutorConf): Unit = { + kubernetesConf = conf + } + + override def configurePod(pod: SparkPod): SparkPod = { + val k8sPodBuilder = new PodBuilder(pod.pod) + .editOrNewMetadata() + .addToAnnotations("test-features-key", kubernetesConf.get("test-features-key")) + .endMetadata() + val k8sPod = k8sPodBuilder.build() + SparkPod(k8sPod, pod.container) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala index 8bf43d909dee3..5389a880d1b4a 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala @@ -22,10 +22,13 @@ import io.fabric8.kubernetes.client.KubernetesClient import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s._ -import org.apache.spark.deploy.k8s.features.KubernetesFeatureConfigStep +import org.apache.spark.deploy.k8s.features.{KubernetesDriverCustomFeatureConfigStep, KubernetesFeatureConfigStep} import org.apache.spark.internal.config.ConfigEntry class KubernetesDriverBuilderSuite extends PodBuilderSuite { + val POD_ROLE: String = "driver" + val TEST_ANNOTATION_KEY: String = "driver-annotation-key" + val TEST_ANNOTATION_VALUE: String = "driver-annotation-value" override protected def templateFileConf: ConfigEntry[_] = { Config.KUBERNETES_DRIVER_PODTEMPLATE_FILE @@ -35,6 +38,14 @@ class KubernetesDriverBuilderSuite extends PodBuilderSuite { Config.KUBERNETES_DRIVER_POD_FEATURE_STEPS } + override protected def userFeatureStepWithExpectedAnnotation: (String, String) = { + ("org.apache.spark.deploy.k8s.submit.TestStepWithDrvConf", TEST_ANNOTATION_VALUE) + } + + override protected def wrongTypeFeatureStep: String = { + "org.apache.spark.scheduler.cluster.k8s.TestStepWithExecConf" + } + override protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod = { val conf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) new KubernetesDriverBuilder().buildFromFeatures(conf, client).pod @@ -82,3 +93,27 @@ class TestStep extends KubernetesFeatureConfigStep { .build() ) } + + +/** + * A test driver user feature step would be used in only driver. 
+ */ +class TestStepWithDrvConf extends KubernetesDriverCustomFeatureConfigStep { + import io.fabric8.kubernetes.api.model._ + + private var driverConf: KubernetesDriverConf = _ + + override def init(config: KubernetesDriverConf): Unit = { + driverConf = config + } + + override def configurePod(pod: SparkPod): SparkPod = { + val k8sPodBuilder = new PodBuilder(pod.pod) + .editOrNewMetadata() + // The annotation key = TEST_ANNOTATION_KEY, value = TEST_ANNOTATION_VALUE + .addToAnnotations("driver-annotation-key", driverConf.get("driver-annotation-key")) + .endMetadata() + val k8sPod = k8sPodBuilder.build() + SparkPod(k8sPod, pod.container) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala index ec60c6fc0bf82..adbb5b296c9dc 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala @@ -20,10 +20,14 @@ import io.fabric8.kubernetes.client.KubernetesClient import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.deploy.k8s._ +import org.apache.spark.deploy.k8s.features.KubernetesExecutorCustomFeatureConfigStep import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.resource.ResourceProfile class KubernetesExecutorBuilderSuite extends PodBuilderSuite { + val POD_ROLE: String = "executor" + val TEST_ANNOTATION_KEY: String = "executor-annotation-key" + val TEST_ANNOTATION_VALUE: String = "executor-annotation-value" override protected def templateFileConf: ConfigEntry[_] = { Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE @@ -33,6 +37,14 @@ class KubernetesExecutorBuilderSuite extends PodBuilderSuite { Config.KUBERNETES_EXECUTOR_POD_FEATURE_STEPS } + override protected def userFeatureStepWithExpectedAnnotation: (String, String) = { + ("org.apache.spark.scheduler.cluster.k8s.TestStepWithExecConf", TEST_ANNOTATION_VALUE) + } + + override protected def wrongTypeFeatureStep: String = { + "org.apache.spark.deploy.k8s.submit.TestStepWithDrvConf" + } + override protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod = { sparkConf.set("spark.driver.host", "https://driver.host.com") val conf = KubernetesTestConf.createExecutorConf(sparkConf = sparkConf) @@ -40,5 +52,27 @@ class KubernetesExecutorBuilderSuite extends PodBuilderSuite { val defaultProfile = ResourceProfile.getOrCreateDefaultProfile(sparkConf) new KubernetesExecutorBuilder().buildFromFeatures(conf, secMgr, client, defaultProfile).pod } +} +/** + * A test executor user feature step would be used in only executor. 
+ */ +class TestStepWithExecConf extends KubernetesExecutorCustomFeatureConfigStep { + import io.fabric8.kubernetes.api.model._ + + private var executorConf: KubernetesExecutorConf = _ + + def init(config: KubernetesExecutorConf): Unit = { + executorConf = config + } + + override def configurePod(pod: SparkPod): SparkPod = { + val k8sPodBuilder = new PodBuilder(pod.pod) + .editOrNewMetadata() + // The annotation key = TEST_ANNOTATION_KEY, value = TEST_ANNOTATION_VALUE + .addToAnnotations("executor-annotation-key", executorConf.get("executor-annotation-key")) + .endMetadata() + val k8sPod = k8sPodBuilder.build() + SparkPod(k8sPod, pod.container) + } } From b0c947c51ab3c67d753ed523852dc89638cb502a Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Wed, 2 Feb 2022 15:44:01 -0800 Subject: [PATCH 144/513] [SPARK-38089][CORE][TESTS] Show the root cause exception in `TestUtils.assertExceptionMsg` ### What changes were proposed in this pull request? Improve assertion failure message in `TestUtils.assertExceptionMsg` to print out the exception tree in which it was searching (upon failure). ### Why are the changes needed? `TestUtils.assertExceptionMsg` is great, but when the assertion _doesn't_ match, it can be challenging to tell why, because the exception tree that was searched isn't printed. Only way I could find to fix it up was to run things in a debugger and check the exception tree. This makes it easier to tell what went wrong just from the assertion failure message. ### Does this PR introduce _any_ user-facing change? No. Easier for devs to see why their test failed. ### How was this patch tested? Used extensively while writing the tests for PR #34009. It was very useful! For example, let's say I had a typo in the assertion for the test case `AvroSuite.SPARK-34133: Writing user provided schema respects case sensitivity for field matching`. The failure would look like: ``` java.lang.AssertionError: assertion failed: Exception tree doesn't contain the expected exception of type org.apache.spark.sql.avro.IncompatibleSchemaException with message: Cannot find field "FOO" in Avro schema at scala.Predef$.assert(Predef.scala:223) at org.apache.spark.TestUtils$.assertExceptionMsg(TestUtils.scala:269) at org.apache.spark.sql.avro.AvroSuite.$anonfun$new$175(AvroSuite.scala:1409) ... ``` So I know that I'm wrong, but I can't tell _why_. Now, after the change: ``` java.lang.AssertionError: assertion failed: Exception tree doesn't contain the expected exception of type org.apache.spark.sql.avro.IncompatibleSchemaException with message: Cannot find field "FOO" in Avro schema org.apache.spark.sql.avro.IncompatibleSchemaException: Cannot find field 'FOO' in Avro schema at org.apache.spark.sql.avro.AvroUtils$AvroSchemaHelper.$anonfun$validateNoExtraCatalystFields$1(AvroUtils.scala:265) at org.apache.spark.sql.avro.AvroUtils$AvroSchemaHelper.$anonfun$validateNoExtraCatalystFields$1$adapted(AvroUtils.scala:258) at scala.collection.immutable.List.foreach(List.scala:431) at org.apache.spark.sql.avro.AvroUtils$AvroSchemaHelper.validateNoExtraCatalystFields(AvroUtils.scala:258) ... at scala.Predef$.assert(Predef.scala:223) at org.apache.spark.TestUtils$.assertExceptionMsg(TestUtils.scala:266) at org.apache.spark.sql.avro.AvroSuite.$anonfun$new$175(AvroSuite.scala:1409) ... ``` I can easily see that I used the wrong type of quotes. Much better! Closes #35383 from xkrogen/xkrogen-SPARK-38089-testutils-assertexceptionmsg-improve. 
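For context, the improved helper's pattern can be sketched in self-contained Scala. This is a simplified illustration only: the real `TestUtils.assertExceptionMsg` also matches on an optional exception type and reuses `Utils.exceptionString` for the dump.

```
import java.io.{PrintWriter, StringWriter}

object ExceptionMsgAssertSketch {
  // Render the whole exception tree (message, stack trace, causes) for the failure message.
  private def exceptionString(e: Throwable): String = {
    val sw = new StringWriter()
    e.printStackTrace(new PrintWriter(sw, true))
    sw.toString
  }

  // Walk the cause chain; on mismatch, fail with the full tree so the reason is visible.
  def assertExceptionMsg(e: Throwable, msg: String): Unit = {
    def contains(t: Throwable): Boolean =
      t != null && (Option(t.getMessage).exists(_.contains(msg)) || contains(t.getCause))
    assert(contains(e),
      s"Exception tree doesn't contain the expected exception with message: $msg\n" +
        exceptionString(e))
  }
}
```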
Authored-by: Erik Krogen Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/TestUtils.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index 505b3ab3a783a..104e98b8ae0a4 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -263,7 +263,8 @@ private[spark] object TestUtils { contains = contain(e, msg) } assert(contains, - s"Exception tree doesn't contain the expected exception ${typeMsg}with message: $msg") + s"Exception tree doesn't contain the expected exception ${typeMsg}with message: $msg\n" + + Utils.exceptionString(e)) } /** From fed78337990be22f0d8d52287e13828a5a543b5d Mon Sep 17 00:00:00 2001 From: Leona Yoda Date: Thu, 3 Feb 2022 09:41:11 +0900 Subject: [PATCH 145/513] [SPARK-37958][DOCS] Update spark.files.overwrite description ### What changes were proposed in this pull request? Update the description about `spark.files.overwrite` ### Why are the changes needed? The description is misleading so users might misunderstand that they can use `SparkContext.addFile` to update the files that the added by the same API before. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? ` SKIP_API=1 bundle exec jekyll build` The screenshot of document is below. Screen Shot 2022-02-01 at 11 14 59 Closes #35377 from yoda-mon/spark-37958. Authored-by: Leona Yoda Signed-off-by: Hyukjin Kwon --- docs/configuration.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 818a2e556337f..ae3f422f34b3a 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1822,8 +1822,9 @@ Apart from these, the following properties are also available, and may be useful spark.files.overwrite false - Whether to overwrite files added through SparkContext.addFile() when the target file exists and - its contents do not match those of the source. + Whether to overwrite any files which exist at the startup. Users can not overwrite the files added by + SparkContext.addFile or SparkContext.addJar before even if this option is set + true. 1.0.0 From 157f3b9174ef320b21785ddb7d21a067dc5bcd00 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 2 Feb 2022 19:53:17 -0800 Subject: [PATCH 146/513] [SPARK-38095][CORE] HistoryServerDiskManager.appStorePath should use backend-based extensions ### What changes were proposed in this pull request? This PR aims to make `HistoryServerDiskManager.appStorePath` use backend-based extensions for directory names. ### Why are the changes needed? Previous, it was hard-coded to `.ldb` and this was confusing because it doesn't match with the folder's content in case of `RocksDB` backend. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs with the newly added test case. Closes #35384 from dongjoon-hyun/SPARK-38095. 
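To make the new behavior concrete, here is a minimal sketch of the naming rule, assuming only the two existing backends (the real code reads `HYBRID_STORE_DISK_BACKEND` from the SparkConf rather than taking the backend as a parameter):

```
import java.io.File

object AppStorePathSketch {
  // Sketch only: map the configured hybrid-store disk backend to its file extension.
  def storeExtension(backend: String): String =
    if (backend == "LEVELDB") ".ldb" else ".rdb"

  // "app-1" attempt "1" on the ROCKSDB backend now resolves to "app-1_1.rdb" instead of "app-1_1.ldb".
  def appStorePath(appStoreDir: File, appId: String, attemptId: Option[String], backend: String): File =
    new File(appStoreDir, appId + attemptId.map("_" + _).getOrElse("") + storeExtension(backend))
}
```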
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/deploy/history/HistoryServerDiskManager.scala | 5 ++++- .../deploy/history/HistoryServerDiskManagerSuite.scala | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala index 40e337a725430..72d407d8643cf 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala @@ -28,6 +28,7 @@ import org.apache.commons.io.FileUtils import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.internal.config.History._ +import org.apache.spark.internal.config.History.HybridStoreDiskBackend.LEVELDB import org.apache.spark.status.KVUtils._ import org.apache.spark.util.{Clock, Utils} import org.apache.spark.util.kvstore.KVStore @@ -55,6 +56,8 @@ private class HistoryServerDiskManager( if (!appStoreDir.isDirectory() && !appStoreDir.mkdir()) { throw new IllegalArgumentException(s"Failed to create app directory ($appStoreDir).") } + private val extension = + if (conf.get(HYBRID_STORE_DISK_BACKEND) == LEVELDB.toString) ".ldb" else ".rdb" private val tmpStoreDir = new File(path, "temp") if (!tmpStoreDir.isDirectory() && !tmpStoreDir.mkdir()) { @@ -251,7 +254,7 @@ private class HistoryServerDiskManager( } private[history] def appStorePath(appId: String, attemptId: Option[String]): File = { - val fileName = appId + attemptId.map("_" + _).getOrElse("") + ".ldb" + val fileName = appId + attemptId.map("_" + _).getOrElse("") + extension new File(appStoreDir, fileName) } diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerDiskManagerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerDiskManagerSuite.scala index a8f372932672c..de5b5187aa2fa 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerDiskManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerDiskManagerSuite.scala @@ -212,4 +212,11 @@ class HistoryServerDiskManagerSuite extends SparkFunSuite with BeforeAndAfter { assert(store.read(classOf[ApplicationStoreInfo], dstC.getAbsolutePath).size === 2) } + test("SPARK-38095: appStorePath should use backend extensions") { + HybridStoreDiskBackend.values.zip(Seq(".ldb", ".rdb")).foreach { case (backend, extension) => + val conf = new SparkConf().set(HYBRID_STORE_DISK_BACKEND, backend.toString) + val manager = new HistoryServerDiskManager(conf, testDir, store, new ManualClock()) + assert(manager.appStorePath("appId", None).getName.endsWith(extension)) + } + } } From 6347857f0bad105541971283f79281c490f6bb18 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Thu, 3 Feb 2022 14:56:11 +0300 Subject: [PATCH 147/513] [SPARK-37937][SQL] Use error classes in the parsing errors of lateral join ### What changes were proposed in this pull request? In the PR, I propose to use the following error classes for the parsing errors of lateral joins: - `INVALID_SQL_SYNTAX ` - `UNSUPPORTED_FEATURE ` These new error classes are added to `error-classes.json`. ### Why are the changes needed? Porting the parsing errors for lateral join to the new error framework should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? 
Added new test suite Closes #35328 from imback82/SPARK-37937. Authored-by: Terry Kim Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 4 + .../spark/sql/errors/QueryParsingErrors.scala | 8 +- .../catalyst/parser/ErrorParserSuite.scala | 10 --- .../sql-tests/results/join-lateral.sql.out | 4 +- .../sql/errors/QueryParsingErrorsSuite.scala | 81 +++++++++++++++++++ 5 files changed, 91 insertions(+), 16 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index a1ac99f1a0727..06ce22a780524 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -93,6 +93,10 @@ "message" : [ "The value of parameter(s) '%s' in %s is invalid: %s" ], "sqlState" : "22023" }, + "INVALID_SQL_SYNTAX" : { + "message" : [ "Invalid SQL syntax: %s" ], + "sqlState" : "42000" + }, "MAP_KEY_DOES_NOT_EXIST" : { "message" : [ "Key %s does not exist. If necessary set %s to false to bypass this error." ] }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 938bbfdb49c33..6bcd20c19b336 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -102,19 +102,19 @@ object QueryParsingErrors { } def lateralJoinWithNaturalJoinUnsupportedError(ctx: ParserRuleContext): Throwable = { - new ParseException("LATERAL join with NATURAL join is not supported", ctx) + new ParseException("UNSUPPORTED_FEATURE", Array("LATERAL join with NATURAL join."), ctx) } def lateralJoinWithUsingJoinUnsupportedError(ctx: ParserRuleContext): Throwable = { - new ParseException("LATERAL join with USING join is not supported", ctx) + new ParseException("UNSUPPORTED_FEATURE", Array("LATERAL join with USING join."), ctx) } def unsupportedLateralJoinTypeError(ctx: ParserRuleContext, joinType: String): Throwable = { - new ParseException(s"Unsupported LATERAL join type $joinType", ctx) + new ParseException("UNSUPPORTED_FEATURE", Array(s"LATERAL join type '$joinType'."), ctx) } def invalidLateralJoinRelationError(ctx: RelationPrimaryContext): Throwable = { - new ParseException(s"LATERAL can only be used with subquery", ctx) + new ParseException("INVALID_SQL_SYNTAX", Array("LATERAL can only be used with subquery."), ctx) } def repetitiveWindowDefinitionError(name: String, ctx: WindowClauseContext): Throwable = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala index dfc5edc82ef5b..99051d692451b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala @@ -208,14 +208,4 @@ class ErrorParserSuite extends AnalysisTest { |SELECT b """.stripMargin, 2, 9, 10, msg + " test-table") } - - test("SPARK-35789: lateral join with non-subquery relations") { - val msg = "LATERAL can only be used with subquery" - intercept("SELECT * FROM t1, LATERAL t2", msg) - intercept("SELECT * FROM t1 JOIN LATERAL t2", msg) - intercept("SELECT * FROM t1, LATERAL (t2 JOIN t3)", msg) - intercept("SELECT * FROM t1, 
LATERAL (LATERAL t2)", msg) - intercept("SELECT * FROM t1, LATERAL VALUES (0, 1)", msg) - intercept("SELECT * FROM t1, LATERAL RANGE(0, 1)", msg) - } } diff --git a/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out b/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out index c1b595ec4fe61..cc1619813dd55 100644 --- a/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out @@ -153,7 +153,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -LATERAL join with NATURAL join is not supported(line 1, pos 14) +The feature is not supported: LATERAL join with NATURAL join.(line 1, pos 14) == SQL == SELECT * FROM t1 NATURAL JOIN LATERAL (SELECT c1 + c2 AS c2) @@ -167,7 +167,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -LATERAL join with USING join is not supported(line 1, pos 14) +The feature is not supported: LATERAL join with USING join.(line 1, pos 14) == SQL == SELECT * FROM t1 JOIN LATERAL (SELECT c1 + c2 AS c2) USING (c2) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala new file mode 100644 index 0000000000000..1a213bf835b15 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.errors + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.test.SharedSparkSession + +class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { + def validateParsingError( + sqlText: String, + errorClass: String, + sqlState: String, + message: String): Unit = { + val e = intercept[ParseException] { + sql(sqlText) + } + assert(e.getErrorClass === errorClass) + assert(e.getSqlState === sqlState) + assert(e.getMessage.contains(message)) + } + + test("UNSUPPORTED_FEATURE: LATERAL join with NATURAL join not supported") { + validateParsingError( + sqlText = "SELECT * FROM t1 NATURAL JOIN LATERAL (SELECT c1 + c2 AS c2)", + errorClass = "UNSUPPORTED_FEATURE", + sqlState = "0A000", + message = "The feature is not supported: LATERAL join with NATURAL join.") + } + + test("UNSUPPORTED_FEATURE: LATERAL join with USING join not supported") { + validateParsingError( + sqlText = "SELECT * FROM t1 JOIN LATERAL (SELECT c1 + c2 AS c2) USING (c2)", + errorClass = "UNSUPPORTED_FEATURE", + sqlState = "0A000", + message = "The feature is not supported: LATERAL join with USING join.") + } + + test("UNSUPPORTED_FEATURE: Unsupported LATERAL join type") { + Seq(("RIGHT OUTER", "RightOuter"), + ("FULL OUTER", "FullOuter"), + ("LEFT SEMI", "LeftSemi"), + ("LEFT ANTI", "LeftAnti")).foreach { pair => + validateParsingError( + sqlText = s"SELECT * FROM t1 ${pair._1} JOIN LATERAL (SELECT c1 + c2 AS c3) ON c2 = c3", + errorClass = "UNSUPPORTED_FEATURE", + sqlState = "0A000", + message = s"The feature is not supported: LATERAL join type '${pair._2}'.") + } + } + + test("SPARK-35789: INVALID_SQL_SYNTAX - LATERAL can only be used with subquery") { + Seq("SELECT * FROM t1, LATERAL t2", + "SELECT * FROM t1 JOIN LATERAL t2", + "SELECT * FROM t1, LATERAL (t2 JOIN t3)", + "SELECT * FROM t1, LATERAL (LATERAL t2)", + "SELECT * FROM t1, LATERAL VALUES (0, 1)", + "SELECT * FROM t1, LATERAL RANGE(0, 1)").foreach { sqlText => + validateParsingError( + sqlText = sqlText, + errorClass = "INVALID_SQL_SYNTAX", + sqlState = "42000", + message = "Invalid SQL syntax: LATERAL can only be used with subquery.") + } + } +} From b63a577b656b5fc56feef1666d1f4d1048b945fe Mon Sep 17 00:00:00 2001 From: Tengfei Huang Date: Thu, 3 Feb 2022 16:16:21 +0300 Subject: [PATCH 148/513] [SPARK-37941][SQL] Use error classes in the compilation errors of casting ### What changes were proposed in this pull request? Migrate the following errors in QueryCompilationErrors onto use error classes: 1. upCastFailureError => CANNOT_UP_CAST_DATATYPE 2. unsupportedAbstractDataTypeForUpCastError => UNSUPPORTED_FEATURE 3. cannotUpCastAsAttributeError => removed as no longer used. ### Why are the changes needed? Porting casting errors to new error framework. ### Does this PR introduce any user-facing change? No ### How was this patch tested? New UT added. Closes #35366 from ivoson/SPARK-37941-new. 
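For readers new to the error-class framework, the mechanism these migrations rely on can be modeled in a few lines. This is a deliberately simplified sketch: the real templates live in `core/src/main/resources/error/error-classes.json` and are resolved by Spark's throwable helper, not by the map below.

```
object ErrorClassSketch {
  // Stand-in for error-classes.json: error class -> printf-style message template.
  private val errorClasses: Map[String, String] = Map(
    "CANNOT_UP_CAST_DATATYPE" -> "Cannot up cast %s from %s to %s.\n%s")

  // Build the user-facing message from an error class and its positional parameters.
  def errorMessage(errorClass: String, parameters: Seq[String]): String =
    String.format(errorClasses(errorClass), parameters: _*)
}

// ErrorClassSketch.errorMessage("CANNOT_UP_CAST_DATATYPE",
//   Seq("b", "bigint", "int", "The type path of the target object is: ..."))
// => "Cannot up cast b from bigint to int.\nThe type path of the target object is: ..."
```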
Authored-by: Tengfei Huang Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 3 + .../sql/errors/QueryCompilationErrors.scala | 25 +++--- .../errors/QueryCompilationErrorsSuite.scala | 80 +++++++++++++++++++ 3 files changed, 96 insertions(+), 12 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 06ce22a780524..686c4b63488b1 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -15,6 +15,9 @@ "message" : [ "Cannot parse decimal" ], "sqlState" : "42000" }, + "CANNOT_UP_CAST_DATATYPE" : { + "message" : [ "Cannot up cast %s from %s to %s.\n%s" ] + }, "CAST_CAUSES_OVERFLOW" : { "message" : [ "Casting %s to %s causes overflow. To return NULL instead, use 'try_cast'. If necessary set %s to false to bypass this error." ], "sqlState" : "22005" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 3250098e65063..331636618a502 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -158,16 +158,24 @@ object QueryCompilationErrors { def upCastFailureError( fromStr: String, from: Expression, to: DataType, walkedTypePath: Seq[String]): Throwable = { new AnalysisException( - s"Cannot up cast $fromStr from " + - s"${from.dataType.catalogString} to ${to.catalogString}.\n" + + errorClass = "CANNOT_UP_CAST_DATATYPE", + messageParameters = Array( + fromStr, + from.dataType.catalogString, + to.catalogString, s"The type path of the target object is:\n" + walkedTypePath.mkString("", "\n", "\n") + - "You can either add an explicit cast to the input data or choose a higher precision " + - "type of the field in the target object") + "You can either add an explicit cast to the input data or choose a higher precision " + + "type of the field in the target object" + ) + ) } def unsupportedAbstractDataTypeForUpCastError(gotType: AbstractDataType): Throwable = { new AnalysisException( - s"UpCast only support DecimalType as AbstractDataType yet, but got: $gotType") + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = + Array(s"UpCast only support DecimalType as AbstractDataType yet, but got: $gotType") + ) } def outerScopeFailureForNewInstanceError(className: String): Throwable = { @@ -415,13 +423,6 @@ object QueryCompilationErrors { s"'${child.output.map(_.name).mkString("(", ",", ")")}'") } - def cannotUpCastAsAttributeError( - fromAttr: Attribute, toAttr: Attribute): Throwable = { - new AnalysisException(s"Cannot up cast ${fromAttr.sql} from " + - s"${fromAttr.dataType.catalogString} to ${toAttr.dataType.catalogString} " + - "as it may truncate") - } - def functionUndefinedError(name: FunctionIdentifier): Throwable = { new AnalysisException(s"undefined function $name") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala new file mode 100644 index 0000000000000..f41030bf3cbcc --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.errors + +import org.apache.spark.sql.{AnalysisException, Dataset, QueryTest} +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.{Alias, UpCast} +import org.apache.spark.sql.catalyst.plans.logical.Project +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.NumericType + +case class StringLongClass(a: String, b: Long) + +case class StringIntClass(a: String, b: Int) + +case class ComplexClass(a: Long, b: StringLongClass) + +class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + test("CANNOT_UP_CAST_DATATYPE: invalid upcast data type") { + val msg1 = intercept[AnalysisException] { + sql("select 'value1' as a, 1L as b").as[StringIntClass] + }.message + assert(msg1 == + s""" + |Cannot up cast b from bigint to int. + |The type path of the target object is: + |- field (class: "scala.Int", name: "b") + |- root class: "org.apache.spark.sql.errors.StringIntClass" + |You can either add an explicit cast to the input data or choose a higher precision type + """.stripMargin.trim + " of the field in the target object") + + val msg2 = intercept[AnalysisException] { + sql("select 1L as a," + + " named_struct('a', 'value1', 'b', cast(1.0 as decimal(38,18))) as b") + .as[ComplexClass] + }.message + assert(msg2 == + s""" + |Cannot up cast b.`b` from decimal(38,18) to bigint. + |The type path of the target object is: + |- field (class: "scala.Long", name: "b") + |- field (class: "org.apache.spark.sql.errors.StringLongClass", name: "b") + |- root class: "org.apache.spark.sql.errors.ComplexClass" + |You can either add an explicit cast to the input data or choose a higher precision type + """.stripMargin.trim + " of the field in the target object") + } + + test("UNSUPPORTED_FEATURE: UpCast only support DecimalType as AbstractDataType") { + val df = sql("select 1 as value") + + val msg = intercept[AnalysisException] { + val plan = Project( + Seq(Alias(UpCast(UnresolvedAttribute("value"), NumericType), "value")()), + df.logicalPlan) + + Dataset.ofRows(spark, plan) + }.message + assert(msg.contains("The feature is not supported: " + + "UpCast only support DecimalType as AbstractDataType yet," + + " but got: org.apache.spark.sql.types.NumericType")) + } + +} From 24723ecb7b78dc415caead9b08c4655ed6bab1ba Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 3 Feb 2022 08:56:07 -0800 Subject: [PATCH 149/513] [SPARK-38014][SQL][TESTS] Add Parquet Data Page V2 write scenario for `BuiltInDataSourceWriteBenchmark` ### What changes were proposed in this pull request? 
`BuiltInDataSourceWriteBenchmark` only test data page V1 for parquet now, this pr add parquet data page V2 write scenario and update relevant benchmark result files. ### Why are the changes needed? Add micro benchmark scene for parquet ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35310 from LuciferYang/SPARK-38014. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- ...DataSourceWriteBenchmark-jdk11-results.txt | 60 +++++++++------- ...DataSourceWriteBenchmark-jdk17-results.txt | 68 +++++++++++-------- ...uiltInDataSourceWriteBenchmark-results.txt | 60 +++++++++------- .../BuiltInDataSourceWriteBenchmark.scala | 14 +++- .../benchmark/DataSourceWriteBenchmark.scala | 9 ++- 5 files changed, 129 insertions(+), 82 deletions(-) diff --git a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk11-results.txt b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk11-results.txt index d1395ef07eb0d..8ed23d4ba5c31 100644 --- a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk11-results.txt @@ -2,59 +2,69 @@ Parquet writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1027-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +Parquet(PARQUET_1_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 2617 2756 197 6.0 166.4 1.0X -Output Single Double Column 2753 2782 41 5.7 175.0 1.0X -Output Int and String Column 7625 7664 54 2.1 484.8 0.3X -Output Partitions 4964 5023 84 3.2 315.6 0.5X -Output Buckets 6988 7051 88 2.3 444.3 0.4X +Output Single Int Column 2199 2291 130 7.2 139.8 1.0X +Output Single Double Column 2724 2753 40 5.8 173.2 0.8X +Output Int and String Column 6836 6998 229 2.3 434.6 0.3X +Output Partitions 4936 4970 49 3.2 313.8 0.4X +Output Buckets 6672 6708 50 2.4 424.2 0.3X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1027-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Parquet(PARQUET_2_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Output Single Int Column 2610 2622 17 6.0 166.0 1.0X +Output Single Double Column 2389 2425 51 6.6 151.9 1.1X +Output Int and String Column 7516 7540 35 2.1 477.9 0.3X +Output Partitions 5190 5195 8 3.0 329.9 0.5X +Output Buckets 6444 6446 1 2.4 409.7 0.4X ================================================================================================ ORC writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1027-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz ORC writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1972 
1988 23 8.0 125.4 1.0X -Output Single Double Column 2230 2312 116 7.1 141.8 0.9X -Output Int and String Column 5748 5858 156 2.7 365.4 0.3X -Output Partitions 4083 4104 30 3.9 259.6 0.5X -Output Buckets 6062 6083 29 2.6 385.4 0.3X +Output Single Int Column 1589 1624 49 9.9 101.0 1.0X +Output Single Double Column 2221 2243 32 7.1 141.2 0.7X +Output Int and String Column 5543 5640 138 2.8 352.4 0.3X +Output Partitions 4135 4284 212 3.8 262.9 0.4X +Output Buckets 6100 6234 190 2.6 387.8 0.3X ================================================================================================ JSON writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1027-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz JSON writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 2444 2495 72 6.4 155.4 1.0X -Output Single Double Column 3384 3388 5 4.6 215.1 0.7X -Output Int and String Column 5762 5771 13 2.7 366.4 0.4X -Output Partitions 4727 4777 70 3.3 300.6 0.5X -Output Buckets 6420 6541 171 2.4 408.2 0.4X +Output Single Int Column 2475 2492 24 6.4 157.3 1.0X +Output Single Double Column 3524 3525 3 4.5 224.0 0.7X +Output Int and String Column 5480 5533 74 2.9 348.4 0.5X +Output Partitions 4735 4748 19 3.3 301.0 0.5X +Output Buckets 6251 6264 19 2.5 397.4 0.4X ================================================================================================ CSV writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1027-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz CSV writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 3301 3325 34 4.8 209.8 1.0X -Output Single Double Column 3897 3923 37 4.0 247.8 0.8X -Output Int and String Column 6484 6487 4 2.4 412.3 0.5X -Output Partitions 5896 5899 5 2.7 374.8 0.6X -Output Buckets 7919 7927 12 2.0 503.5 0.4X +Output Single Int Column 3293 3301 11 4.8 209.4 1.0X +Output Single Double Column 4085 4095 14 3.9 259.7 0.8X +Output Int and String Column 6369 6375 8 2.5 404.9 0.5X +Output Partitions 6067 6090 32 2.6 385.7 0.5X +Output Buckets 7736 7863 180 2.0 491.8 0.4X diff --git a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk17-results.txt b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk17-results.txt index b4e0345fa5644..5f64bf7b624cb 100644 --- a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk17-results.txt +++ b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk17-results.txt @@ -2,59 +2,69 @@ Parquet writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz -Parquet writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1027-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz 
+Parquet(PARQUET_1_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 2948 2954 8 5.3 187.4 1.0X -Output Single Double Column 2978 3012 48 5.3 189.3 1.0X -Output Int and String Column 8568 8651 117 1.8 544.8 0.3X -Output Partitions 5196 5273 110 3.0 330.3 0.6X -Output Buckets 6761 6800 55 2.3 429.8 0.4X +Output Single Int Column 3119 3167 68 5.0 198.3 1.0X +Output Single Double Column 3156 3298 201 5.0 200.7 1.0X +Output Int and String Column 8070 8207 193 1.9 513.1 0.4X +Output Partitions 5636 5887 355 2.8 358.3 0.6X +Output Buckets 7523 7541 25 2.1 478.3 0.4X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1027-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +Parquet(PARQUET_2_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Output Single Int Column 3678 3787 154 4.3 233.9 1.0X +Output Single Double Column 3201 3229 39 4.9 203.5 1.1X +Output Int and String Column 8322 8333 15 1.9 529.1 0.4X +Output Partitions 6184 6202 26 2.5 393.1 0.6X +Output Buckets 7341 7406 93 2.1 466.7 0.5X ================================================================================================ ORC writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1027-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz ORC writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 2234 2244 14 7.0 142.1 1.0X -Output Single Double Column 2824 2876 73 5.6 179.6 0.8X -Output Int and String Column 7665 7753 124 2.1 487.3 0.3X -Output Partitions 4985 5004 28 3.2 316.9 0.4X -Output Buckets 6765 6814 69 2.3 430.1 0.3X +Output Single Int Column 2264 2301 53 6.9 143.9 1.0X +Output Single Double Column 2929 3092 230 5.4 186.2 0.8X +Output Int and String Column 7562 7713 212 2.1 480.8 0.3X +Output Partitions 5265 5318 74 3.0 334.8 0.4X +Output Buckets 7117 7160 61 2.2 452.5 0.3X ================================================================================================ JSON writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1027-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz JSON writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 2783 2826 61 5.7 177.0 1.0X -Output Single Double Column 3983 4009 37 3.9 253.3 0.7X -Output Int and String Column 6656 6679 32 2.4 423.2 0.4X -Output Partitions 5289 5305 22 3.0 336.3 0.5X -Output Buckets 6584 6695 156 2.4 418.6 0.4X +Output Single Int Column 2881 2964 118 5.5 183.2 1.0X +Output Single Double Column 4568 4578 14 3.4 290.4 0.6X 
+Output Int and String Column 6943 7078 192 2.3 441.4 0.4X +Output Partitions 5862 5883 30 2.7 372.7 0.5X +Output Buckets 7176 7297 170 2.2 456.3 0.4X ================================================================================================ CSV writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1027-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz CSV writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 4271 4338 95 3.7 271.5 1.0X -Output Single Double Column 5145 5207 87 3.1 327.1 0.8X -Output Int and String Column 7573 7682 154 2.1 481.5 0.6X -Output Partitions 6644 6675 44 2.4 422.4 0.6X -Output Buckets 8497 8539 59 1.9 540.2 0.5X +Output Single Int Column 4571 4577 8 3.4 290.6 1.0X +Output Single Double Column 5769 5794 34 2.7 366.8 0.8X +Output Int and String Column 8372 8414 59 1.9 532.3 0.5X +Output Partitions 7186 7215 41 2.2 456.9 0.6X +Output Buckets 9297 9319 31 1.7 591.1 0.5X diff --git a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt index 442b0cce429dc..88b82991c2d16 100644 --- a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt +++ b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt @@ -2,59 +2,69 @@ Parquet writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1027-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz -Parquet writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +Parquet(PARQUET_1_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1968 2146 251 8.0 125.1 1.0X -Output Single Double Column 1921 2073 215 8.2 122.1 1.0X -Output Int and String Column 5630 6171 766 2.8 357.9 0.3X -Output Partitions 3699 3733 48 4.3 235.2 0.5X -Output Buckets 4705 4746 59 3.3 299.1 0.4X +Output Single Int Column 2089 2185 135 7.5 132.8 1.0X +Output Single Double Column 2156 2212 80 7.3 137.1 1.0X +Output Int and String Column 5673 5705 46 2.8 360.7 0.4X +Output Partitions 3917 4052 192 4.0 249.0 0.5X +Output Buckets 4782 5108 461 3.3 304.0 0.4X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1027-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +Parquet(PARQUET_2_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Output Single Int Column 2201 2208 10 7.1 139.9 1.0X +Output Single Double Column 2057 2066 13 7.6 130.8 1.1X +Output Int and String Column 5969 6011 60 2.6 379.5 0.4X +Output Partitions 3777 3823 65 4.2 240.1 0.6X +Output Buckets 4889 4895 8 3.2 310.8 0.5X ================================================================================================ ORC writer benchmark 
================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1027-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz ORC writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1507 1546 54 10.4 95.8 1.0X -Output Single Double Column 1641 1650 12 9.6 104.4 0.9X -Output Int and String Column 5671 5738 95 2.8 360.6 0.3X -Output Partitions 3068 3112 63 5.1 195.0 0.5X -Output Buckets 4635 4894 366 3.4 294.7 0.3X +Output Single Int Column 1634 1645 16 9.6 103.9 1.0X +Output Single Double Column 1680 1691 15 9.4 106.8 1.0X +Output Int and String Column 5603 5611 11 2.8 356.3 0.3X +Output Partitions 3091 3116 36 5.1 196.5 0.5X +Output Buckets 4472 4734 372 3.5 284.3 0.4X ================================================================================================ JSON writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1027-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz JSON writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 2206 2243 51 7.1 140.3 1.0X -Output Single Double Column 2868 2876 11 5.5 182.3 0.8X -Output Int and String Column 6017 6140 175 2.6 382.5 0.4X -Output Partitions 3602 3602 0 4.4 229.0 0.6X -Output Buckets 5308 5340 46 3.0 337.5 0.4X +Output Single Int Column 2359 2380 29 6.7 150.0 1.0X +Output Single Double Column 2971 2991 29 5.3 188.9 0.8X +Output Int and String Column 6070 6244 246 2.6 385.9 0.4X +Output Partitions 3635 3686 73 4.3 231.1 0.6X +Output Buckets 5066 5082 22 3.1 322.1 0.5X ================================================================================================ CSV writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1027-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz CSV writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 3136 3137 2 5.0 199.4 1.0X -Output Single Double Column 3504 3505 2 4.5 222.8 0.9X -Output Int and String Column 7075 7473 562 2.2 449.8 0.4X -Output Partitions 5067 5228 227 3.1 322.2 0.6X -Output Buckets 6695 6718 33 2.3 425.7 0.5X +Output Single Int Column 3116 3117 2 5.0 198.1 1.0X +Output Single Double Column 3575 3695 170 4.4 227.3 0.9X +Output Int and String Column 7040 7482 626 2.2 447.6 0.4X +Output Partitions 4819 4995 249 3.3 306.4 0.6X +Output Buckets 6638 6656 25 2.4 422.0 0.5X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala index 361deb0d3e3b6..45d50b5e11a90 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BuiltInDataSourceWriteBenchmark.scala @@ -16,6 +16,9 @@ */ package org.apache.spark.sql.execution.benchmark +import org.apache.parquet.column.ParquetProperties +import org.apache.parquet.hadoop.ParquetOutputFormat + import org.apache.spark.sql.internal.SQLConf /** @@ -53,7 +56,16 @@ object BuiltInDataSourceWriteBenchmark extends DataSourceWriteBenchmark { formats.foreach { format => runBenchmark(s"$format writer benchmark") { - runDataSourceBenchmark(format) + if (format.equals("Parquet")) { + ParquetProperties.WriterVersion.values().foreach { + writeVersion => + withSQLConf(ParquetOutputFormat.WRITER_VERSION -> writeVersion.toString) { + runDataSourceBenchmark("Parquet", Some(writeVersion.toString)) + } + } + } else { + runDataSourceBenchmark(format) + } } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceWriteBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceWriteBenchmark.scala index 405d60794ede0..77e26048e0425 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceWriteBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceWriteBenchmark.scala @@ -66,7 +66,7 @@ trait DataSourceWriteBenchmark extends SqlBasedBenchmark { } } - def runDataSourceBenchmark(format: String): Unit = { + def runDataSourceBenchmark(format: String, extraInfo: Option[String] = None): Unit = { val tableInt = "tableInt" val tableDouble = "tableDouble" val tableIntString = "tableIntString" @@ -75,7 +75,12 @@ trait DataSourceWriteBenchmark extends SqlBasedBenchmark { withTempTable(tempTable) { spark.range(numRows).createOrReplaceTempView(tempTable) withTable(tableInt, tableDouble, tableIntString, tablePartition, tableBucket) { - val benchmark = new Benchmark(s"$format writer benchmark", numRows, output = output) + val writerName = extraInfo match { + case Some(extra) => s"$format($extra)" + case _ => format + } + val benchmark = + new Benchmark(s"$writerName writer benchmark", numRows, output = output) writeNumeric(tableInt, format, benchmark, "Int") writeNumeric(tableDouble, format, benchmark, "Double") writeIntString(tableIntString, format, benchmark) From 4fcfcc89beb814dbcc784d320bca89fcda51cafd Mon Sep 17 00:00:00 2001 From: Martin Tzvetanov Grigorov Date: Thu, 3 Feb 2022 09:00:50 -0800 Subject: [PATCH 150/513] [SPARK-38096][BUILD] Update sbt to 1.6.2 ### What changes were proposed in this pull request? This PR aims to upgrade SBT to 1.6.2. ### Why are the changes needed? Sbt 1.6.2 was released with minor improvements - https://eed3si9n.com/sbt-1.6.2 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? GA and AppVeyor. Closes #35388 from martin-g/sbt-1.6.2. 
Authored-by: Martin Tzvetanov Grigorov Signed-off-by: Dongjoon Hyun --- dev/appveyor-install-dependencies.ps1 | 2 +- project/build.properties | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/appveyor-install-dependencies.ps1 b/dev/appveyor-install-dependencies.ps1 index dd9acef2451ee..d469c98fdb3a2 100644 --- a/dev/appveyor-install-dependencies.ps1 +++ b/dev/appveyor-install-dependencies.ps1 @@ -97,7 +97,7 @@ if (!(Test-Path $tools)) { # ========================== SBT Push-Location $tools -$sbtVer = "1.6.1" +$sbtVer = "1.6.2" Start-FileDownload "https://github.com/sbt/sbt/releases/download/v$sbtVer/sbt-$sbtVer.zip" "sbt.zip" # extract diff --git a/project/build.properties b/project/build.properties index d434f8eead721..8599f07ab2b6f 100644 --- a/project/build.properties +++ b/project/build.properties @@ -15,4 +15,4 @@ # limitations under the License. # # Please update the version in appveyor-install-dependencies.ps1 together. -sbt.version=1.6.1 +sbt.version=1.6.2 From 7a613ecb826d3009f4587cefeb89f31b1cb4bed2 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 3 Feb 2022 13:38:10 -0800 Subject: [PATCH 151/513] [SPARK-38100][SQL] Remove unused private method in `Decimal` ### What changes were proposed in this pull request? There is an unused `private` method `overflowException` in `org.apache.spark.sql.types.Decimal`, this method add by SPARK-28741 and the relevant invocations are replaced by `QueryExecutionErrors.castingCauseOverflowError` directly after SPARK-35060. So this pr remove this unused method. ### Why are the changes needed? Remove unused method. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35392 from LuciferYang/SPARK-38100. Authored-by: yangjie01 Signed-off-by: huaxingao --- .../src/main/scala/org/apache/spark/sql/types/Decimal.scala | 3 --- 1 file changed, 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index cb468c523f36c..4681429723183 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -251,9 +251,6 @@ final class Decimal extends Ordered[Decimal] with Serializable { def toByte: Byte = toLong.toByte - private def overflowException(dataType: String) = - throw QueryExecutionErrors.castingCauseOverflowError(this, dataType) - /** * @return the Byte value that is equal to the rounded decimal. * @throws ArithmeticException if the decimal is too big to fit in Byte type. From 54b11fab2fedfe5382d41737f59fff672d2d39cd Mon Sep 17 00:00:00 2001 From: Ted Yu Date: Fri, 4 Feb 2022 09:47:44 -0600 Subject: [PATCH 152/513] [MINOR] Remove unnecessary null check for exception cause ### What changes were proposed in this pull request? In two classes under common/network-shuffle, we check whether t.getCause() is null before instanceof check. The null check is not needed since null pointer wouldn't pass instanceof check. ### Why are the changes needed? This PR simplifies the code by dropping unnecessary null check. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing test suite. Closes #35394 from tedyu/shuf-null. 
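The underlying guarantee is that an instance-of test on a null reference is simply false and never throws, so the explicit null guard adds nothing. A minimal Scala illustration of the same JVM behavior:

```
import java.io.IOException

object NullInstanceOfSketch extends App {
  // `e.getCause() != null && e.getCause() instanceof IOException` is equivalent to
  // `e.getCause() instanceof IOException`, because instanceof on null is false.
  val e = new RuntimeException("boom")          // no cause attached, getCause is null
  assert(e.getCause == null)
  assert(!e.getCause.isInstanceOf[IOException]) // evaluates to false, never throws
}
```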
Authored-by: Ted Yu Signed-off-by: Sean Owen --- .../java/org/apache/spark/network/shuffle/ErrorHandler.java | 4 ++-- .../apache/spark/network/shuffle/RetryingBlockTransferor.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java index 9136ff6af4e7e..519b02d12421a 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java @@ -82,8 +82,8 @@ public boolean shouldRetryError(Throwable t) { // If it is a FileNotFoundException originating from the client while pushing the shuffle // blocks to the server, even then there is no need to retry. We will still log this // exception once which helps with debugging. - if (t.getCause() != null && (t.getCause() instanceof ConnectException || - t.getCause() instanceof FileNotFoundException)) { + if (t.getCause() instanceof ConnectException || + t.getCause() instanceof FileNotFoundException) { return false; } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java index 512e4a52c8628..463edc770d28e 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java @@ -191,7 +191,7 @@ private synchronized void initiateRetry() { */ private synchronized boolean shouldRetry(Throwable e) { boolean isIOException = e instanceof IOException - || (e.getCause() != null && e.getCause() instanceof IOException); + || e.getCause() instanceof IOException; boolean hasRemainingRetries = retryCount < maxRetries; return isIOException && hasRemainingRetries && errorHandler.shouldRetryError(e); } From 973ea0f06e72ab64574cbf00e095922a3415f864 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 4 Feb 2022 18:58:16 -0800 Subject: [PATCH 153/513] [SPARK-36837][BUILD] Upgrade Kafka to 3.1.0 ### What changes were proposed in this pull request? This PR aims to upgrade Apache Kafka client library from 2.8.1 to 3.1.0 to support Java 17 officially. - https://issues.apache.org/jira/browse/KAFKA-13273 (Add support for Java 17) ### Why are the changes needed? Apache Kafka 3.1.0 has the following improvements and bug fixes including client side. - https://downloads.apache.org/kafka/3.1.0/RELEASE_NOTES.html - https://downloads.apache.org/kafka/3.0.0/RELEASE_NOTES.html The following is the notable accumulated breaking changes at Apache Kafka 3.0+ - KAFKA-12554: Refactor Log layer - KIP-405: Log layer refactor https://docs.google.com/document/d/1dQJL4MCwqQJSPmZkVmVzshFZKuFy_bCPtubav4wBfHQ/edit# - KAFKA-12945: Remove port, host.name and related configs in 3.0 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #34089 from dongjoon-hyun/SPARK-36837. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/sql/kafka010/KafkaTestUtils.scala | 7 +++---- .../streaming/kafka010/KafkaRDDSuite.scala | 20 +++++++++++++------ .../streaming/kafka010/KafkaTestUtils.scala | 3 ++- pom.xml | 2 +- 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala index 058563dfa167d..c5d2a99d156f8 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala @@ -44,6 +44,7 @@ import org.apache.kafka.common.network.ListenerName import org.apache.kafka.common.security.auth.SecurityProtocol.{PLAINTEXT, SASL_PLAINTEXT} import org.apache.kafka.common.serialization.StringSerializer import org.apache.kafka.common.utils.SystemTime +import org.apache.zookeeper.client.ZKClientConfig import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer} import org.apache.zookeeper.server.auth.SASLAuthenticationProvider import org.scalatest.Assertions._ @@ -266,7 +267,7 @@ class KafkaTestUtils( // Get the actual zookeeper binding port zkPort = zookeeper.actualPort zkClient = KafkaZkClient(s"$zkHost:$zkPort", isSecure = false, zkSessionTimeout, - zkConnectionTimeout, 1, new SystemTime()) + zkConnectionTimeout, 1, new SystemTime(), "test", new ZKClientConfig) zkReady = true } @@ -488,9 +489,7 @@ class KafkaTestUtils( protected def brokerConfiguration: Properties = { val props = new Properties() props.put("broker.id", "0") - props.put("host.name", "127.0.0.1") - props.put("advertised.host.name", "127.0.0.1") - props.put("port", brokerPort.toString) + props.put("listeners", s"PLAINTEXT://127.0.0.1:$brokerPort") props.put("log.dir", Utils.createTempDir().getAbsolutePath) props.put("zookeeper.connect", zkAddress) props.put("zookeeper.connection.timeout.ms", "60000") diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala index b9ef16fb58cb9..9c57663b3d8ef 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala @@ -21,15 +21,17 @@ import java.{ util => ju } import java.io.File import scala.collection.JavaConverters._ +import scala.concurrent.duration._ import scala.util.Random -import kafka.log.{CleanerConfig, Log, LogCleaner, LogConfig, ProducerStateManager} +import kafka.log.{CleanerConfig, LogCleaner, LogConfig, UnifiedLog} import kafka.server.{BrokerTopicStats, LogDirFailureChannel} import kafka.utils.Pool import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.record.{CompressionType, MemoryRecords, SimpleRecord} import org.apache.kafka.common.serialization.StringDeserializer import org.scalatest.BeforeAndAfterAll +import org.scalatest.concurrent.Eventually.{eventually, interval, timeout} import org.apache.spark._ import org.apache.spark.scheduler.ExecutorCacheTaskLocation @@ -84,7 +86,7 @@ class KafkaRDDSuite extends SparkFunSuite with BeforeAndAfterAll { private def compactLogs(topic: String, partition: Int, messages: Array[(String, String)]): Unit = { val mockTime = new MockTime() - val logs = new Pool[TopicPartition, Log]() + val 
logs = new Pool[TopicPartition, UnifiedLog]() val logDir = kafkaTestUtils.brokerLogDir val dir = new File(logDir, topic + "-" + partition) dir.mkdirs() @@ -93,7 +95,7 @@ class KafkaRDDSuite extends SparkFunSuite with BeforeAndAfterAll { logProps.put(LogConfig.MinCleanableDirtyRatioProp, java.lang.Float.valueOf(0.1f)) val logDirFailureChannel = new LogDirFailureChannel(1) val topicPartition = new TopicPartition(topic, partition) - val log = new Log( + val log = UnifiedLog( dir, LogConfig(logProps), 0L, @@ -103,9 +105,10 @@ class KafkaRDDSuite extends SparkFunSuite with BeforeAndAfterAll { mockTime, Int.MaxValue, Int.MaxValue, - topicPartition, - new ProducerStateManager(topicPartition, dir), - logDirFailureChannel + logDirFailureChannel, + lastShutdownClean = false, + topicId = None, + keepPartitionMetadataFile = false ) messages.foreach { case (k, v) => val record = new SimpleRecord(k.getBytes, v.getBytes) @@ -201,6 +204,11 @@ class KafkaRDDSuite extends SparkFunSuite with BeforeAndAfterAll { sc, kafkaParams, offsetRanges, preferredHosts ).map(m => m.key -> m.value) + // To make it sure that the compaction happens + eventually(timeout(20.second), interval(1.seconds)) { + val dir = new File(kafkaTestUtils.brokerLogDir, topic + "-0") + assert(dir.listFiles().exists(_.getName.endsWith(".deleted"))) + } val received = rdd.collect.toSet assert(received === compactedMessages.toSet) diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala index 0783e591def51..dd8d66f1fc08f 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala @@ -35,6 +35,7 @@ import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.network.ListenerName import org.apache.kafka.common.serialization.StringSerializer import org.apache.kafka.common.utils.{Time => KTime} +import org.apache.zookeeper.client.ZKClientConfig import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer} import org.apache.spark.{SparkConf, SparkException} @@ -106,7 +107,7 @@ private[kafka010] class KafkaTestUtils extends Logging { // Get the actual zookeeper binding port zkPort = zookeeper.actualPort zkClient = KafkaZkClient(s"$zkHost:$zkPort", isSecure = false, zkSessionTimeout, - zkConnectionTimeout, 1, KTime.SYSTEM) + zkConnectionTimeout, 1, KTime.SYSTEM, "test", new ZKClientConfig) admClient = new AdminZkClient(zkClient) zkReady = true } diff --git a/pom.xml b/pom.xml index 09577f220de5c..f8f13fc77f65a 100644 --- a/pom.xml +++ b/pom.xml @@ -133,7 +133,7 @@ 2.3 - 2.8.1 + 3.1.0 10.14.2.0 1.12.2 From 49f215a5ae64a50e889ae5cf94421cdeb0eacf09 Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 4 Feb 2022 20:05:35 -0800 Subject: [PATCH 154/513] [SPARK-38082][PYTHON] Update minimum numpy version to 1.15 ### What changes were proposed in this pull request? This PR changes minimum required numpy version to 1.15. Additionally, it replaces calls to deprecated `tostring` method. ### Why are the changes needed? Current lower bound is ancient and no longer supported by the rest our dependencies. Additionally, supporting it, requires usage of long deprecated methods creating unnecessary gaps in our type checker coverage. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. 
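For context on the `tostring` -> `tobytes` part of this change: `ndarray.tostring()` is simply an alias of `tobytes()` and is deprecated in current NumPy releases, so the pickle/serialization helpers touched by this patch keep producing identical bytes. A minimal sketch with plain NumPy (assuming only `numpy>=1.15`, nothing Spark-specific):

```python
import numpy as np

values = np.array([1.0, 0.0, 3.5], dtype=np.float64)

raw = values.tobytes()                       # replacement for the deprecated values.tostring()
restored = np.frombuffer(raw, dtype=np.float64)

assert np.array_equal(values, restored)      # same byte-for-byte round trip as before
```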
Closes #35398 from zero323/SPARK-38082. Authored-by: zero323 Signed-off-by: Dongjoon Hyun --- python/pyspark/ml/linalg/__init__.py | 12 ++++++------ python/pyspark/mllib/linalg/__init__.py | 14 +++++++------- python/setup.py | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/python/pyspark/ml/linalg/__init__.py b/python/pyspark/ml/linalg/__init__.py index b361925712818..03e63e9690316 100644 --- a/python/pyspark/ml/linalg/__init__.py +++ b/python/pyspark/ml/linalg/__init__.py @@ -303,7 +303,7 @@ def __init__(self, ar): self.array = ar def __reduce__(self): - return DenseVector, (self.array.tostring(),) + return DenseVector, (self.array.tobytes(),) def numNonzeros(self): """ @@ -591,7 +591,7 @@ def norm(self, p): return np.linalg.norm(self.values, p) def __reduce__(self): - return (SparseVector, (self.size, self.indices.tostring(), self.values.tostring())) + return (SparseVector, (self.size, self.indices.tobytes(), self.values.tobytes())) def dot(self, other): """ @@ -949,7 +949,7 @@ def __reduce__(self): return DenseMatrix, ( self.numRows, self.numCols, - self.values.tostring(), + self.values.tobytes(), int(self.isTransposed), ) @@ -1160,9 +1160,9 @@ def __reduce__(self): return SparseMatrix, ( self.numRows, self.numCols, - self.colPtrs.tostring(), - self.rowIndices.tostring(), - self.values.tostring(), + self.colPtrs.tobytes(), + self.rowIndices.tobytes(), + self.values.tobytes(), int(self.isTransposed), ) diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index 30fa84cf8a0f4..b9c391ebf82e2 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -390,7 +390,7 @@ def parse(s: str) -> "DenseVector": return DenseVector(values) def __reduce__(self) -> Tuple[Type["DenseVector"], Tuple[bytes]]: - return DenseVector, (self.array.tostring(),) # type: ignore[attr-defined] + return DenseVector, (self.array.tobytes(),) def numNonzeros(self) -> int: """ @@ -712,8 +712,8 @@ def __reduce__(self) -> Tuple[Type["SparseVector"], Tuple[int, bytes, bytes]]: SparseVector, ( self.size, - self.indices.tostring(), # type: ignore[attr-defined] - self.values.tostring(), # type: ignore[attr-defined] + self.indices.tobytes(), + self.values.tobytes(), ), ) @@ -1256,7 +1256,7 @@ def __reduce__(self) -> Tuple[Type["DenseMatrix"], Tuple[int, int, bytes, int]]: return DenseMatrix, ( self.numRows, self.numCols, - self.values.tostring(), # type: ignore[attr-defined] + self.values.tobytes(), int(self.isTransposed), ) @@ -1489,9 +1489,9 @@ def __reduce__(self) -> Tuple[Type["SparseMatrix"], Tuple[int, int, bytes, bytes return SparseMatrix, ( self.numRows, self.numCols, - self.colPtrs.tostring(), # type: ignore[attr-defined] - self.rowIndices.tostring(), # type: ignore[attr-defined] - self.values.tostring(), # type: ignore[attr-defined] + self.colPtrs.tobytes(), + self.rowIndices.tobytes(), + self.values.tobytes(), int(self.isTransposed), ) diff --git a/python/setup.py b/python/setup.py index 4ff495c19d4dc..673b146cb6c5d 100755 --- a/python/setup.py +++ b/python/setup.py @@ -260,8 +260,8 @@ def run(self): # if you're updating the versions or dependencies. 
install_requires=['py4j==0.10.9.3'], extras_require={ - 'ml': ['numpy>=1.7'], - 'mllib': ['numpy>=1.7'], + 'ml': ['numpy>=1.15'], + 'mllib': ['numpy>=1.15'], 'sql': [ 'pandas>=%s' % _minimum_pandas_version, 'pyarrow>=%s' % _minimum_pyarrow_version, @@ -269,7 +269,7 @@ def run(self): 'pandas_on_spark': [ 'pandas>=%s' % _minimum_pandas_version, 'pyarrow>=%s' % _minimum_pyarrow_version, - 'numpy>=1.14', + 'numpy>=1.15', ], }, python_requires='>=3.7', From 3e0d4899dcb3be226a120cbeec8df78ff7fb00ba Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 4 Feb 2022 20:21:02 -0800 Subject: [PATCH 155/513] [SPARK-38073][PYTHON] Update atexit function to avoid issues with late binding ### What changes were proposed in this pull request? This PR updates function registered in PySpark shell `atexit` to capture `SparkContext` instead of depending on the surrounding context. **Note** A simpler approach ```python atexit.register(sc.stop) ``` is possible, but won't work properly in case of contexts with monkey patched `stop` methods (for example like [pyspark-asyncactions](https://github.com/zero323/pyspark-asyncactions)) I also consider using `_active_spark_context` ```python atexit.register(lambda: ( SparkContext._active_spark_context.stop() if SparkContext._active_spark_context else None )) ``` but `SparkContext` is also out of scope, so that doesn't work without introducing a standard function within the scope. ### Why are the changes needed? When using `ipython` as a driver with Python 3.8, `sc` goes out of scope before `atexit` function is called. This leads to `NameError` on exit. This is a mild annoyance and likely a bug in ipython (there are quite a few of these with similar behavior), but it is easy to address on our side, without causing regressions for users of earlier Python versions. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual testing to confirm that: - Named error is no longer thrown on exit with ipython and Python 3.8 or later. - `stop` is indeed invoked on exit with both plain interpreter and ipython shells. Closes #35396 from zero323/SPARK-38073. Authored-by: zero323 Signed-off-by: Dongjoon Hyun --- python/pyspark/shell.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index f0c487877a086..4164e3ab0ce89 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -46,7 +46,7 @@ sc = spark.sparkContext sql = spark.sql -atexit.register(lambda: sc.stop()) +atexit.register((lambda sc: lambda: sc.stop())(sc)) # for compatibility sqlContext = spark._wrapped From 74ebef243c18e7a8f32bf90ea75ab6afed9e3132 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Sat, 5 Feb 2022 09:47:15 -0600 Subject: [PATCH 156/513] [SPARK-37925][DOC] Update document to mention the workaround for YARN-11053 ### What changes were proposed in this pull request? Update document "Running multiple versions of the Spark Shuffle Service" to mention the workaround for YARN-11053 ### Why are the changes needed? User may stuck when they following the current document to deploy multi-versions Spark Shuffle Service on YARN because of [YARN-11053](https://issues.apache.org/jira/browse/YARN-11053) ### Does this PR introduce _any_ user-facing change? User document changes. ### How was this patch tested? ![image](https://user-images.githubusercontent.com/26535726/152097304-b6945ab7-fbf9-493a-954b-689a0e165936.png) Closes #35223 from pan3793/SPARK-37925. 
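The late-binding behaviour that SPARK-38073 (above, `python/pyspark/shell.py`) works around can be shown with a toy sketch using a made-up `FakeContext` in place of a real `SparkContext`: the plain lambda resolves the name `sc` only when it finally runs, while the double lambda captures the object immediately.

```python
class FakeContext:
    def stop(self):
        print("stopped")

sc = FakeContext()

late_bound = lambda: sc.stop()                # looks the name `sc` up at call time
eager = (lambda sc: lambda: sc.stop())(sc)    # captures the object right now

del sc                                        # simulate the name being cleaned up before atexit runs

eager()                                       # prints "stopped"
try:
    late_bound()
except NameError as err:
    print("late binding failed:", err)
```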
Authored-by: Cheng Pan Signed-off-by: Sean Owen --- docs/running-on-yarn.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index c55ce86531daf..63c03760b8beb 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -916,9 +916,12 @@ support the ability to run shuffle services within an isolated classloader can coexist within a single NodeManager. The `yarn.nodemanager.aux-services..classpath` and, starting from YARN 2.10.2/3.1.1/3.2.0, `yarn.nodemanager.aux-services..remote-classpath` options can be used to configure -this. In addition to setting up separate classpaths, it's necessary to ensure the two versions -advertise to different ports. This can be achieved using the `spark-shuffle-site.xml` file described -above. For example, you may have configuration like: +this. Note that YARN 3.3.0/3.3.1 have an issue which requires setting +`yarn.nodemanager.aux-services..system-classes` as a workaround. See +[YARN-11053](https://issues.apache.org/jira/browse/YARN-11053) for details. In addition to setting +up separate classpaths, it's necessary to ensure the two versions advertise to different ports. +This can be achieved using the `spark-shuffle-site.xml` file described above. For example, you may +have configuration like: ```properties yarn.nodemanager.aux-services = spark_shuffle_x,spark_shuffle_y From c17ba3f9e9802ae5491cd914a4262cfe4e8e6d20 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 6 Feb 2022 10:59:21 +0100 Subject: [PATCH 157/513] [SPARK-37417][PYTHON][ML] Inline type hints for pyspark.ml.linalg.__init__.py ### What changes were proposed in this pull request? Migration of type type annotations for `pyspark.ml.linalg.__init__.py` from stub file to inline hints. ### Why are the changes needed? As part of ongoing type hint migrations. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35380 from zero323/SPARK-37417. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/ml/_typing.pyi | 7 +- python/pyspark/ml/linalg/__init__.py | 370 ++++++++++++++++-------- python/pyspark/ml/linalg/__init__.pyi | 243 ---------------- python/pyspark/mllib/linalg/__init__.py | 14 +- 4 files changed, 267 insertions(+), 367 deletions(-) delete mode 100644 python/pyspark/ml/linalg/__init__.pyi diff --git a/python/pyspark/ml/_typing.pyi b/python/pyspark/ml/_typing.pyi index b51aa9634fe77..7862078bd2621 100644 --- a/python/pyspark/ml/_typing.pyi +++ b/python/pyspark/ml/_typing.pyi @@ -16,12 +16,15 @@ # specific language governing permissions and limitations # under the License. 
-from typing import Any, Dict, TypeVar, Union +from typing import Any, Dict, List, TypeVar, Tuple, Union from typing_extensions import Literal +from numpy import ndarray + import pyspark.ml.base import pyspark.ml.param import pyspark.ml.util +from pyspark.ml.linalg import Vector import pyspark.ml.wrapper from py4j.java_gateway import JavaObject @@ -75,3 +78,5 @@ RankingEvaluatorMetricType = Union[ Literal["ndcgAtK"], Literal["recallAtK"], ] + +VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...]] diff --git a/python/pyspark/ml/linalg/__init__.py b/python/pyspark/ml/linalg/__init__.py index 03e63e9690316..d3d2cbdaa0a01 100644 --- a/python/pyspark/ml/linalg/__init__.py +++ b/python/pyspark/ml/linalg/__init__.py @@ -40,6 +40,22 @@ BooleanType, ) +from typing import ( + Any, + Callable, + cast, + Dict, + Iterable, + List, + Optional, + overload, + Sequence, + Tuple, + Type, + TYPE_CHECKING, + Union, +) + __all__ = [ "Vector", @@ -52,6 +68,11 @@ "Matrices", ] +if TYPE_CHECKING: + from pyspark.mllib._typing import NormType + from pyspark.ml._typing import VectorLike + from scipy.sparse import spmatrix + # Check whether we have SciPy. MLlib works without it too, but if we have it, some methods, # such as _dot and _serialize_double_vector, start to support scipy.sparse matrices. @@ -65,23 +86,23 @@ _have_scipy = False -def _convert_to_vector(d): +def _convert_to_vector(d: Union["VectorLike", "spmatrix", range]) -> "Vector": if isinstance(d, Vector): return d elif type(d) in (array.array, np.array, np.ndarray, list, tuple, range): return DenseVector(d) elif _have_scipy and scipy.sparse.issparse(d): - assert d.shape[1] == 1, "Expected column vector" + assert cast("spmatrix", d).shape[1] == 1, "Expected column vector" # Make sure the converted csc_matrix has sorted indices. - csc = d.tocsc() + csc = cast("spmatrix", d).tocsc() if not csc.has_sorted_indices: csc.sort_indices() - return SparseVector(d.shape[0], csc.indices, csc.data) + return SparseVector(cast("spmatrix", d).shape[0], csc.indices, csc.data) else: raise TypeError("Cannot convert type %s into Vector" % type(d)) -def _vector_size(v): +def _vector_size(v: Union["VectorLike", "spmatrix", range]) -> int: """ Returns the size of the vector. @@ -112,24 +133,24 @@ def _vector_size(v): else: raise ValueError("Cannot treat an ndarray of shape %s as a vector" % str(v.shape)) elif _have_scipy and scipy.sparse.issparse(v): - assert v.shape[1] == 1, "Expected column vector" - return v.shape[0] + assert cast("spmatrix", v).shape[1] == 1, "Expected column vector" + return cast("spmatrix", v).shape[0] else: raise TypeError("Cannot treat type %s as a vector" % type(v)) -def _format_float(f, digits=4): +def _format_float(f: float, digits: int = 4) -> str: s = str(round(f, digits)) if "." 
in s: s = s[: s.index(".") + 1 + digits] return s -def _format_float_list(xs): +def _format_float_list(xs: Iterable[float]) -> List[str]: return [_format_float(x) for x in xs] -def _double_to_long_bits(value): +def _double_to_long_bits(value: float) -> int: if np.isnan(value): value = float("nan") # pack double into 64 bits, then unpack as long int @@ -142,7 +163,7 @@ class VectorUDT(UserDefinedType): """ @classmethod - def sqlType(cls): + def sqlType(cls) -> StructType: return StructType( [ StructField("type", ByteType(), False), @@ -153,37 +174,41 @@ def sqlType(cls): ) @classmethod - def module(cls): + def module(cls) -> str: return "pyspark.ml.linalg" @classmethod - def scalaUDT(cls): + def scalaUDT(cls) -> str: return "org.apache.spark.ml.linalg.VectorUDT" - def serialize(self, obj): + def serialize( + self, obj: "Vector" + ) -> Tuple[int, Optional[int], Optional[List[int]], List[float]]: if isinstance(obj, SparseVector): indices = [int(i) for i in obj.indices] values = [float(v) for v in obj.values] return (0, obj.size, indices, values) elif isinstance(obj, DenseVector): - values = [float(v) for v in obj] + values = [float(v) for v in obj] # type: ignore[attr-defined] return (1, None, None, values) else: raise TypeError("cannot serialize %r of type %r" % (obj, type(obj))) - def deserialize(self, datum): + def deserialize( + self, datum: Tuple[int, Optional[int], Optional[List[int]], List[float]] + ) -> "Vector": assert ( len(datum) == 4 ), "VectorUDT.deserialize given row with length %d but requires 4" % len(datum) tpe = datum[0] if tpe == 0: - return SparseVector(datum[1], datum[2], datum[3]) + return SparseVector(cast(int, datum[1]), cast(List[int], datum[2]), datum[3]) elif tpe == 1: return DenseVector(datum[3]) else: raise ValueError("do not recognize type %r" % tpe) - def simpleString(self): + def simpleString(self) -> str: return "vector" @@ -193,7 +218,7 @@ class MatrixUDT(UserDefinedType): """ @classmethod - def sqlType(cls): + def sqlType(cls) -> StructType: return StructType( [ StructField("type", ByteType(), False), @@ -207,14 +232,16 @@ def sqlType(cls): ) @classmethod - def module(cls): + def module(cls) -> str: return "pyspark.ml.linalg" @classmethod - def scalaUDT(cls): + def scalaUDT(cls) -> str: return "org.apache.spark.ml.linalg.MatrixUDT" - def serialize(self, obj): + def serialize( + self, obj: "Matrix" + ) -> Tuple[int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool]: if isinstance(obj, SparseMatrix): colPtrs = [int(i) for i in obj.colPtrs] rowIndices = [int(i) for i in obj.rowIndices] @@ -234,19 +261,22 @@ def serialize(self, obj): else: raise TypeError("cannot serialize type %r" % (type(obj))) - def deserialize(self, datum): + def deserialize( + self, + datum: Tuple[int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool], + ) -> "Matrix": assert ( len(datum) == 7 ), "MatrixUDT.deserialize given row with length %d but requires 7" % len(datum) tpe = datum[0] if tpe == 0: - return SparseMatrix(*datum[1:]) + return SparseMatrix(*datum[1:]) # type: ignore[arg-type] elif tpe == 1: return DenseMatrix(datum[1], datum[2], datum[5], datum[6]) else: raise ValueError("do not recognize type %r" % tpe) - def simpleString(self): + def simpleString(self) -> str: return "matrix" @@ -258,7 +288,7 @@ class Vector: Abstract class for DenseVector and SparseVector """ - def toArray(self): + def toArray(self) -> np.ndarray: """ Convert the vector into an numpy.ndarray @@ -266,6 +296,9 @@ def toArray(self): """ raise NotImplementedError + def 
__len__(self) -> int: + raise NotImplementedError + class DenseVector(Vector): """ @@ -293,25 +326,26 @@ class DenseVector(Vector): DenseVector([-1.0, -2.0]) """ - def __init__(self, ar): + def __init__(self, ar: Union[bytes, np.ndarray, Iterable[float]]): + ar_: np.ndarray if isinstance(ar, bytes): - ar = np.frombuffer(ar, dtype=np.float64) + ar_ = np.frombuffer(ar, dtype=np.float64) elif not isinstance(ar, np.ndarray): - ar = np.array(ar, dtype=np.float64) - if ar.dtype != np.float64: - ar = ar.astype(np.float64) - self.array = ar + ar_ = np.array(ar, dtype=np.float64) + else: + ar_ = ar.astype(np.float64) if ar.dtype != np.float64 else ar + self.array = ar_ - def __reduce__(self): + def __reduce__(self) -> Tuple[Type["DenseVector"], Tuple[bytes]]: return DenseVector, (self.array.tobytes(),) - def numNonzeros(self): + def numNonzeros(self) -> int: """ Number of nonzero elements. This scans all active values and count non zeros """ return np.count_nonzero(self.array) - def norm(self, p): + def norm(self, p: "NormType") -> np.float64: """ Calculates the norm of a DenseVector. @@ -325,7 +359,7 @@ def norm(self, p): """ return np.linalg.norm(self.array, p) - def dot(self, other): + def dot(self, other: Iterable[float]) -> np.float64: """ Compute the dot product of two Vectors. We support (Numpy array, list, SparseVector, or SciPy sparse) @@ -359,8 +393,8 @@ def dot(self, other): assert len(self) == other.shape[0], "dimension mismatch" return np.dot(self.array, other) elif _have_scipy and scipy.sparse.issparse(other): - assert len(self) == other.shape[0], "dimension mismatch" - return other.transpose().dot(self.toArray()) + assert len(self) == cast("spmatrix", other).shape[0], "dimension mismatch" + return cast("spmatrix", other).transpose().dot(self.toArray()) else: assert len(self) == _vector_size(other), "dimension mismatch" if isinstance(other, SparseVector): @@ -368,9 +402,9 @@ def dot(self, other): elif isinstance(other, Vector): return np.dot(self.toArray(), other.toArray()) else: - return np.dot(self.toArray(), other) + return np.dot(self.toArray(), other) # type: ignore[call-overload] - def squared_distance(self, other): + def squared_distance(self, other: Iterable[float]) -> np.float64: """ Squared distance of two Vectors. @@ -401,41 +435,49 @@ def squared_distance(self, other): if isinstance(other, SparseVector): return other.squared_distance(self) elif _have_scipy and scipy.sparse.issparse(other): - return _convert_to_vector(other).squared_distance(self) + return _convert_to_vector(other).squared_distance(self) # type: ignore[attr-defined] if isinstance(other, Vector): other = other.toArray() elif not isinstance(other, np.ndarray): other = np.array(other) - diff = self.toArray() - other + diff: np.ndarray = self.toArray() - other return np.dot(diff, diff) - def toArray(self): + def toArray(self) -> np.ndarray: """ Returns the underlying numpy.ndarray """ return self.array @property - def values(self): + def values(self) -> np.ndarray: """ Returns the underlying numpy.ndarray """ return self.array - def __getitem__(self, item): + @overload + def __getitem__(self, item: int) -> np.float64: + ... + + @overload + def __getitem__(self, item: slice) -> np.ndarray: + ... 
+ + def __getitem__(self, item: Union[int, slice]) -> Union[np.float64, np.ndarray]: return self.array[item] - def __len__(self): + def __len__(self) -> int: return len(self.array) - def __str__(self): + def __str__(self) -> str: return "[" + ",".join([str(v) for v in self.array]) + "]" - def __repr__(self): + def __repr__(self) -> str: return "DenseVector([%s])" % (", ".join(_format_float(i) for i in self.array)) - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if isinstance(other, DenseVector): return np.array_equal(self.array, other.array) elif isinstance(other, SparseVector): @@ -444,10 +486,10 @@ def __eq__(self, other): return Vectors._equals(list(range(len(self))), self.array, other.indices, other.values) return False - def __ne__(self, other): + def __ne__(self, other: Any) -> bool: return not self == other - def __hash__(self): + def __hash__(self) -> int: size = len(self) result = 31 + size nnz = 0 @@ -461,14 +503,14 @@ def __hash__(self): i += 1 return result - def __getattr__(self, item): + def __getattr__(self, item: str) -> Any: return getattr(self.array, item) - def __neg__(self): + def __neg__(self) -> "DenseVector": return DenseVector(-self.array) - def _delegate(op): - def func(self, other): + def _delegate(op: str) -> Callable[["DenseVector", Any], "DenseVector"]: # type: ignore[misc] + def func(self: "DenseVector", other: Any) -> "DenseVector": if isinstance(other, DenseVector): other = other.array return DenseVector(getattr(self.array, op)(other)) @@ -495,7 +537,33 @@ class SparseVector(Vector): alternatively pass SciPy's {scipy.sparse} data types. """ - def __init__(self, size, *args): + @overload + def __init__(self, size: int, __indices: bytes, __values: bytes): + ... + + @overload + def __init__(self, size: int, *args: Tuple[int, float]): + ... + + @overload + def __init__(self, size: int, __indices: Iterable[int], __values: Iterable[float]): + ... + + @overload + def __init__(self, size: int, __pairs: Iterable[Tuple[int, float]]): + ... + + @overload + def __init__(self, size: int, __map: Dict[int, float]): + ... + + def __init__( + self, + size: int, + *args: Union[ + bytes, Tuple[int, float], Iterable[float], Iterable[Tuple[int, float]], Dict[int, float] + ], + ): """ Create a sparse vector, using either a dictionary, a list of (index, value) pairs, or two separate arrays of indices and @@ -535,7 +603,7 @@ def __init__(self, size, *args): pairs = args[0] if type(pairs) == dict: pairs = pairs.items() - pairs = sorted(pairs) + pairs = cast(Iterable[Tuple[int, float]], sorted(pairs)) self.indices = np.array([p[0] for p in pairs], dtype=np.int32) """ A list of indices corresponding to active entries. """ self.values = np.array([p[1] for p in pairs], dtype=np.float64) @@ -570,13 +638,13 @@ def __init__(self, size, *args): ) assert np.min(self.indices) >= 0, "Contains negative index %d" % (np.min(self.indices)) - def numNonzeros(self): + def numNonzeros(self) -> int: """ Number of nonzero elements. This scans all active values and count non zeros. """ return np.count_nonzero(self.values) - def norm(self, p): + def norm(self, p: "NormType") -> np.float64: """ Calculates the norm of a SparseVector. 
@@ -590,10 +658,10 @@ def norm(self, p): """ return np.linalg.norm(self.values, p) - def __reduce__(self): + def __reduce__(self) -> Tuple[Type["SparseVector"], Tuple[int, bytes, bytes]]: return (SparseVector, (self.size, self.indices.tobytes(), self.values.tobytes())) - def dot(self, other): + def dot(self, other: Iterable[float]) -> np.float64: """ Dot product with a SparseVector or 1- or 2-dimensional Numpy array. @@ -643,15 +711,15 @@ def dot(self, other): self_cmind = np.in1d(self.indices, other.indices, assume_unique=True) self_values = self.values[self_cmind] if self_values.size == 0: - return 0.0 + return np.float64(0.0) else: other_cmind = np.in1d(other.indices, self.indices, assume_unique=True) return np.dot(self_values, other.values[other_cmind]) else: - return self.dot(_convert_to_vector(other)) + return self.dot(_convert_to_vector(other)) # type: ignore[arg-type] - def squared_distance(self, other): + def squared_distance(self, other: Iterable[float]) -> np.float64: """ Squared distance from a SparseVector or 1-dimensional NumPy array. @@ -719,9 +787,9 @@ def squared_distance(self, other): j += 1 return result else: - return self.squared_distance(_convert_to_vector(other)) + return self.squared_distance(_convert_to_vector(other)) # type: ignore[arg-type] - def toArray(self): + def toArray(self) -> np.ndarray: """ Returns a copy of this SparseVector as a 1-dimensional numpy.ndarray. """ @@ -729,15 +797,15 @@ def toArray(self): arr[self.indices] = self.values return arr - def __len__(self): + def __len__(self) -> int: return self.size - def __str__(self): + def __str__(self) -> str: inds = "[" + ",".join([str(i) for i in self.indices]) + "]" vals = "[" + ",".join([str(v) for v in self.values]) + "]" return "(" + ",".join((str(self.size), inds, vals)) + ")" - def __repr__(self): + def __repr__(self) -> str: inds = self.indices vals = self.values entries = ", ".join( @@ -745,7 +813,7 @@ def __repr__(self): ) return "SparseVector({0}, {{{1}}})".format(self.size, entries) - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if isinstance(other, SparseVector): return ( other.size == self.size @@ -758,7 +826,7 @@ def __eq__(self, other): return Vectors._equals(self.indices, self.values, list(range(len(other))), other.array) return False - def __getitem__(self, index): + def __getitem__(self, index: int) -> np.float64: inds = self.indices vals = self.values if not isinstance(index, int): @@ -770,18 +838,18 @@ def __getitem__(self, index): index += self.size if (inds.size == 0) or (index > inds.item(-1)): - return 0.0 + return np.float64(0.0) insert_index = np.searchsorted(inds, index) row_ind = inds[insert_index] if row_ind == index: return vals[insert_index] - return 0.0 + return np.float64(0.0) - def __ne__(self, other): + def __ne__(self, other: Any) -> bool: return not self.__eq__(other) - def __hash__(self): + def __hash__(self) -> int: result = 31 + self.size nnz = 0 i = 0 @@ -809,7 +877,37 @@ class Vectors: """ @staticmethod - def sparse(size, *args): + @overload + def sparse(size: int, __indices: bytes, __values: bytes) -> SparseVector: + ... + + @staticmethod + @overload + def sparse(size: int, *args: Tuple[int, float]) -> SparseVector: + ... + + @staticmethod + @overload + def sparse(size: int, __indices: Iterable[int], __values: Iterable[float]) -> SparseVector: + ... + + @staticmethod + @overload + def sparse(size: int, __pairs: Iterable[Tuple[int, float]]) -> SparseVector: + ... 
+ + @staticmethod + @overload + def sparse(size: int, __map: Dict[int, float]) -> SparseVector: + ... + + @staticmethod + def sparse( + size: int, + *args: Union[ + bytes, Tuple[int, float], Iterable[float], Iterable[Tuple[int, float]], Dict[int, float] + ], + ) -> SparseVector: """ Create a sparse vector, using either a dictionary, a list of (index, value) pairs, or two separate arrays of indices and @@ -832,10 +930,25 @@ def sparse(size, *args): >>> Vectors.sparse(4, [1, 3], [1.0, 5.5]) SparseVector(4, {1: 1.0, 3: 5.5}) """ - return SparseVector(size, *args) + return SparseVector(size, *args) # type: ignore[arg-type] + + @overload + @staticmethod + def dense(*elements: float) -> DenseVector: + ... + + @overload + @staticmethod + def dense(__arr: bytes) -> DenseVector: + ... + + @overload + @staticmethod + def dense(__arr: Iterable[float]) -> DenseVector: + ... @staticmethod - def dense(*elements): + def dense(*elements: Union[float, bytes, np.ndarray, Iterable[float]]) -> DenseVector: """ Create a dense vector of 64-bit floats from a Python list or numbers. @@ -848,11 +961,11 @@ def dense(*elements): """ if len(elements) == 1 and not isinstance(elements[0], (float, int)): # it's list, numpy.array or other iterable object. - elements = elements[0] - return DenseVector(elements) + elements = elements[0] # type: ignore[assignment] + return DenseVector(cast(Iterable[float], elements)) @staticmethod - def squared_distance(v1, v2): + def squared_distance(v1: Vector, v2: Vector) -> np.float64: """ Squared distance between two vectors. a and b can be of type SparseVector, DenseVector, np.ndarray @@ -866,21 +979,26 @@ def squared_distance(v1, v2): 51.0 """ v1, v2 = _convert_to_vector(v1), _convert_to_vector(v2) - return v1.squared_distance(v2) + return v1.squared_distance(v2) # type: ignore[attr-defined] @staticmethod - def norm(vector, p): + def norm(vector: Vector, p: "NormType") -> np.float64: """ Find norm of the given vector. """ - return _convert_to_vector(vector).norm(p) + return _convert_to_vector(vector).norm(p) # type: ignore[attr-defined] @staticmethod - def zeros(size): + def zeros(size: int) -> DenseVector: return DenseVector(np.zeros(size)) @staticmethod - def _equals(v1_indices, v1_values, v2_indices, v2_values): + def _equals( + v1_indices: Union[Sequence[int], np.ndarray], + v1_values: Union[Sequence[float], np.ndarray], + v2_indices: Union[Sequence[int], np.ndarray], + v2_values: Union[Sequence[float], np.ndarray], + ) -> bool: """ Check equality between sparse/dense vectors, v1_indices and v2_indices assume to be strictly increasing. @@ -913,19 +1031,19 @@ class Matrix: Represents a local matrix. """ - def __init__(self, numRows, numCols, isTransposed=False): + def __init__(self, numRows: int, numCols: int, isTransposed: bool = False): self.numRows = numRows self.numCols = numCols self.isTransposed = isTransposed - def toArray(self): + def toArray(self) -> np.ndarray: """ Returns its elements in a numpy.ndarray. """ raise NotImplementedError @staticmethod - def _convert_to_array(array_like, dtype): + def _convert_to_array(array_like: Union[bytes, Iterable[float]], dtype: Any) -> np.ndarray: """ Convert Matrix attributes which are array-like or buffer to array. """ @@ -939,13 +1057,19 @@ class DenseMatrix(Matrix): Column-major dense matrix. 
""" - def __init__(self, numRows, numCols, values, isTransposed=False): + def __init__( + self, + numRows: int, + numCols: int, + values: Union[bytes, Iterable[float]], + isTransposed: bool = False, + ): Matrix.__init__(self, numRows, numCols, isTransposed) values = self._convert_to_array(values, np.float64) assert len(values) == numRows * numCols self.values = values - def __reduce__(self): + def __reduce__(self) -> Tuple[Type["DenseMatrix"], Tuple[int, int, bytes, int]]: return DenseMatrix, ( self.numRows, self.numCols, @@ -953,7 +1077,7 @@ def __reduce__(self): int(self.isTransposed), ) - def __str__(self): + def __str__(self) -> str: """ Pretty printing of a DenseMatrix @@ -976,7 +1100,7 @@ def __str__(self): x = "\n".join([(" " * 6 + line) for line in array_lines[1:]]) return array_lines[0].replace("array", "DenseMatrix") + "\n" + x - def __repr__(self): + def __repr__(self) -> str: """ Representation of a DenseMatrix @@ -995,12 +1119,12 @@ def __repr__(self): _format_float_list(self.values[:8]) + ["..."] + _format_float_list(self.values[-8:]) ) - entries = ", ".join(entries) + entries = ", ".join(entries) # type: ignore[assignment] return "DenseMatrix({0}, {1}, [{2}], {3})".format( self.numRows, self.numCols, entries, self.isTransposed ) - def toArray(self): + def toArray(self) -> np.ndarray: """ Return a :py:class:`numpy.ndarray` @@ -1016,7 +1140,7 @@ def toArray(self): else: return self.values.reshape((self.numRows, self.numCols), order="F") - def toSparse(self): + def toSparse(self) -> "SparseMatrix": """Convert to SparseMatrix""" if self.isTransposed: values = np.ravel(self.toArray(), order="F") @@ -1030,7 +1154,7 @@ def toSparse(self): return SparseMatrix(self.numRows, self.numCols, colPtrs, rowIndices, values) - def __getitem__(self, indices): + def __getitem__(self, indices: Tuple[int, int]) -> np.float64: i, j = indices if i < 0 or i >= self.numRows: raise IndexError("Row index %d is out of range [0, %d)" % (i, self.numRows)) @@ -1042,21 +1166,29 @@ def __getitem__(self, indices): else: return self.values[i + j * self.numRows] - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if self.numRows != other.numRows or self.numCols != other.numCols: return False if isinstance(other, SparseMatrix): - return np.all(self.toArray() == other.toArray()) + return np.all(self.toArray() == other.toArray()).tolist() self_values = np.ravel(self.toArray(), order="F") other_values = np.ravel(other.toArray(), order="F") - return np.all(self_values == other_values) + return np.all(self_values == other_values).tolist() class SparseMatrix(Matrix): """Sparse Matrix stored in CSC format.""" - def __init__(self, numRows, numCols, colPtrs, rowIndices, values, isTransposed=False): + def __init__( + self, + numRows: int, + numCols: int, + colPtrs: Union[bytes, Iterable[int]], + rowIndices: Union[bytes, Iterable[int]], + values: Union[bytes, Iterable[float]], + isTransposed: bool = False, + ): Matrix.__init__(self, numRows, numCols, isTransposed) self.colPtrs = self._convert_to_array(colPtrs, np.int32) self.rowIndices = self._convert_to_array(rowIndices, np.int32) @@ -1078,7 +1210,7 @@ def __init__(self, numRows, numCols, colPtrs, rowIndices, values, isTransposed=F % (self.rowIndices.size, self.values.size) ) - def __str__(self): + def __str__(self) -> str: """ Pretty printing of a SparseMatrix @@ -1124,7 +1256,7 @@ def __str__(self): spstr += "\n.." 
* 2 return spstr - def __repr__(self): + def __repr__(self) -> str: """ Representation of a SparseMatrix @@ -1149,14 +1281,14 @@ def __repr__(self): if len(self.colPtrs) > 16: colPtrs = colPtrs[:8] + ["..."] + colPtrs[-8:] - values = ", ".join(values) - rowIndices = ", ".join([str(ind) for ind in rowIndices]) - colPtrs = ", ".join([str(ptr) for ptr in colPtrs]) + values = ", ".join(values) # type: ignore[assignment] + rowIndices = ", ".join([str(ind) for ind in rowIndices]) # type: ignore[assignment] + colPtrs = ", ".join([str(ptr) for ptr in colPtrs]) # type: ignore[assignment] return "SparseMatrix({0}, {1}, [{2}], [{3}], [{4}], {5})".format( self.numRows, self.numCols, colPtrs, rowIndices, values, self.isTransposed ) - def __reduce__(self): + def __reduce__(self) -> Tuple[Type["SparseMatrix"], Tuple[int, int, bytes, bytes, bytes, int]]: return SparseMatrix, ( self.numRows, self.numCols, @@ -1166,7 +1298,7 @@ def __reduce__(self): int(self.isTransposed), ) - def __getitem__(self, indices): + def __getitem__(self, indices: Tuple[int, int]) -> np.float64: i, j = indices if i < 0 or i >= self.numRows: raise IndexError("Row index %d is out of range [0, %d)" % (i, self.numRows)) @@ -1186,9 +1318,9 @@ def __getitem__(self, indices): if ind < colEnd and self.rowIndices[ind] == i: return self.values[ind] else: - return 0.0 + return np.float64(0.0) - def toArray(self): + def toArray(self) -> np.ndarray: """ Return a numpy.ndarray """ @@ -1202,32 +1334,38 @@ def toArray(self): A[self.rowIndices[startptr:endptr], k] = self.values[startptr:endptr] return A - def toDense(self): + def toDense(self) -> "DenseMatrix": densevals = np.ravel(self.toArray(), order="F") return DenseMatrix(self.numRows, self.numCols, densevals) # TODO: More efficient implementation: - def __eq__(self, other): - return np.all(self.toArray() == other.toArray()) + def __eq__(self, other: Any) -> bool: + return np.all(self.toArray() == other.toArray()).tolist() class Matrices: @staticmethod - def dense(numRows, numCols, values): + def dense(numRows: int, numCols: int, values: Union[bytes, Iterable[float]]) -> DenseMatrix: """ Create a DenseMatrix """ return DenseMatrix(numRows, numCols, values) @staticmethod - def sparse(numRows, numCols, colPtrs, rowIndices, values): + def sparse( + numRows: int, + numCols: int, + colPtrs: Union[bytes, Iterable[int]], + rowIndices: Union[bytes, Iterable[int]], + values: Union[bytes, Iterable[float]], + ) -> SparseMatrix: """ Create a SparseMatrix """ return SparseMatrix(numRows, numCols, colPtrs, rowIndices, values) -def _test(): +def _test() -> None: import doctest try: diff --git a/python/pyspark/ml/linalg/__init__.pyi b/python/pyspark/ml/linalg/__init__.pyi deleted file mode 100644 index bb0939771b1be..0000000000000 --- a/python/pyspark/ml/linalg/__init__.pyi +++ /dev/null @@ -1,243 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import overload -from typing import Any, Dict, Iterable, List, NoReturn, Optional, Tuple, Type, Union - -from pyspark.ml import linalg as newlinalg # noqa: F401 -from pyspark.sql.types import StructType, UserDefinedType - -from numpy import float64, ndarray - -class VectorUDT(UserDefinedType): - @classmethod - def sqlType(cls) -> StructType: ... - @classmethod - def module(cls) -> str: ... - @classmethod - def scalaUDT(cls) -> str: ... - def serialize( - self, obj: Vector - ) -> Tuple[int, Optional[int], Optional[List[int]], List[float]]: ... - def deserialize(self, datum: Any) -> Vector: ... - def simpleString(self) -> str: ... - -class MatrixUDT(UserDefinedType): - @classmethod - def sqlType(cls) -> StructType: ... - @classmethod - def module(cls) -> str: ... - @classmethod - def scalaUDT(cls) -> str: ... - def serialize( - self, obj: Matrix - ) -> Tuple[int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool]: ... - def deserialize(self, datum: Any) -> Matrix: ... - def simpleString(self) -> str: ... - -class Vector: - __UDT__: VectorUDT - def toArray(self) -> ndarray: ... - -class DenseVector(Vector): - array: ndarray - @overload - def __init__(self, *elements: float) -> None: ... - @overload - def __init__(self, __arr: bytes) -> None: ... - @overload - def __init__(self, __arr: Iterable[float]) -> None: ... - def __reduce__(self) -> Tuple[Type[DenseVector], bytes]: ... - def numNonzeros(self) -> int: ... - def norm(self, p: Union[float, str]) -> float64: ... - def dot(self, other: Iterable[float]) -> float64: ... - def squared_distance(self, other: Iterable[float]) -> float64: ... - def toArray(self) -> ndarray: ... - @property - def values(self) -> ndarray: ... - def __getitem__(self, item: int) -> float64: ... - def __len__(self) -> int: ... - def __eq__(self, other: Any) -> bool: ... - def __ne__(self, other: Any) -> bool: ... - def __hash__(self) -> int: ... - def __getattr__(self, item: str) -> Any: ... - def __neg__(self) -> DenseVector: ... - def __add__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __sub__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __mul__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __div__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __truediv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __mod__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __radd__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __rsub__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __rmul__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __rdiv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __rtruediv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - def __rmod__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... - -class SparseVector(Vector): - size: int - indices: ndarray - values: ndarray - @overload - def __init__(self, size: int, *args: Tuple[int, float]) -> None: ... - @overload - def __init__(self, size: int, __indices: bytes, __values: bytes) -> None: ... - @overload - def __init__(self, size: int, __indices: Iterable[int], __values: Iterable[float]) -> None: ... - @overload - def __init__(self, size: int, __pairs: Iterable[Tuple[int, float]]) -> None: ... 
- @overload - def __init__(self, size: int, __map: Dict[int, float]) -> None: ... - def numNonzeros(self) -> int: ... - def norm(self, p: Union[float, str]) -> float64: ... - def __reduce__(self) -> Tuple[Type[SparseVector], Tuple[int, bytes, bytes]]: ... - def dot(self, other: Iterable[float]) -> float64: ... - def squared_distance(self, other: Iterable[float]) -> float64: ... - def toArray(self) -> ndarray: ... - def __len__(self) -> int: ... - def __eq__(self, other: Any) -> bool: ... - def __getitem__(self, index: int) -> float64: ... - def __ne__(self, other: Any) -> bool: ... - def __hash__(self) -> int: ... - -class Vectors: - @overload - @staticmethod - def sparse(size: int, *args: Tuple[int, float]) -> SparseVector: ... - @overload - @staticmethod - def sparse(size: int, __indices: bytes, __values: bytes) -> SparseVector: ... - @overload - @staticmethod - def sparse(size: int, __indices: Iterable[int], __values: Iterable[float]) -> SparseVector: ... - @overload - @staticmethod - def sparse(size: int, __pairs: Iterable[Tuple[int, float]]) -> SparseVector: ... - @overload - @staticmethod - def sparse(size: int, __map: Dict[int, float]) -> SparseVector: ... - @overload - @staticmethod - def dense(*elements: float) -> DenseVector: ... - @overload - @staticmethod - def dense(__arr: bytes) -> DenseVector: ... - @overload - @staticmethod - def dense(__arr: Iterable[float]) -> DenseVector: ... - @staticmethod - def stringify(vector: Vector) -> str: ... - @staticmethod - def squared_distance(v1: Vector, v2: Vector) -> float64: ... - @staticmethod - def norm(vector: Vector, p: Union[float, str]) -> float64: ... - @staticmethod - def zeros(size: int) -> DenseVector: ... - -class Matrix: - __UDT__: MatrixUDT - numRows: int - numCols: int - isTransposed: bool - def __init__(self, numRows: int, numCols: int, isTransposed: bool = ...) -> None: ... - def toArray(self) -> ndarray: ... - -class DenseMatrix(Matrix): - values: Any - @overload - def __init__( - self, numRows: int, numCols: int, values: bytes, isTransposed: bool = ... - ) -> None: ... - @overload - def __init__( - self, - numRows: int, - numCols: int, - values: Iterable[float], - isTransposed: bool = ..., - ) -> None: ... - def __reduce__(self) -> Tuple[Type[DenseMatrix], Tuple[int, int, bytes, int]]: ... - def toArray(self) -> ndarray: ... - def toSparse(self) -> SparseMatrix: ... - def __getitem__(self, indices: Tuple[int, int]) -> float64: ... - def __eq__(self, other: Any) -> bool: ... - -class SparseMatrix(Matrix): - colPtrs: ndarray - rowIndices: ndarray - values: ndarray - @overload - def __init__( - self, - numRows: int, - numCols: int, - colPtrs: bytes, - rowIndices: bytes, - values: bytes, - isTransposed: bool = ..., - ) -> None: ... - @overload - def __init__( - self, - numRows: int, - numCols: int, - colPtrs: Iterable[int], - rowIndices: Iterable[int], - values: Iterable[float], - isTransposed: bool = ..., - ) -> None: ... - def __reduce__( - self, - ) -> Tuple[Type[SparseMatrix], Tuple[int, int, bytes, bytes, bytes, int]]: ... - def __getitem__(self, indices: Tuple[int, int]) -> float64: ... - def toArray(self) -> ndarray: ... - def toDense(self) -> DenseMatrix: ... - def __eq__(self, other: Any) -> bool: ... - -class Matrices: - @overload - @staticmethod - def dense( - numRows: int, numCols: int, values: bytes, isTransposed: bool = ... - ) -> DenseMatrix: ... - @overload - @staticmethod - def dense( - numRows: int, numCols: int, values: Iterable[float], isTransposed: bool = ... - ) -> DenseMatrix: ... 
- @overload - @staticmethod - def sparse( - numRows: int, - numCols: int, - colPtrs: bytes, - rowIndices: bytes, - values: bytes, - isTransposed: bool = ..., - ) -> SparseMatrix: ... - @overload - @staticmethod - def sparse( - numRows: int, - numCols: int, - colPtrs: Iterable[int], - rowIndices: Iterable[int], - values: Iterable[float], - isTransposed: bool = ..., - ) -> SparseMatrix: ... diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index b9c391ebf82e2..dd7fad092d1c5 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -575,8 +575,8 @@ def __getattr__(self, item: str) -> Any: def __neg__(self) -> "DenseVector": return DenseVector(-self.array) - def _delegate(op: str) -> Callable[["DenseVector", Any], Any]: # type: ignore[misc] - def func(self: "DenseVector", other: Any) -> Any: + def _delegate(op: str) -> Callable[["DenseVector", Any], "DenseVector"]: # type: ignore[misc] + def func(self: "DenseVector", other: Any) -> "DenseVector": if isinstance(other, DenseVector): other = other.array return DenseVector(getattr(self.array, op)(other)) @@ -768,7 +768,7 @@ def parse(s: str) -> "SparseVector": raise ValueError("Unable to parse values from %s." % s) return SparseVector(cast(int, size), indices, values) - def dot(self, other: Any) -> np.float64: + def dot(self, other: Iterable[float]) -> np.float64: """ Dot product with a SparseVector or 1- or 2-dimensional Numpy array. @@ -824,9 +824,9 @@ def dot(self, other: Any) -> np.float64: return np.dot(self_values, other.values[other_cmind]) else: - return self.dot(_convert_to_vector(other)) + return self.dot(_convert_to_vector(other)) # type: ignore[arg-type] - def squared_distance(self, other: Any) -> np.float64: + def squared_distance(self, other: Iterable[float]) -> np.float64: """ Squared distance from a SparseVector or 1-dimensional NumPy array. @@ -894,7 +894,7 @@ def squared_distance(self, other: Any) -> np.float64: j += 1 return result else: - return self.squared_distance(_convert_to_vector(other)) + return self.squared_distance(_convert_to_vector(other)) # type: ignore[arg-type] def toArray(self) -> np.ndarray: """ @@ -1140,7 +1140,7 @@ def squared_distance(v1: Vector, v2: Vector) -> np.float64: return v1.squared_distance(v2) # type: ignore[attr-defined] @staticmethod - def norm(vector: Vector, p: Union[float, str]) -> np.float64: + def norm(vector: Vector, p: "NormType") -> np.float64: """ Find norm of the given vector. """ From 0084a8677ad77143b109dff3d3e9be4035d00fd4 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 6 Feb 2022 11:09:41 +0100 Subject: [PATCH 158/513] [SPARK-37415][PYTHON][ML] Inline type hints for pyspark.ml.util ### What changes were proposed in this pull request? This PR migrates type `pyspark.ml.util` annotations from stub file to inline type hints. ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35367 from zero323/SPARK-37415. 
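One pattern used in this migration is typing `self` with a bound `TypeVar` so that fluent setters keep returning the caller's own subclass. A standalone sketch of the idea (toy classes, not the real `pyspark.ml.util` signatures):

```python
from typing import TypeVar

RW = TypeVar("RW", bound="BaseReadWrite")

class BaseReadWrite:
    def __init__(self) -> None:
        self.session_name = "default"

    def session(self: RW, name: str) -> RW:
        # returning `self` typed as RW preserves the subclass type for chaining
        self.session_name = name
        return self

class Writer(BaseReadWrite):
    def __init__(self) -> None:
        super().__init__()
        self.should_overwrite = False

    def overwrite(self) -> "Writer":
        self.should_overwrite = True
        return self

# A checker now sees Writer (not BaseReadWrite) after .session(...),
# so the chained .overwrite() call type-checks.
w = Writer().session("test").overwrite()
```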
Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/ml/base.py | 4 +- python/pyspark/ml/util.py | 207 ++++++++++++++++++++-------------- python/pyspark/ml/util.pyi | 136 ---------------------- python/pyspark/ml/wrapper.pyi | 1 + 4 files changed, 128 insertions(+), 220 deletions(-) delete mode 100644 python/pyspark/ml/util.pyi diff --git a/python/pyspark/ml/base.py b/python/pyspark/ml/base.py index 4f4ddef2468f8..9e8252d321a8e 100644 --- a/python/pyspark/ml/base.py +++ b/python/pyspark/ml/base.py @@ -104,7 +104,7 @@ def next(self) -> Tuple[int, M]: @inherit_doc -class Estimator(Generic[M], Params, metaclass=ABCMeta): +class Estimator(Params, Generic[M], metaclass=ABCMeta): """ Abstract class for estimators that fit models to data. @@ -382,7 +382,7 @@ def setPredictionCol(self: P, value: str) -> P: @inherit_doc -class PredictionModel(Generic[T], Transformer, _PredictorParams, metaclass=ABCMeta): +class PredictionModel(Model, _PredictorParams, Generic[T], metaclass=ABCMeta): """ Model for prediction tasks (regression and classification). """ diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py index ac60deda53c46..1dacffcb1122b 100644 --- a/python/pyspark/ml/util.py +++ b/python/pyspark/ml/util.py @@ -20,13 +20,31 @@ import time import uuid +from typing import Any, Dict, Generic, List, Optional, Sequence, Type, TypeVar, cast, TYPE_CHECKING + + from pyspark import SparkContext, since from pyspark.ml.common import inherit_doc from pyspark.sql import SparkSession from pyspark.util import VersionUtils +if TYPE_CHECKING: + from py4j.java_gateway import JavaGateway, JavaObject + from pyspark.ml._typing import PipelineStage + + from pyspark.ml.param import Param + from pyspark.ml.base import Params + from pyspark.ml.wrapper import JavaWrapper -def _jvm(): +T = TypeVar("T") +RW = TypeVar("RW", bound="BaseReadWrite") +W = TypeVar("W", bound="MLWriter") +JW = TypeVar("JW", bound="JavaMLWriter") +RL = TypeVar("RL", bound="MLReadable") +JR = TypeVar("JR", bound="JavaMLReader") + + +def _jvm() -> "JavaGateway": """ Returns the JVM view associated with SparkContext. Must be called after SparkContext is initialized. @@ -43,15 +61,15 @@ class Identifiable: Object with a unique ID. """ - def __init__(self): + def __init__(self) -> None: #: A unique id for the object. self.uid = self._randomUID() - def __repr__(self): + def __repr__(self) -> str: return self.uid @classmethod - def _randomUID(cls): + def _randomUID(cls) -> str: """ Generate a unique string id for the object. The default implementation concatenates the class name, "_", and 12 random hex chars. @@ -68,10 +86,10 @@ class BaseReadWrite: .. versionadded:: 2.3.0 """ - def __init__(self): - self._sparkSession = None + def __init__(self) -> None: + self._sparkSession: Optional[SparkSession] = None - def session(self, sparkSession): + def session(self: RW, sparkSession: SparkSession) -> RW: """ Sets the Spark Session to use for saving/loading. """ @@ -79,19 +97,21 @@ def session(self, sparkSession): return self @property - def sparkSession(self): + def sparkSession(self) -> SparkSession: """ Returns the user-specified Spark Session or the default. """ if self._sparkSession is None: self._sparkSession = SparkSession._getActiveSessionOrCreate() + assert self._sparkSession is not None return self._sparkSession @property - def sc(self): + def sc(self) -> SparkContext: """ Returns the underlying `SparkContext`. 
""" + assert self.sparkSession is not None return self.sparkSession.sparkContext @@ -103,37 +123,41 @@ class MLWriter(BaseReadWrite): .. versionadded:: 2.0.0 """ - def __init__(self): + def __init__(self) -> None: super(MLWriter, self).__init__() - self.shouldOverwrite = False - self.optionMap = {} + self.shouldOverwrite: bool = False + self.optionMap: Dict[str, Any] = {} - def _handleOverwrite(self, path): + def _handleOverwrite(self, path: str) -> None: from pyspark.ml.wrapper import JavaWrapper - _java_obj = JavaWrapper._new_java_obj("org.apache.spark.ml.util.FileSystemOverwrite") + _java_obj = JavaWrapper._new_java_obj( # type: ignore[attr-defined] + "org.apache.spark.ml.util.FileSystemOverwrite" + ) wrapper = JavaWrapper(_java_obj) - wrapper._call_java("handleOverwrite", path, True, self.sparkSession._jsparkSession) + wrapper._call_java( # type: ignore[attr-defined] + "handleOverwrite", path, True, self.sparkSession._jsparkSession + ) - def save(self, path): + def save(self, path: str) -> None: """Save the ML instance to the input path.""" if self.shouldOverwrite: self._handleOverwrite(path) self.saveImpl(path) - def saveImpl(self, path): + def saveImpl(self, path: str) -> None: """ save() handles overwriting and then calls this method. Subclasses should override this method to implement the actual saving of the instance. """ raise NotImplementedError("MLWriter is not yet implemented for type: %s" % type(self)) - def overwrite(self): + def overwrite(self) -> "MLWriter": """Overwrites if the output path already exists.""" self.shouldOverwrite = True return self - def option(self, key, value): + def option(self, key: str, value: Any) -> "MLWriter": """ Adds an option to the underlying MLWriter. See the documentation for the specific model's writer for possible options. The option name (key) is case-insensitive. @@ -150,7 +174,7 @@ class GeneralMLWriter(MLWriter): .. versionadded:: 2.4.0 """ - def format(self, source): + def format(self, source: str) -> "GeneralMLWriter": """ Specifies the format of ML export ("pmml", "internal", or the fully qualified class name for export). 
@@ -165,27 +189,29 @@ class JavaMLWriter(MLWriter): (Private) Specialization of :py:class:`MLWriter` for :py:class:`JavaParams` types """ - def __init__(self, instance): + _jwrite: "JavaObject" + + def __init__(self, instance: "JavaMLWritable"): super(JavaMLWriter, self).__init__() - _java_obj = instance._to_java() + _java_obj = instance._to_java() # type: ignore[attr-defined] self._jwrite = _java_obj.write() - def save(self, path): + def save(self, path: str) -> None: """Save the ML instance to the input path.""" if not isinstance(path, str): raise TypeError("path should be a string, got type %s" % type(path)) self._jwrite.save(path) - def overwrite(self): + def overwrite(self) -> "JavaMLWriter": """Overwrites if the output path already exists.""" self._jwrite.overwrite() return self - def option(self, key, value): + def option(self, key: str, value: str) -> "JavaMLWriter": self._jwrite.option(key, value) return self - def session(self, sparkSession): + def session(self, sparkSession: SparkSession) -> "JavaMLWriter": """Sets the Spark Session to use for saving.""" self._jwrite.session(sparkSession._jsparkSession) return self @@ -197,10 +223,10 @@ class GeneralJavaMLWriter(JavaMLWriter): (Private) Specialization of :py:class:`GeneralMLWriter` for :py:class:`JavaParams` types """ - def __init__(self, instance): + def __init__(self, instance: "JavaMLWritable"): super(GeneralJavaMLWriter, self).__init__(instance) - def format(self, source): + def format(self, source: str) -> "GeneralJavaMLWriter": """ Specifies the format of ML export ("pmml", "internal", or the fully qualified class name for export). @@ -217,11 +243,11 @@ class MLWritable: .. versionadded:: 2.0.0 """ - def write(self): + def write(self) -> MLWriter: """Returns an MLWriter instance for this ML instance.""" raise NotImplementedError("MLWritable is not yet implemented for type: %r" % type(self)) - def save(self, path): + def save(self, path: str) -> None: """Save this ML instance to the given path, a shortcut of 'write().save(path)'.""" self.write().save(path) @@ -232,7 +258,7 @@ class JavaMLWritable(MLWritable): (Private) Mixin for ML instances that provide :py:class:`JavaMLWriter`. """ - def write(self): + def write(self) -> JavaMLWriter: """Returns an MLWriter instance for this ML instance.""" return JavaMLWriter(self) @@ -243,39 +269,39 @@ class GeneralJavaMLWritable(JavaMLWritable): (Private) Mixin for ML instances that provide :py:class:`GeneralJavaMLWriter`. """ - def write(self): + def write(self) -> GeneralJavaMLWriter: """Returns an GeneralMLWriter instance for this ML instance.""" return GeneralJavaMLWriter(self) @inherit_doc -class MLReader(BaseReadWrite): +class MLReader(BaseReadWrite, Generic[RL]): """ Utility class that can load ML instances. .. 
versionadded:: 2.0.0 """ - def __init__(self): + def __init__(self) -> None: super(MLReader, self).__init__() - def load(self, path): + def load(self, path: str) -> RL: """Load the ML instance from the input path.""" raise NotImplementedError("MLReader is not yet implemented for type: %s" % type(self)) @inherit_doc -class JavaMLReader(MLReader): +class JavaMLReader(MLReader[RL]): """ (Private) Specialization of :py:class:`MLReader` for :py:class:`JavaParams` types """ - def __init__(self, clazz): + def __init__(self, clazz: Type["JavaMLReadable[RL]"]) -> None: super(JavaMLReader, self).__init__() self._clazz = clazz self._jread = self._load_java_obj(clazz).read() - def load(self, path): + def load(self, path: str) -> RL: """Load the ML instance from the input path.""" if not isinstance(path, str): raise TypeError("path should be a string, got type %s" % type(path)) @@ -284,15 +310,15 @@ def load(self, path): raise NotImplementedError( "This Java ML type cannot be loaded into Python currently: %r" % self._clazz ) - return self._clazz._from_java(java_obj) + return self._clazz._from_java(java_obj) # type: ignore[attr-defined] - def session(self, sparkSession): + def session(self: JR, sparkSession: SparkSession) -> JR: """Sets the Spark Session to use for loading.""" self._jread.session(sparkSession._jsparkSession) return self @classmethod - def _java_loader_class(cls, clazz): + def _java_loader_class(cls, clazz: Type["JavaMLReadable[RL]"]) -> str: """ Returns the full class name of the Java ML instance. The default implementation replaces "pyspark" by "org.apache.spark" in @@ -305,7 +331,7 @@ def _java_loader_class(cls, clazz): return java_package + "." + clazz.__name__ @classmethod - def _load_java_obj(cls, clazz): + def _load_java_obj(cls, clazz: Type["JavaMLReadable[RL]"]) -> "JavaObject": """Load the peer Java object of the ML instance.""" java_class = cls._java_loader_class(clazz) java_obj = _jvm() @@ -315,7 +341,7 @@ def _load_java_obj(cls, clazz): @inherit_doc -class MLReadable: +class MLReadable(Generic[RL]): """ Mixin for instances that provide :py:class:`MLReader`. @@ -323,24 +349,24 @@ class MLReadable: """ @classmethod - def read(cls): + def read(cls) -> MLReader[RL]: """Returns an MLReader instance for this class.""" raise NotImplementedError("MLReadable.read() not implemented for type: %r" % cls) @classmethod - def load(cls, path): + def load(cls, path: str) -> RL: """Reads an ML instance from the input path, a shortcut of `read().load(path)`.""" return cls.read().load(path) @inherit_doc -class JavaMLReadable(MLReadable): +class JavaMLReadable(MLReadable[RL]): """ (Private) Mixin for instances that provide JavaMLReader. """ @classmethod - def read(cls): + def read(cls) -> JavaMLReader[RL]: """Returns an MLReader instance for this class.""" return JavaMLReader(cls) @@ -358,7 +384,7 @@ class stores all data as :py:class:`Param` values, then extending this trait wil .. versionadded:: 2.3.0 """ - def write(self): + def write(self) -> MLWriter: """Returns a DefaultParamsWriter instance for this class.""" from pyspark.ml.param import Params @@ -382,15 +408,15 @@ class DefaultParamsWriter(MLWriter): .. 
versionadded:: 2.3.0 """ - def __init__(self, instance): + def __init__(self, instance: "Params"): super(DefaultParamsWriter, self).__init__() self.instance = instance - def saveImpl(self, path): + def saveImpl(self, path: str) -> None: DefaultParamsWriter.saveMetadata(self.instance, path, self.sc) @staticmethod - def extractJsonParams(instance, skipParams): + def extractJsonParams(instance: "Params", skipParams: Sequence[str]) -> Dict[str, Any]: paramMap = instance.extractParamMap() jsonParams = { param.name: value for param, value in paramMap.items() if param.name not in skipParams @@ -398,7 +424,13 @@ def extractJsonParams(instance, skipParams): return jsonParams @staticmethod - def saveMetadata(instance, path, sc, extraMetadata=None, paramMap=None): + def saveMetadata( + instance: "Params", + path: str, + sc: SparkContext, + extraMetadata: Optional[Dict[str, Any]] = None, + paramMap: Optional[Dict[str, "Param"]] = None, + ) -> None: """ Saves metadata + Params to: path + "/metadata" @@ -424,7 +456,12 @@ def saveMetadata(instance, path, sc, extraMetadata=None, paramMap=None): sc.parallelize([metadataJson], 1).saveAsTextFile(metadataPath) @staticmethod - def _get_metadata_to_save(instance, sc, extraMetadata=None, paramMap=None): + def _get_metadata_to_save( + instance: "Params", + sc: SparkContext, + extraMetadata: Optional[Dict[str, Any]] = None, + paramMap: Optional[Dict[str, "Param"]] = None, + ) -> str: """ Helper for :py:meth:`DefaultParamsWriter.saveMetadata` which extracts the JSON to save. This is useful for ensemble models which need to save metadata for many sub-models. @@ -460,11 +497,11 @@ def _get_metadata_to_save(instance, sc, extraMetadata=None, paramMap=None): } if extraMetadata is not None: basicMetadata.update(extraMetadata) - return json.dumps(basicMetadata, separators=[",", ":"]) + return json.dumps(basicMetadata, separators=(",", ":")) @inherit_doc -class DefaultParamsReadable(MLReadable): +class DefaultParamsReadable(MLReadable[RL]): """ Helper trait for making simple :py:class:`Params` types readable. If a :py:class:`Params` class stores all data as :py:class:`Param` values, @@ -477,13 +514,13 @@ class DefaultParamsReadable(MLReadable): """ @classmethod - def read(cls): + def read(cls) -> "DefaultParamsReader[RL]": """Returns a DefaultParamsReader instance for this class.""" return DefaultParamsReader(cls) @inherit_doc -class DefaultParamsReader(MLReader): +class DefaultParamsReader(MLReader[RL]): """ Specialization of :py:class:`MLReader` for :py:class:`Params` types @@ -494,12 +531,12 @@ class DefaultParamsReader(MLReader): .. versionadded:: 2.3.0 """ - def __init__(self, cls): + def __init__(self, cls: Type[DefaultParamsReadable[RL]]): super(DefaultParamsReader, self).__init__() self.cls = cls @staticmethod - def __get_class(clazz): + def __get_class(clazz: str) -> Type[RL]: """ Loads Python class from its name. 
""" @@ -510,16 +547,16 @@ def __get_class(clazz): m = getattr(m, comp) return m - def load(self, path): + def load(self, path: str) -> RL: metadata = DefaultParamsReader.loadMetadata(path, self.sc) - py_type = DefaultParamsReader.__get_class(metadata["class"]) + py_type: Type[RL] = DefaultParamsReader.__get_class(metadata["class"]) instance = py_type() - instance._resetUid(metadata["uid"]) + cast("Params", instance)._resetUid(metadata["uid"]) DefaultParamsReader.getAndSetParams(instance, metadata) return instance @staticmethod - def loadMetadata(path, sc, expectedClassName=""): + def loadMetadata(path: str, sc: SparkContext, expectedClassName: str = "") -> Dict[str, Any]: """ Load metadata saved using :py:meth:`DefaultParamsWriter.saveMetadata` @@ -536,7 +573,7 @@ def loadMetadata(path, sc, expectedClassName=""): return loadedVals @staticmethod - def _parseMetaData(metadataStr, expectedClassName=""): + def _parseMetaData(metadataStr: str, expectedClassName: str = "") -> Dict[str, Any]: """ Parse metadata JSON string produced by :py:meth`DefaultParamsWriter._get_metadata_to_save`. This is a helper function for :py:meth:`DefaultParamsReader.loadMetadata`. @@ -558,16 +595,18 @@ def _parseMetaData(metadataStr, expectedClassName=""): return metadata @staticmethod - def getAndSetParams(instance, metadata, skipParams=None): + def getAndSetParams( + instance: RL, metadata: Dict[str, Any], skipParams: Optional[List[str]] = None + ) -> None: """ Extract Params from metadata, and set them in the instance. """ # Set user-supplied param values for paramName in metadata["paramMap"]: - param = instance.getParam(paramName) + param = cast("Params", instance).getParam(paramName) if skipParams is None or paramName not in skipParams: paramValue = metadata["paramMap"][paramName] - instance.set(param, paramValue) + cast("Params", instance).set(param, paramValue) # Set default param values majorAndMinorVersions = VersionUtils.majorMinorVersion(metadata["sparkVersion"]) @@ -582,14 +621,14 @@ def getAndSetParams(instance, metadata, skipParams=None): for paramName in metadata["defaultParamMap"]: paramValue = metadata["defaultParamMap"][paramName] - instance._setDefault(**{paramName: paramValue}) + cast("Params", instance)._setDefault(**{paramName: paramValue}) @staticmethod - def isPythonParamsInstance(metadata): + def isPythonParamsInstance(metadata: Dict[str, Any]) -> bool: return metadata["class"].startswith("pyspark.ml.") @staticmethod - def loadParamsInstance(path, sc): + def loadParamsInstance(path: str, sc: SparkContext) -> RL: """ Load a :py:class:`Params` instance from the given path, and return it. This assumes the instance inherits from :py:class:`MLReadable`. @@ -599,41 +638,41 @@ def loadParamsInstance(path, sc): pythonClassName = metadata["class"] else: pythonClassName = metadata["class"].replace("org.apache.spark", "pyspark") - py_type = DefaultParamsReader.__get_class(pythonClassName) + py_type: Type[RL] = DefaultParamsReader.__get_class(pythonClassName) instance = py_type.load(path) return instance @inherit_doc -class HasTrainingSummary: +class HasTrainingSummary(Generic[T]): """ Base class for models that provides Training summary. .. versionadded:: 3.0.0 """ - @property + @property # type: ignore[misc] @since("2.1.0") - def hasSummary(self): + def hasSummary(self) -> bool: """ Indicates whether a training summary exists for this model instance. 
""" - return self._call_java("hasSummary") + return cast("JavaWrapper", self)._call_java("hasSummary") - @property + @property # type: ignore[misc] @since("2.1.0") - def summary(self): + def summary(self) -> T: """ Gets summary of the model trained on the training set. An exception is thrown if no summary exists. """ - return self._call_java("summary") + return cast("JavaWrapper", self)._call_java("summary") class MetaAlgorithmReadWrite: @staticmethod - def isMetaEstimator(pyInstance): + def isMetaEstimator(pyInstance: Any) -> bool: from pyspark.ml import Estimator, Pipeline from pyspark.ml.tuning import _ValidatorParams from pyspark.ml.classification import OneVsRest @@ -645,13 +684,15 @@ def isMetaEstimator(pyInstance): ) @staticmethod - def getAllNestedStages(pyInstance): + def getAllNestedStages(pyInstance: Any) -> List["PipelineStage"]: from pyspark.ml import Pipeline, PipelineModel from pyspark.ml.tuning import _ValidatorParams from pyspark.ml.classification import OneVsRest, OneVsRestModel # TODO: We need to handle `RFormulaModel.pipelineModel` here after Pyspark RFormulaModel # support pipelineModel property. + pySubStages: List["PipelineStage"] + if isinstance(pyInstance, Pipeline): pySubStages = pyInstance.getStages() elif isinstance(pyInstance, PipelineModel): @@ -661,7 +702,9 @@ def getAllNestedStages(pyInstance): elif isinstance(pyInstance, OneVsRest): pySubStages = [pyInstance.getClassifier()] elif isinstance(pyInstance, OneVsRestModel): - pySubStages = [pyInstance.getClassifier()] + pyInstance.models + pySubStages = [ + pyInstance.getClassifier() + ] + pyInstance.models # type: ignore[assignment, operator] else: pySubStages = [] @@ -672,7 +715,7 @@ def getAllNestedStages(pyInstance): return [pyInstance] + nestedStages @staticmethod - def getUidMap(instance): + def getUidMap(instance: Any) -> Dict[str, "PipelineStage"]: nestedStages = MetaAlgorithmReadWrite.getAllNestedStages(instance) uidMap = {stage.uid: stage for stage in nestedStages} if len(nestedStages) != len(uidMap): diff --git a/python/pyspark/ml/util.pyi b/python/pyspark/ml/util.pyi deleted file mode 100644 index db28c095a5568..0000000000000 --- a/python/pyspark/ml/util.pyi +++ /dev/null @@ -1,136 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any, Dict, Generic, Optional, Type, TypeVar, Union - -from pyspark import SparkContext as SparkContext, since as since # noqa: F401 -from pyspark.ml.common import inherit_doc as inherit_doc # noqa: F401 -from pyspark.sql import SparkSession as SparkSession -from pyspark.util import VersionUtils as VersionUtils # noqa: F401 - -S = TypeVar("S") -R = TypeVar("R", bound=MLReadable) - -class Identifiable: - uid: str - def __init__(self) -> None: ... - -class BaseReadWrite: - def __init__(self) -> None: ... 
- def session(self, sparkSession: SparkSession) -> Union[MLWriter, MLReader]: ... - @property - def sparkSession(self) -> SparkSession: ... - @property - def sc(self) -> SparkContext: ... - -class MLWriter(BaseReadWrite): - shouldOverwrite: bool = ... - def __init__(self) -> None: ... - def save(self, path: str) -> None: ... - def saveImpl(self, path: str) -> None: ... - def overwrite(self) -> MLWriter: ... - -class GeneralMLWriter(MLWriter): - source: str - def format(self, source: str) -> MLWriter: ... - -class JavaMLWriter(MLWriter): - def __init__(self, instance: JavaMLWritable) -> None: ... - def save(self, path: str) -> None: ... - def overwrite(self) -> JavaMLWriter: ... - def option(self, key: str, value: Any) -> JavaMLWriter: ... - def session(self, sparkSession: SparkSession) -> JavaMLWriter: ... - -class GeneralJavaMLWriter(JavaMLWriter): - def __init__(self, instance: MLWritable) -> None: ... - def format(self, source: str) -> GeneralJavaMLWriter: ... - -class MLWritable: - def write(self) -> MLWriter: ... - def save(self, path: str) -> None: ... - -class JavaMLWritable(MLWritable): - def write(self) -> JavaMLWriter: ... - -class GeneralJavaMLWritable(JavaMLWritable): - def write(self) -> GeneralJavaMLWriter: ... - -class MLReader(BaseReadWrite, Generic[R]): - def load(self, path: str) -> R: ... - -class JavaMLReader(MLReader[R]): - def __init__(self, clazz: Type[JavaMLReadable]) -> None: ... - def load(self, path: str) -> R: ... - def session(self, sparkSession: SparkSession) -> JavaMLReader[R]: ... - -class MLReadable(Generic[R]): - @classmethod - def read(cls: Type[R]) -> MLReader[R]: ... - @classmethod - def load(cls: Type[R], path: str) -> R: ... - -class JavaMLReadable(MLReadable[R]): - @classmethod - def read(cls: Type[R]) -> JavaMLReader[R]: ... - -class DefaultParamsWritable(MLWritable): - def write(self) -> MLWriter: ... - -class DefaultParamsWriter(MLWriter): - instance: DefaultParamsWritable - def __init__(self, instance: DefaultParamsWritable) -> None: ... - def saveImpl(self, path: str) -> None: ... - @staticmethod - def saveMetadata( - instance: DefaultParamsWritable, - path: str, - sc: SparkContext, - extraMetadata: Optional[Dict[str, Any]] = ..., - paramMap: Optional[Dict[str, Any]] = ..., - ) -> None: ... - -class DefaultParamsReadable(MLReadable[R]): - @classmethod - def read(cls: Type[R]) -> MLReader[R]: ... - -class DefaultParamsReader(MLReader[R]): - cls: Type[R] - def __init__(self, cls: Type[MLReadable]) -> None: ... - def load(self, path: str) -> R: ... - @staticmethod - def loadMetadata( - path: str, sc: SparkContext, expectedClassName: str = ... - ) -> Dict[str, Any]: ... - @staticmethod - def getAndSetParams(instance: R, metadata: Dict[str, Any]) -> None: ... - @staticmethod - def loadParamsInstance(path: str, sc: SparkContext) -> R: ... - -class HasTrainingSummary(Generic[S]): - @property - def hasSummary(self) -> bool: ... - @property - def summary(self) -> S: ... - -class MetaAlgorithmReadWrite: - @staticmethod - def isMetaEstimator(pyInstance: Any) -> bool: ... - @staticmethod - def getAllNestedStages(pyInstance: Any) -> list: ... - @staticmethod - def getUidMap(instance: Any) -> dict: ... diff --git a/python/pyspark/ml/wrapper.pyi b/python/pyspark/ml/wrapper.pyi index a238436eb17ec..7b3bfb4a7a36d 100644 --- a/python/pyspark/ml/wrapper.pyi +++ b/python/pyspark/ml/wrapper.pyi @@ -28,6 +28,7 @@ from pyspark.sql.dataframe import DataFrame class JavaWrapper: def __init__(self, java_obj: Optional[Any] = ...) -> None: ... def __del__(self) -> None: ... 
+ def _call_java(self, name: str, *args: Any) -> Any: ... class JavaParams(JavaWrapper, Params, metaclass=abc.ABCMeta): def copy(self: P, extra: Optional[ParamMap] = ...) -> P: ... From 0d56c947f10f747ab4b76426b2d6a34a1d3b8277 Mon Sep 17 00:00:00 2001 From: Tengfei Huang Date: Sun, 6 Feb 2022 21:19:29 +0300 Subject: [PATCH 159/513] [SPARK-38105][SQL] Use error classes in the parsing errors of joins ### What changes were proposed in this pull request? Migrate the following errors in QueryParsingErrors onto use error classes: 1. joinCriteriaUnimplementedError => throw IllegalStateException instead, since it should never happen and not visible to users, introduced by improving exhaustivity in [PR](https://github.com/apache/spark/pull/30455) 2. naturalCrossJoinUnsupportedError => UNSUPPORTED_FEATURE ### Why are the changes needed? Porting join parsing errors to new error framework. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT added. Closes #35405 from ivoson/SPARK-38105. Authored-by: Tengfei Huang Signed-off-by: Max Gekk --- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 2 +- .../org/apache/spark/sql/errors/QueryParsingErrors.scala | 6 +----- .../apache/spark/sql/errors/QueryParsingErrorsSuite.scala | 8 ++++++++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ed2623ebf420d..bd43cffc98dd0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1146,7 +1146,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case Some(c) if c.booleanExpression != null => (baseJoinType, Option(expression(c.booleanExpression))) case Some(c) => - throw QueryParsingErrors.joinCriteriaUnimplementedError(c, ctx) + throw new IllegalStateException(s"Unimplemented joinCriteria: $c") case None if join.NATURAL != null => if (join.LATERAL != null) { throw QueryParsingErrors.lateralJoinWithNaturalJoinUnsupportedError(ctx) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 6bcd20c19b336..6d7ed7be6760e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -129,12 +129,8 @@ object QueryParsingErrors { new ParseException(s"Cannot resolve window reference '$name'", ctx) } - def joinCriteriaUnimplementedError(join: JoinCriteriaContext, ctx: RelationContext): Throwable = { - new ParseException(s"Unimplemented joinCriteria: $join", ctx) - } - def naturalCrossJoinUnsupportedError(ctx: RelationContext): Throwable = { - new ParseException("NATURAL CROSS JOIN is not supported", ctx) + new ParseException("UNSUPPORTED_FEATURE", Array("NATURAL CROSS JOIN."), ctx) } def emptyInputForTableSampleError(ctx: ParserRuleContext): Throwable = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala index 1a213bf835b15..03117b9608d0f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -78,4 +78,12 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { message = "Invalid SQL syntax: LATERAL can only be used with subquery.") } } + + test("UNSUPPORTED_FEATURE: NATURAL CROSS JOIN is not supported") { + validateParsingError( + sqlText = "SELECT * FROM a NATURAL CROSS JOIN b", + errorClass = "UNSUPPORTED_FEATURE", + sqlState = "0A000", + message = "The feature is not supported: NATURAL CROSS JOIN.") + } } From 711d0b4aac45f5bb69f91a774e94063a769d31d0 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 7 Feb 2022 00:02:33 +0100 Subject: [PATCH 160/513] [SPARK-37416][PYTHON][ML] Inline type hints for pyspark.ml.wrapper ### What changes were proposed in this pull request? This PR migrates type `pyspark.ml.wrapper` annotations from stub file to inline type hints. ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35399 from zero323/SPARK-37416. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/ml/classification.pyi | 16 +++- python/pyspark/ml/clustering.pyi | 6 ++ python/pyspark/ml/feature.pyi | 23 +++++- python/pyspark/ml/fpm.pyi | 3 + python/pyspark/ml/recommendation.pyi | 3 + python/pyspark/ml/regression.pyi | 14 +++- python/pyspark/ml/wrapper.py | 108 ++++++++++++++++++--------- python/pyspark/ml/wrapper.pyi | 51 ------------- 8 files changed, 133 insertions(+), 91 deletions(-) delete mode 100644 python/pyspark/ml/wrapper.pyi diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi index 4170a8ca3db0c..89089a46936e3 100644 --- a/python/pyspark/ml/classification.pyi +++ b/python/pyspark/ml/classification.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -from typing import Any, List, Optional, Type +from typing import Any, Generic, List, Optional, Type from pyspark.ml._typing import JM, M, P, T, ParamMap import abc @@ -69,6 +69,8 @@ from pyspark.ml.param import Param from pyspark.ml.regression import DecisionTreeRegressionModel from pyspark.sql.dataframe import DataFrame +from py4j.java_gateway import JavaObject # type: ignore[import] + class _ClassifierParams(HasRawPredictionCol, _PredictorParams): ... class Classifier(Predictor, _ClassifierParams, metaclass=abc.ABCMeta): @@ -96,7 +98,7 @@ class ProbabilisticClassificationModel( @abstractmethod def predictProbability(self, value: Vector) -> Vector: ... -class _JavaClassifier(Classifier, JavaPredictor[JM], metaclass=abc.ABCMeta): +class _JavaClassifier(Classifier, JavaPredictor[JM], Generic[JM], metaclass=abc.ABCMeta): def setRawPredictionCol(self: P, value: str) -> P: ... class _JavaClassificationModel(ClassificationModel, JavaPredictionModel[T]): @@ -105,7 +107,7 @@ class _JavaClassificationModel(ClassificationModel, JavaPredictionModel[T]): def predictRaw(self, value: Vector) -> Vector: ... class _JavaProbabilisticClassifier( - ProbabilisticClassifier, _JavaClassifier[JM], metaclass=abc.ABCMeta + ProbabilisticClassifier, _JavaClassifier[JM], Generic[JM], metaclass=abc.ABCMeta ): ... class _JavaProbabilisticClassificationModel( @@ -231,6 +233,7 @@ class LinearSVC( def setWeightCol(self, value: str) -> LinearSVC: ... def setAggregationDepth(self, value: int) -> LinearSVC: ... def setMaxBlockSizeInMB(self, value: float) -> LinearSVC: ... 
+ def _create_model(self, java_model: JavaObject) -> LinearSVCModel: ... class LinearSVCModel( _JavaClassificationModel[Vector], @@ -350,6 +353,7 @@ class LogisticRegression( def setWeightCol(self, value: str) -> LogisticRegression: ... def setAggregationDepth(self, value: int) -> LogisticRegression: ... def setMaxBlockSizeInMB(self, value: float) -> LogisticRegression: ... + def _create_model(self, java_model: JavaObject) -> LogisticRegressionModel: ... class LogisticRegressionModel( _JavaProbabilisticClassificationModel[Vector], @@ -444,6 +448,7 @@ class DecisionTreeClassifier( def setCheckpointInterval(self, value: int) -> DecisionTreeClassifier: ... def setSeed(self, value: int) -> DecisionTreeClassifier: ... def setWeightCol(self, value: str) -> DecisionTreeClassifier: ... + def _create_model(self, java_model: JavaObject) -> DecisionTreeClassificationModel: ... class DecisionTreeClassificationModel( _DecisionTreeModel, @@ -529,6 +534,7 @@ class RandomForestClassifier( def setCheckpointInterval(self, value: int) -> RandomForestClassifier: ... def setWeightCol(self, value: str) -> RandomForestClassifier: ... def setMinWeightFractionPerNode(self, value: float) -> RandomForestClassifier: ... + def _create_model(self, java_model: JavaObject) -> RandomForestClassificationModel: ... class RandomForestClassificationModel( _TreeEnsembleModel, @@ -633,6 +639,7 @@ class GBTClassifier( def setStepSize(self, value: float) -> GBTClassifier: ... def setWeightCol(self, value: str) -> GBTClassifier: ... def setMinWeightFractionPerNode(self, value: float) -> GBTClassifier: ... + def _create_model(self, java_model: JavaObject) -> GBTClassificationModel: ... class GBTClassificationModel( _TreeEnsembleModel, @@ -691,6 +698,7 @@ class NaiveBayes( def setSmoothing(self, value: float) -> NaiveBayes: ... def setModelType(self, value: str) -> NaiveBayes: ... def setWeightCol(self, value: str) -> NaiveBayes: ... + def _create_model(self, java_model: JavaObject) -> NaiveBayesModel: ... class NaiveBayesModel( _JavaProbabilisticClassificationModel[Vector], @@ -769,6 +777,7 @@ class MultilayerPerceptronClassifier( def setTol(self, value: float) -> MultilayerPerceptronClassifier: ... def setStepSize(self, value: float) -> MultilayerPerceptronClassifier: ... def setSolver(self, value: str) -> MultilayerPerceptronClassifier: ... + def _create_model(self, java_model: JavaObject) -> MultilayerPerceptronClassificationModel: ... class MultilayerPerceptronClassificationModel( _JavaProbabilisticClassificationModel[Vector], @@ -921,6 +930,7 @@ class FMClassifier( def setSeed(self, value: int) -> FMClassifier: ... def setFitIntercept(self, value: bool) -> FMClassifier: ... def setRegParam(self, value: float) -> FMClassifier: ... + def _create_model(self, java_model: JavaObject) -> FMClassificationModel: ... class FMClassificationModel( _JavaProbabilisticClassificationModel[Vector], diff --git a/python/pyspark/ml/clustering.pyi b/python/pyspark/ml/clustering.pyi index 81074fc285273..e0ee3d6394e7b 100644 --- a/python/pyspark/ml/clustering.pyi +++ b/python/pyspark/ml/clustering.pyi @@ -45,6 +45,8 @@ from pyspark.sql.dataframe import DataFrame from numpy import ndarray +from py4j.java_gateway import JavaObject # type: ignore[import] + class ClusteringSummary(JavaWrapper): @property def predictionCol(self) -> str: ... @@ -137,6 +139,7 @@ class GaussianMixture( def setSeed(self, value: int) -> GaussianMixture: ... def setTol(self, value: float) -> GaussianMixture: ... 
def setAggregationDepth(self, value: int) -> GaussianMixture: ... + def _create_model(self, java_model: JavaObject) -> GaussianMixtureModel: ... class GaussianMixtureSummary(ClusteringSummary): @property @@ -219,6 +222,7 @@ class KMeans(JavaEstimator[KMeansModel], _KMeansParams, JavaMLWritable, JavaMLRe def setSeed(self, value: int) -> KMeans: ... def setTol(self, value: float) -> KMeans: ... def setWeightCol(self, value: str) -> KMeans: ... + def _create_model(self, java_model: JavaObject) -> KMeansModel: ... class _BisectingKMeansParams( HasMaxIter, @@ -287,6 +291,7 @@ class BisectingKMeans( def setPredictionCol(self, value: str) -> BisectingKMeans: ... def setSeed(self, value: int) -> BisectingKMeans: ... def setWeightCol(self, value: str) -> BisectingKMeans: ... + def _create_model(self, java_model: JavaObject) -> BisectingKMeansModel: ... class BisectingKMeansSummary(ClusteringSummary): @property @@ -386,6 +391,7 @@ class LDA(JavaEstimator[LDAModel], _LDAParams, JavaMLReadable[LDA], JavaMLWritab def setKeepLastCheckpoint(self, value: bool) -> LDA: ... def setMaxIter(self, value: int) -> LDA: ... def setFeaturesCol(self, value: str) -> LDA: ... + def _create_model(self, java_model: JavaObject) -> LDAModel: ... class _PowerIterationClusteringParams(HasMaxIter, HasWeightCol): k: Param[int] diff --git a/python/pyspark/ml/feature.pyi b/python/pyspark/ml/feature.pyi index 6efc304b897f4..ecfd26ebbadb7 100644 --- a/python/pyspark/ml/feature.pyi +++ b/python/pyspark/ml/feature.pyi @@ -42,6 +42,8 @@ from pyspark.ml.linalg import Vector, DenseVector, DenseMatrix from pyspark.sql.dataframe import DataFrame from pyspark.ml.param import Param +from py4j.java_gateway import JavaObject # type: ignore[import] + class Binarizer( JavaTransformer, HasThreshold, @@ -103,6 +105,7 @@ class _LSH(Generic[JM], JavaEstimator[JM], _LSHParams, JavaMLReadable, JavaMLWri def setNumHashTables(self: P, value: int) -> P: ... def setInputCol(self: P, value: str) -> P: ... def setOutputCol(self: P, value: str) -> P: ... + def _create_model(self, java_model: JavaObject) -> JM: ... class _LSHModel(JavaModel, _LSHParams): def setInputCol(self: P, value: str) -> P: ... @@ -268,6 +271,7 @@ class CountVectorizer( def setBinary(self, value: bool) -> CountVectorizer: ... def setInputCol(self, value: str) -> CountVectorizer: ... def setOutputCol(self, value: str) -> CountVectorizer: ... + def _create_model(self, java_model: JavaObject) -> CountVectorizerModel: ... class CountVectorizerModel(JavaModel, JavaMLReadable[CountVectorizerModel], JavaMLWritable): def setInputCol(self, value: str) -> CountVectorizerModel: ... @@ -412,6 +416,7 @@ class IDF(JavaEstimator[IDFModel], _IDFParams, JavaMLReadable[IDF], JavaMLWritab def setMinDocFreq(self, value: int) -> IDF: ... def setInputCol(self, value: str) -> IDF: ... def setOutputCol(self, value: str) -> IDF: ... + def _create_model(self, java_model: JavaObject) -> IDFModel: ... class IDFModel(JavaModel, _IDFParams, JavaMLReadable[IDFModel], JavaMLWritable): def setInputCol(self, value: str) -> IDFModel: ... @@ -477,6 +482,7 @@ class Imputer(JavaEstimator[ImputerModel], _ImputerParams, JavaMLReadable[Impute def setInputCol(self, value: str) -> Imputer: ... def setOutputCol(self, value: str) -> Imputer: ... def setRelativeError(self, value: float) -> Imputer: ... + def _create_model(self, java_model: JavaObject) -> ImputerModel: ... class ImputerModel(JavaModel, _ImputerParams, JavaMLReadable[ImputerModel], JavaMLWritable): def setInputCols(self, value: List[str]) -> ImputerModel: ... 
@@ -518,6 +524,7 @@ class MaxAbsScaler( ) -> MaxAbsScaler: ... def setInputCol(self, value: str) -> MaxAbsScaler: ... def setOutputCol(self, value: str) -> MaxAbsScaler: ... + def _create_model(self, java_model: JavaObject) -> MaxAbsScalerModel: ... class MaxAbsScalerModel( JavaModel, _MaxAbsScalerParams, JavaMLReadable[MaxAbsScalerModel], JavaMLWritable @@ -588,6 +595,7 @@ class MinMaxScaler( def setMax(self, value: float) -> MinMaxScaler: ... def setInputCol(self, value: str) -> MinMaxScaler: ... def setOutputCol(self, value: str) -> MinMaxScaler: ... + def _create_model(self, java_model: JavaObject) -> MinMaxScalerModel: ... class MinMaxScalerModel( JavaModel, _MinMaxScalerParams, JavaMLReadable[MinMaxScalerModel], JavaMLWritable @@ -687,6 +695,7 @@ class OneHotEncoder( def setHandleInvalid(self, value: str) -> OneHotEncoder: ... def setInputCol(self, value: str) -> OneHotEncoder: ... def setOutputCol(self, value: str) -> OneHotEncoder: ... + def _create_model(self, java_model: JavaObject) -> OneHotEncoderModel: ... class OneHotEncoderModel( JavaModel, _OneHotEncoderParams, JavaMLReadable[OneHotEncoderModel], JavaMLWritable @@ -783,6 +792,7 @@ class QuantileDiscretizer( def setOutputCol(self, value: str) -> QuantileDiscretizer: ... def setOutputCols(self, value: List[str]) -> QuantileDiscretizer: ... def setHandleInvalid(self, value: str) -> QuantileDiscretizer: ... + def _create_model(self, java_model: JavaObject) -> Bucketizer: ... class _RobustScalerParams(HasInputCol, HasOutputCol, HasRelativeError): lower: Param[float] @@ -827,6 +837,7 @@ class RobustScaler( def setInputCol(self, value: str) -> RobustScaler: ... def setOutputCol(self, value: str) -> RobustScaler: ... def setRelativeError(self, value: float) -> RobustScaler: ... + def _create_model(self, java_model: JavaObject) -> RobustScalerModel: ... class RobustScalerModel( JavaModel, _RobustScalerParams, JavaMLReadable[RobustScalerModel], JavaMLWritable @@ -920,6 +931,7 @@ class StandardScaler( def setWithStd(self, value: bool) -> StandardScaler: ... def setInputCol(self, value: str) -> StandardScaler: ... def setOutputCol(self, value: str) -> StandardScaler: ... + def _create_model(self, java_model: JavaObject) -> StandardScalerModel: ... class StandardScalerModel( JavaModel, @@ -990,6 +1002,7 @@ class StringIndexer( def setOutputCol(self, value: str) -> StringIndexer: ... def setOutputCols(self, value: List[str]) -> StringIndexer: ... def setHandleInvalid(self, value: str) -> StringIndexer: ... + def _create_model(self, java_model: JavaObject) -> StringIndexerModel: ... class StringIndexerModel( JavaModel, _StringIndexerParams, JavaMLReadable[StringIndexerModel], JavaMLWritable @@ -1186,6 +1199,7 @@ class VectorIndexer( def setInputCol(self, value: str) -> VectorIndexer: ... def setOutputCol(self, value: str) -> VectorIndexer: ... def setHandleInvalid(self, value: str) -> VectorIndexer: ... + def _create_model(self, java_model: JavaObject) -> VectorIndexerModel: ... class VectorIndexerModel( JavaModel, _VectorIndexerParams, JavaMLReadable[VectorIndexerModel], JavaMLWritable @@ -1286,6 +1300,7 @@ class Word2Vec( def setOutputCol(self, value: str) -> Word2Vec: ... def setSeed(self, value: int) -> Word2Vec: ... def setStepSize(self, value: float) -> Word2Vec: ... + def _create_model(self, java_model: JavaObject) -> Word2VecModel: ... class Word2VecModel(JavaModel, _Word2VecParams, JavaMLReadable[Word2VecModel], JavaMLWritable): def getVectors(self) -> DataFrame: ... 
@@ -1322,6 +1337,7 @@ class PCA(JavaEstimator[PCAModel], _PCAParams, JavaMLReadable[PCA], JavaMLWritab def setK(self, value: int) -> PCA: ... def setInputCol(self, value: str) -> PCA: ... def setOutputCol(self, value: str) -> PCA: ... + def _create_model(self, java_model: JavaObject) -> PCAModel: ... class PCAModel(JavaModel, _PCAParams, JavaMLReadable[PCAModel], JavaMLWritable): def setInputCol(self, value: str) -> PCAModel: ... @@ -1373,6 +1389,7 @@ class RFormula( def setFeaturesCol(self, value: str) -> RFormula: ... def setLabelCol(self, value: str) -> RFormula: ... def setHandleInvalid(self, value: str) -> RFormula: ... + def _create_model(self, java_model: JavaObject) -> RFormulaModel: ... class RFormulaModel(JavaModel, _RFormulaParams, JavaMLReadable[RFormulaModel], JavaMLWritable): ... @@ -1391,7 +1408,7 @@ class _SelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol): def getFdr(self) -> float: ... def getFwe(self) -> float: ... -class _Selector(JavaEstimator[JM], _SelectorParams, JavaMLReadable, JavaMLWritable): +class _Selector(JavaEstimator[JM], _SelectorParams, JavaMLReadable, JavaMLWritable, Generic[JM]): def setSelectorType(self: P, value: str) -> P: ... def setNumTopFeatures(self: P, value: int) -> P: ... def setPercentile(self: P, value: float) -> P: ... @@ -1401,6 +1418,7 @@ class _Selector(JavaEstimator[JM], _SelectorParams, JavaMLReadable, JavaMLWritab def setFeaturesCol(self: P, value: str) -> P: ... def setOutputCol(self: P, value: str) -> P: ... def setLabelCol(self: P, value: str) -> P: ... + def _create_model(self, java_model: JavaObject) -> JM: ... class _SelectorModel(JavaModel, _SelectorParams): def setFeaturesCol(self: P, value: str) -> P: ... @@ -1448,6 +1466,7 @@ class ChiSqSelector( def setFeaturesCol(self, value: str) -> ChiSqSelector: ... def setOutputCol(self, value: str) -> ChiSqSelector: ... def setLabelCol(self, value: str) -> ChiSqSelector: ... + def _create_model(self, java_model: JavaObject) -> ChiSqSelectorModel: ... class ChiSqSelectorModel(_SelectorModel, JavaMLReadable[ChiSqSelectorModel], JavaMLWritable): def setFeaturesCol(self, value: str) -> ChiSqSelectorModel: ... @@ -1500,6 +1519,7 @@ class VarianceThresholdSelector( def setVarianceThreshold(self, value: float) -> VarianceThresholdSelector: ... def setFeaturesCol(self, value: str) -> VarianceThresholdSelector: ... def setOutputCol(self, value: str) -> VarianceThresholdSelector: ... + def _create_model(self, java_model: JavaObject) -> VarianceThresholdSelectorModel: ... class VarianceThresholdSelectorModel( JavaModel, @@ -1552,6 +1572,7 @@ class UnivariateFeatureSelector( def setFeaturesCol(self, value: str) -> UnivariateFeatureSelector: ... def setOutputCol(self, value: str) -> UnivariateFeatureSelector: ... def setLabelCol(self, value: str) -> UnivariateFeatureSelector: ... + def _create_model(self, java_model: JavaObject) -> UnivariateFeatureSelectorModel: ... class UnivariateFeatureSelectorModel( JavaModel, diff --git a/python/pyspark/ml/fpm.pyi b/python/pyspark/ml/fpm.pyi index 609bc447735b7..00d5c5fe6b055 100644 --- a/python/pyspark/ml/fpm.pyi +++ b/python/pyspark/ml/fpm.pyi @@ -25,6 +25,8 @@ from pyspark.sql.dataframe import DataFrame from pyspark.ml.param import Param +from py4j.java_gateway import JavaObject # type: ignore[import] + class _FPGrowthParams(HasPredictionCol): itemsCol: Param[str] minSupport: Param[float] @@ -74,6 +76,7 @@ class FPGrowth( def setNumPartitions(self, value: int) -> FPGrowth: ... def setMinConfidence(self, value: float) -> FPGrowth: ... 
def setPredictionCol(self, value: str) -> FPGrowth: ... + def _create_model(self, java_model: JavaObject) -> FPGrowthModel: ... class PrefixSpan(JavaParams): minSupport: Param[float] diff --git a/python/pyspark/ml/recommendation.pyi b/python/pyspark/ml/recommendation.pyi index 6ce178b9d71b1..f7faacaf48b29 100644 --- a/python/pyspark/ml/recommendation.pyi +++ b/python/pyspark/ml/recommendation.pyi @@ -36,6 +36,8 @@ from pyspark.ml.util import JavaMLWritable, JavaMLReadable from pyspark.sql.dataframe import DataFrame +from py4j.java_gateway import JavaObject # type: ignore[import] + class _ALSModelParams(HasPredictionCol, HasBlockSize): userCol: Param[str] itemCol: Param[str] @@ -127,6 +129,7 @@ class ALS(JavaEstimator[ALSModel], _ALSParams, JavaMLWritable, JavaMLReadable[AL def setCheckpointInterval(self, value: int) -> ALS: ... def setSeed(self, value: int) -> ALS: ... def setBlockSize(self, value: int) -> ALS: ... + def _create_model(self, java_model: JavaObject) -> ALSModel: ... class ALSModel(JavaModel, _ALSModelParams, JavaMLWritable, JavaMLReadable[ALSModel]): def setUserCol(self, value: str) -> ALSModel: ... diff --git a/python/pyspark/ml/regression.pyi b/python/pyspark/ml/regression.pyi index 3b553b1401819..750e4c7223b84 100644 --- a/python/pyspark/ml/regression.pyi +++ b/python/pyspark/ml/regression.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -from typing import Any, List, Optional +from typing import Any, Generic, List, Optional from pyspark.ml._typing import JM, M, T import abc @@ -67,9 +67,11 @@ from pyspark.ml.linalg import Matrix, Vector from pyspark.ml.param import Param from pyspark.sql.dataframe import DataFrame +from py4j.java_gateway import JavaObject # type: ignore[import] + class Regressor(Predictor[M], _PredictorParams, metaclass=abc.ABCMeta): ... class RegressionModel(PredictionModel[T], _PredictorParams, metaclass=abc.ABCMeta): ... -class _JavaRegressor(Regressor, JavaPredictor[JM], metaclass=abc.ABCMeta): ... +class _JavaRegressor(Regressor, JavaPredictor[JM], Generic[JM], metaclass=abc.ABCMeta): ... class _JavaRegressionModel(RegressionModel, JavaPredictionModel[T], metaclass=abc.ABCMeta): ... class _LinearRegressionParams( @@ -146,6 +148,7 @@ class LinearRegression( def setAggregationDepth(self, value: int) -> LinearRegression: ... def setLoss(self, value: str) -> LinearRegression: ... def setMaxBlockSizeInMB(self, value: float) -> LinearRegression: ... + def _create_model(self, java_model: JavaObject) -> LinearRegressionModel: ... class LinearRegressionModel( _JavaRegressionModel[Vector], @@ -241,6 +244,7 @@ class IsotonicRegression( def setPredictionCol(self, value: str) -> IsotonicRegression: ... def setLabelCol(self, value: str) -> IsotonicRegression: ... def setWeightCol(self, value: str) -> IsotonicRegression: ... + def _create_model(self, java_model: JavaObject) -> IsotonicRegressionModel: ... class IsotonicRegressionModel( JavaModel, @@ -320,6 +324,7 @@ class DecisionTreeRegressor( def setSeed(self, value: int) -> DecisionTreeRegressor: ... def setWeightCol(self, value: str) -> DecisionTreeRegressor: ... def setVarianceCol(self, value: str) -> DecisionTreeRegressor: ... + def _create_model(self, java_model: JavaObject) -> DecisionTreeRegressionModel: ... class DecisionTreeRegressionModel( _JavaRegressionModel[Vector], @@ -402,6 +407,7 @@ class RandomForestRegressor( def setSeed(self, value: int) -> RandomForestRegressor: ... def setWeightCol(self, value: str) -> RandomForestRegressor: ... 
def setMinWeightFractionPerNode(self, value: float) -> RandomForestRegressor: ... + def _create_model(self, java_model: JavaObject) -> RandomForestRegressionModel: ... class RandomForestRegressionModel( _JavaRegressionModel[Vector], @@ -496,6 +502,7 @@ class GBTRegressor( def setStepSize(self, value: float) -> GBTRegressor: ... def setWeightCol(self, value: str) -> GBTRegressor: ... def setMinWeightFractionPerNode(self, value: float) -> GBTRegressor: ... + def _create_model(self, java_model: JavaObject) -> GBTRegressionModel: ... class GBTRegressionModel( _JavaRegressionModel[Vector], @@ -570,6 +577,7 @@ class AFTSurvivalRegression( def setFitIntercept(self, value: bool) -> AFTSurvivalRegression: ... def setAggregationDepth(self, value: int) -> AFTSurvivalRegression: ... def setMaxBlockSizeInMB(self, value: float) -> AFTSurvivalRegression: ... + def _create_model(self, java_model: JavaObject) -> AFTSurvivalRegressionModel: ... class AFTSurvivalRegressionModel( _JavaRegressionModel[Vector], @@ -672,6 +680,7 @@ class GeneralizedLinearRegression( def setWeightCol(self, value: str) -> GeneralizedLinearRegression: ... def setSolver(self, value: str) -> GeneralizedLinearRegression: ... def setAggregationDepth(self, value: int) -> GeneralizedLinearRegression: ... + def _create_model(self, java_model: JavaObject) -> GeneralizedLinearRegressionModel: ... class GeneralizedLinearRegressionModel( _JavaRegressionModel[Vector], @@ -802,6 +811,7 @@ class FMRegressor( def setSeed(self, value: int) -> FMRegressor: ... def setFitIntercept(self, value: bool) -> FMRegressor: ... def setRegParam(self, value: float) -> FMRegressor: ... + def _create_model(self, java_model: JavaObject) -> FMRegressionModel: ... class FMRegressionModel( _JavaRegressionModel, diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index c35df2e5b6ef1..7f03f64ef7176 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -17,49 +17,68 @@ from abc import ABCMeta, abstractmethod +from typing import Any, Generic, Optional, List, Type, TypeVar, TYPE_CHECKING + from pyspark import since from pyspark import SparkContext from pyspark.sql import DataFrame from pyspark.ml import Estimator, Predictor, PredictionModel, Transformer, Model from pyspark.ml.base import _PredictorParams -from pyspark.ml.param import Params -from pyspark.ml.util import _jvm +from pyspark.ml.param import Param, Params +from pyspark.ml.util import _jvm # type: ignore[attr-defined] from pyspark.ml.common import inherit_doc, _java2py, _py2java +if TYPE_CHECKING: + from pyspark.ml._typing import ParamMap + from py4j.java_gateway import JavaObject, JavaClass + + +T = TypeVar("T") +JW = TypeVar("JW", bound="JavaWrapper") +JM = TypeVar("JM", bound="JavaTransformer") +JP = TypeVar("JP", bound="JavaParams") + + class JavaWrapper: """ Wrapper class for a Java companion object """ - def __init__(self, java_obj=None): + def __init__(self, java_obj: Optional["JavaObject"] = None): super(JavaWrapper, self).__init__() self._java_obj = java_obj - def __del__(self): + def __del__(self) -> None: if SparkContext._active_spark_context and self._java_obj is not None: - SparkContext._active_spark_context._gateway.detach(self._java_obj) + SparkContext._active_spark_context._gateway.detach( # type: ignore[union-attr] + self._java_obj + ) @classmethod - def _create_from_java_class(cls, java_class, *args): + def _create_from_java_class(cls: Type[JW], java_class: str, *args: Any) -> JW: """ Construct this object from given Java classname and arguments 
""" java_obj = JavaWrapper._new_java_obj(java_class, *args) return cls(java_obj) - def _call_java(self, name, *args): + def _call_java(self, name: str, *args: Any) -> Any: m = getattr(self._java_obj, name) sc = SparkContext._active_spark_context + assert sc is not None + java_args = [_py2java(sc, arg) for arg in args] return _java2py(sc, m(*java_args)) @staticmethod - def _new_java_obj(java_class, *args): + def _new_java_obj(java_class: str, *args: Any) -> "JavaObject": """ Returns a new Java object. """ sc = SparkContext._active_spark_context + assert sc is not None + java_obj = _jvm() for name in java_class.split("."): java_obj = getattr(java_obj, name) @@ -67,7 +86,7 @@ def _new_java_obj(java_class, *args): return java_obj(*java_args) @staticmethod - def _new_java_array(pylist, java_class): + def _new_java_array(pylist: List[Any], java_class: "JavaClass") -> "JavaObject": """ Create a Java array of given java_class type. Useful for calling a method with a Scala Array from Python with Py4J. @@ -97,6 +116,9 @@ def _new_java_array(pylist, java_class): Java Array of converted pylist. """ sc = SparkContext._active_spark_context + assert sc is not None + assert sc._gateway is not None + java_array = None if len(pylist) > 0 and isinstance(pylist[0], list): # If pylist is a 2D array, then a 2D java array will be created. @@ -125,20 +147,24 @@ class JavaParams(JavaWrapper, Params, metaclass=ABCMeta): #: The param values in the Java object should be #: synced with the Python wrapper in fit/transform/evaluate/copy. - def _make_java_param_pair(self, param, value): + def _make_java_param_pair(self, param: Param[T], value: T) -> "JavaObject": """ Makes a Java param pair. """ sc = SparkContext._active_spark_context + assert sc is not None and self._java_obj is not None + param = self._resolveParam(param) java_param = self._java_obj.getParam(param.name) java_value = _py2java(sc, value) return java_param.w(java_value) - def _transfer_params_to_java(self): + def _transfer_params_to_java(self) -> None: """ Transforms the embedded params to the companion Java object. """ + assert self._java_obj is not None + pair_defaults = [] for param in self.params: if self.isSet(param): @@ -149,10 +175,12 @@ def _transfer_params_to_java(self): pair_defaults.append(pair) if len(pair_defaults) > 0: sc = SparkContext._active_spark_context + assert sc is not None and sc._jvm is not None + pair_defaults_seq = sc._jvm.PythonUtils.toSeq(pair_defaults) self._java_obj.setDefault(pair_defaults_seq) - def _transfer_param_map_to_java(self, pyParamMap): + def _transfer_param_map_to_java(self, pyParamMap: "ParamMap") -> "JavaObject": """ Transforms a Python ParamMap into a Java ParamMap. 
""" @@ -163,26 +191,30 @@ def _transfer_param_map_to_java(self, pyParamMap): paramMap.put([pair]) return paramMap - def _create_params_from_java(self): + def _create_params_from_java(self) -> None: """ SPARK-10931: Temporary fix to create params that are defined in the Java obj but not here """ + assert self._java_obj is not None + java_params = list(self._java_obj.params()) from pyspark.ml.param import Param for java_param in java_params: java_param_name = java_param.name() if not hasattr(self, java_param_name): - param = Param(self, java_param_name, java_param.doc()) + param: Param[Any] = Param(self, java_param_name, java_param.doc()) setattr(param, "created_from_java_param", True) setattr(self, java_param_name, param) self._params = None # need to reset so self.params will discover new params - def _transfer_params_from_java(self): + def _transfer_params_from_java(self) -> None: """ Transforms the embedded params from the companion Java object. """ sc = SparkContext._active_spark_context + assert sc is not None and self._java_obj is not None + for param in self.params: if self._java_obj.hasParam(param.name): java_param = self._java_obj.getParam(param.name) @@ -195,11 +227,13 @@ def _transfer_params_from_java(self): value = _java2py(sc, self._java_obj.getDefault(java_param)).get() self._setDefault(**{param.name: value}) - def _transfer_param_map_from_java(self, javaParamMap): + def _transfer_param_map_from_java(self, javaParamMap: "JavaObject") -> "ParamMap": """ Transforms a Java ParamMap into a Python ParamMap. """ sc = SparkContext._active_spark_context + assert sc is not None + paramMap = dict() for pair in javaParamMap.toList(): param = pair.param() @@ -208,13 +242,13 @@ def _transfer_param_map_from_java(self, javaParamMap): return paramMap @staticmethod - def _empty_java_param_map(): + def _empty_java_param_map() -> "JavaObject": """ Returns an empty Java ParamMap reference. """ return _jvm().org.apache.spark.ml.param.ParamMap() - def _to_java(self): + def _to_java(self) -> "JavaObject": """ Transfer this instance's Params to the wrapped Java object, and return the Java object. Used for ML persistence. @@ -230,7 +264,7 @@ def _to_java(self): return self._java_obj @staticmethod - def _from_java(java_stage): + def _from_java(java_stage: "JavaObject") -> "JP": """ Given a Java object, create and return a Python wrapper of it. Used for ML persistence. @@ -238,7 +272,7 @@ def _from_java(java_stage): Meta-algorithms such as Pipeline should override this method as a classmethod. """ - def __get_class(clazz): + def __get_class(clazz: str) -> Type[JP]: """ Loads Python class from its name. """ @@ -271,7 +305,7 @@ def __get_class(clazz): ) return py_stage - def copy(self, extra=None): + def copy(self: "JP", extra: Optional["ParamMap"] = None) -> "JP": """ Creates a copy of this instance with the same uid and some extra params. This implementation first calls Params.copy and @@ -297,30 +331,32 @@ def copy(self, extra=None): that._transfer_params_to_java() return that - def clear(self, param): + def clear(self, param: Param) -> None: """ Clears a param from the param map if it has been explicitly set. """ + assert self._java_obj is not None + super(JavaParams, self).clear(param) java_param = self._java_obj.getParam(param.name) self._java_obj.clear(java_param) @inherit_doc -class JavaEstimator(JavaParams, Estimator, metaclass=ABCMeta): +class JavaEstimator(JavaParams, Estimator[JM], metaclass=ABCMeta): """ Base class for :py:class:`Estimator`s that wrap Java/Scala implementations. 
""" @abstractmethod - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> JM: """ Creates a model from the input Java model reference. """ raise NotImplementedError() - def _fit_java(self, dataset): + def _fit_java(self, dataset: DataFrame) -> "JavaObject": """ Fits a Java model to the input dataset. @@ -334,10 +370,12 @@ def _fit_java(self, dataset): py4j.java_gateway.JavaObject fitted Java model """ + assert self._java_obj is not None + self._transfer_params_to_java() return self._java_obj.fit(dataset._jdf) - def _fit(self, dataset): + def _fit(self, dataset: DataFrame) -> JM: java_model = self._fit_java(dataset) model = self._create_model(java_model) return self._copyValues(model) @@ -351,7 +389,9 @@ class JavaTransformer(JavaParams, Transformer, metaclass=ABCMeta): available as _java_obj. """ - def _transform(self, dataset): + def _transform(self, dataset: DataFrame) -> DataFrame: + assert self._java_obj is not None + self._transfer_params_to_java() return DataFrame(self._java_obj.transform(dataset._jdf), dataset.sql_ctx) @@ -364,7 +404,7 @@ class JavaModel(JavaTransformer, Model, metaclass=ABCMeta): param mix-ins, because this sets the UID from the Java model. """ - def __init__(self, java_model=None): + def __init__(self, java_model: Optional["JavaObject"] = None): """ Initialize this instance with a Java model object. Subclasses should call this constructor, initialize params, @@ -388,12 +428,12 @@ def __init__(self, java_model=None): self._resetUid(java_model.uid()) - def __repr__(self): + def __repr__(self) -> str: return self._call_java("toString") @inherit_doc -class JavaPredictor(Predictor, JavaEstimator, _PredictorParams, metaclass=ABCMeta): +class JavaPredictor(Predictor, JavaEstimator[JM], _PredictorParams, Generic[JM], metaclass=ABCMeta): """ (Private) Java Estimator for prediction tasks (regression and classification). """ @@ -402,21 +442,21 @@ class JavaPredictor(Predictor, JavaEstimator, _PredictorParams, metaclass=ABCMet @inherit_doc -class JavaPredictionModel(PredictionModel, JavaModel, _PredictorParams): +class JavaPredictionModel(PredictionModel[T], JavaModel, _PredictorParams): """ (Private) Java Model for prediction tasks (regression and classification). """ - @property + @property # type: ignore[misc] @since("2.1.0") - def numFeatures(self): + def numFeatures(self) -> int: """ Returns the number of features the model was trained on. If unknown, returns -1 """ return self._call_java("numFeatures") @since("3.0.0") - def predict(self, value): + def predict(self, value: T) -> float: """ Predict label for the given features. """ diff --git a/python/pyspark/ml/wrapper.pyi b/python/pyspark/ml/wrapper.pyi deleted file mode 100644 index 7b3bfb4a7a36d..0000000000000 --- a/python/pyspark/ml/wrapper.pyi +++ /dev/null @@ -1,51 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import abc
-from typing import Any, Optional, Generic
-from pyspark.ml._typing import P, T, JM, ParamMap
-
-from pyspark.ml import Estimator, Predictor, PredictionModel, Transformer, Model
-from pyspark.ml.base import _PredictorParams
-from pyspark.ml.param import Param, Params
-from pyspark.sql.dataframe import DataFrame
-
-class JavaWrapper:
-    def __init__(self, java_obj: Optional[Any] = ...) -> None: ...
-    def __del__(self) -> None: ...
-    def _call_java(self, name: str, *args: Any) -> Any: ...
-
-class JavaParams(JavaWrapper, Params, metaclass=abc.ABCMeta):
-    def copy(self: P, extra: Optional[ParamMap] = ...) -> P: ...
-    def clear(self, param: Param) -> None: ...
-
-class JavaEstimator(Generic[JM], JavaParams, Estimator[JM], metaclass=abc.ABCMeta):
-    def _fit(self, dataset: DataFrame) -> JM: ...
-
-class JavaTransformer(JavaParams, Transformer, metaclass=abc.ABCMeta):
-    def _transform(self, dataset: DataFrame) -> DataFrame: ...
-
-class JavaModel(JavaTransformer, Model, metaclass=abc.ABCMeta):
-    def __init__(self, java_model: Optional[Any] = ...) -> None: ...
-
-class JavaPredictor(Predictor[JM], JavaEstimator, _PredictorParams, metaclass=abc.ABCMeta): ...
-
-class JavaPredictionModel(PredictionModel[T], JavaModel, _PredictorParams):
-    @property
-    def numFeatures(self) -> int: ...
-    def predict(self, value: T) -> float: ...

From 26138a4a3a0968885de5316108f0d1164139e357 Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Mon, 7 Feb 2022 13:49:41 +0800
Subject: [PATCH 161/513] [SPARK-38122][DOCS] Update the App Key of DocSearch

### What changes were proposed in this pull request?

Update the App Key of DocSearch as per the comment https://github.com/algolia/docsearch-configs/pull/5054#issuecomment-1025483446

### Why are the changes needed?

DocSearch now runs on new infrastructure: https://docsearch.algolia.com/docs/migrating-from-legacy/
The new version allows us to manage indexes, but we have to update the App Key and App ID in the configuration to make it work.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Manually verified in a local setup.

Closes #35411 from gengliangwang/updateDocSearch.

Authored-by: Gengliang Wang
Signed-off-by: Gengliang Wang
---
 docs/_config.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/_config.yml b/docs/_config.yml
index d234c08bd62d5..8ac911a1fe400 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -34,7 +34,8 @@ SPARK_GITHUB_URL: https://github.com/apache/spark
 # (https://spark.apache.org/docs/latest/) even when visiting the documentation of previous releases.
 DOCSEARCH_SCRIPT: |
   docsearch({
-    apiKey: 'b18ca3732c502995563043aa17bc6ecb',
+    apiKey: 'd62f962a82bc9abb53471cb7b89da35e',
+    appId: 'RAI69RXRSK',
     indexName: 'apache_spark',
     inputSelector: '#docsearch-input',
     enhancedSearchInput: true,

From 93a4ed042feb0d82781bcc7578e9820355061609 Mon Sep 17 00:00:00 2001
From: Martin Tzvetanov Grigorov
Date: Sun, 6 Feb 2022 23:24:46 -0800
Subject: [PATCH 162/513] [SPARK-37735][K8S][TESTS][FOLLOWUP] Remove casting to KubernetesConf in tests

### What changes were proposed in this pull request?

Minor simplification: there is no need to cast org.apache.spark.deploy.k8s.KubernetesDriverConf/KubernetesExecutorConf to org.apache.spark.deploy.k8s.KubernetesConf in KubernetesConfSuite.

Related-to: https://github.com/apache/spark/pull/35015

### Why are the changes needed?

Small simplification of the test code.
### Does this PR introduce _any_ user-facing change?

No!

### How was this patch tested?

Build and test KubernetesConfSuite#test("SPARK-37735: access appId in KubernetesConf")

Closes #35413 from martin-g/spark-37735-remove-casting.

Authored-by: Martin Tzvetanov Grigorov
Signed-off-by: Dongjoon Hyun
---
 .../org/apache/spark/deploy/k8s/KubernetesConfSuite.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala
index 1b3aaa579c621..d0a222df40bc1 100644
--- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala
@@ -222,8 +222,8 @@ class KubernetesConfSuite extends SparkFunSuite {
     val sparkConf = new SparkConf(false)
     val driverConf = KubernetesTestConf.createDriverConf(sparkConf)
     val execConf = KubernetesTestConf.createExecutorConf(sparkConf)
-    assert(driverConf.asInstanceOf[KubernetesConf].appId === KubernetesTestConf.APP_ID)
-    assert(execConf.asInstanceOf[KubernetesConf].appId === KubernetesTestConf.APP_ID)
+    assert(driverConf.appId === KubernetesTestConf.APP_ID)
+    assert(execConf.appId === KubernetesTestConf.APP_ID)
   }

   test("SPARK-36566: get app name label") {

From e98f13e4a4eaf4719e85ba881c894cbc8377c363 Mon Sep 17 00:00:00 2001
From: Jerry Peng
Date: Mon, 7 Feb 2022 17:45:26 +0900
Subject: [PATCH 163/513] [SPARK-38046][SS][TEST] Fix KafkaSource/KafkaMicroBatch flaky test due to non-deterministic timing

### What changes were proposed in this pull request?

Fix a flaky test in KafkaMicroBatchSourceSuite.

### Why are the changes needed?

There is a test called "compositeReadLimit"

https://github.com/apache/spark/blob/master/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala#L460

that is flaky. The problem is that the Kafka connector always reads the actual system time and never advances it manually, which leaves room for non-deterministic behavior, especially since the source determines whether "maxTriggerDelayMs" is satisfied by comparing the last trigger time with the current system time. One can simply "sleep" at points in the test to generate different outcomes.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Closes #35343 from jerrypeng/SPARK-38046.
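The mechanism behind the fix is worth spelling out before the diff: the trigger decision compares the last trigger time against "now", so the test can only be made deterministic if "now" is something the test controls. The actual change below is in Scala and wires Spark's own `Clock`/`SystemClock`/`ManualClock` utilities into the Kafka sources; what follows is only a minimal Python sketch of that clock-injection pattern, with hypothetical names (`ManualClock`, `TriggerGate`, `delay_expired`) that are not part of any Spark API.

```python
import time


class SystemClock:
    """Production clock: reads real wall-clock time."""

    def time_millis(self) -> int:
        return int(time.time() * 1000)


class ManualClock:
    """Test clock: only moves when the test advances it explicitly."""

    def __init__(self, start_ms: int = 0) -> None:
        self._now_ms = start_ms

    def time_millis(self) -> int:
        return self._now_ms

    def advance(self, ms: int) -> None:
        self._now_ms += ms


class TriggerGate:
    """Decides whether the max trigger delay has elapsed since the last batch."""

    def __init__(self, clock, max_trigger_delay_ms: int) -> None:
        self._clock = clock
        self._max_delay_ms = max_trigger_delay_ms
        self._last_trigger_ms = 0

    def delay_expired(self) -> bool:
        elapsed = self._clock.time_millis() - self._last_trigger_ms
        if elapsed >= self._max_delay_ms:
            self._last_trigger_ms = self._clock.time_millis()
            return True
        return False


# In a test, the outcome no longer depends on how long earlier steps took:
clock = ManualClock()
gate = TriggerGate(clock, max_trigger_delay_ms=5_000)
assert not gate.delay_expired()  # nothing has "elapsed" yet
clock.advance(5_000)
assert gate.delay_expired()      # the test decides exactly when time passes
```

Production code constructs the gate with the system clock, while the test constructs it with the manual clock and advances time explicitly instead of sleeping, which is why the flakiness disappears.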
Authored-by: Jerry Peng Signed-off-by: Jungtaek Lim --- .../sql/kafka010/KafkaMicroBatchStream.scala | 37 +++++++++++++++++-- .../spark/sql/kafka010/KafkaSource.scala | 15 ++++++-- .../sql/kafka010/KafkaSourceProvider.scala | 2 + .../kafka010/KafkaMicroBatchSourceSuite.scala | 18 ++++++--- 4 files changed, 59 insertions(+), 13 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala index 829ee15c13a3d..77bc658a1ef20 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala @@ -31,8 +31,9 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory} import org.apache.spark.sql.connector.read.streaming._ import org.apache.spark.sql.kafka010.KafkaSourceProvider._ +import org.apache.spark.sql.kafka010.MockedSystemClock.currentMockSystemTime import org.apache.spark.sql.util.CaseInsensitiveStringMap -import org.apache.spark.util.{UninterruptibleThread, Utils} +import org.apache.spark.util.{Clock, ManualClock, SystemClock, UninterruptibleThread, Utils} /** * A [[MicroBatchStream]] that reads data from Kafka. @@ -73,6 +74,13 @@ private[kafka010] class KafkaMicroBatchStream( Utils.timeStringAsMs(Option(options.get( KafkaSourceProvider.MAX_TRIGGER_DELAY)).getOrElse(DEFAULT_MAX_TRIGGER_DELAY)) + // this allows us to mock system clock for testing purposes + private[kafka010] val clock: Clock = if (options.containsKey(MOCK_SYSTEM_TIME)) { + new MockedSystemClock + } else { + new SystemClock + } + private var lastTriggerMillis = 0L private val includeHeaders = options.getBoolean(INCLUDE_HEADERS, false) @@ -166,9 +174,9 @@ private[kafka010] class KafkaMicroBatchStream( currentOffsets: Map[TopicPartition, Long], maxTriggerDelayMs: Long): Boolean = { // Checking first if the maxbatchDelay time has passed - if ((System.currentTimeMillis() - lastTriggerMillis) >= maxTriggerDelayMs) { + if ((clock.getTimeMillis() - lastTriggerMillis) >= maxTriggerDelayMs) { logDebug("Maximum wait time is passed, triggering batch") - lastTriggerMillis = System.currentTimeMillis() + lastTriggerMillis = clock.getTimeMillis() false } else { val newRecords = latestOffsets.flatMap { @@ -176,7 +184,7 @@ private[kafka010] class KafkaMicroBatchStream( Some(topic -> (offset - currentOffsets.getOrElse(topic, 0L))) }.values.sum.toDouble if (newRecords < minLimit) true else { - lastTriggerMillis = System.currentTimeMillis() + lastTriggerMillis = clock.getTimeMillis() false } } @@ -347,3 +355,24 @@ object KafkaMicroBatchStream extends Logging { ju.Collections.emptyMap() } } + +/** + * To return a mocked system clock for testing purposes + */ +private[kafka010] class MockedSystemClock extends ManualClock { + override def getTimeMillis(): Long = { + currentMockSystemTime + } +} + +private[kafka010] object MockedSystemClock { + var currentMockSystemTime = 0L + + def advanceCurrentSystemTime(advanceByMillis: Long): Unit = { + currentMockSystemTime += advanceByMillis + } + + def reset(): Unit = { + currentMockSystemTime = 0L + } +} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index 09db0a7e82dfe..c82fda85eb4e8 100644 --- 
a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.connector.read.streaming.{Offset => _, _} import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.kafka010.KafkaSourceProvider._ import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils +import org.apache.spark.util.{Clock, SystemClock, Utils} /** * A [[Source]] that reads data from Kafka using the following design. @@ -94,6 +94,13 @@ private[kafka010] class KafkaSource( private[kafka010] val maxTriggerDelayMs = Utils.timeStringAsMs(sourceOptions.get(MAX_TRIGGER_DELAY).getOrElse(DEFAULT_MAX_TRIGGER_DELAY)) + // this allows us to mock system clock for testing purposes + private[kafka010] val clock: Clock = if (sourceOptions.contains(MOCK_SYSTEM_TIME)) { + new MockedSystemClock + } else { + new SystemClock + } + private val includeHeaders = sourceOptions.getOrElse(INCLUDE_HEADERS, "false").toBoolean @@ -216,9 +223,9 @@ private[kafka010] class KafkaSource( currentOffsets: Map[TopicPartition, Long], maxTriggerDelayMs: Long): Boolean = { // Checking first if the maxbatchDelay time has passed - if ((System.currentTimeMillis() - lastTriggerMillis) >= maxTriggerDelayMs) { + if ((clock.getTimeMillis() - lastTriggerMillis) >= maxTriggerDelayMs) { logDebug("Maximum wait time is passed, triggering batch") - lastTriggerMillis = System.currentTimeMillis() + lastTriggerMillis = clock.getTimeMillis() false } else { val newRecords = latestOffsets.flatMap { @@ -226,7 +233,7 @@ private[kafka010] class KafkaSource( Some(topic -> (offset - currentOffsets.getOrElse(topic, 0L))) }.values.sum.toDouble if (newRecords < minLimit) true else { - lastTriggerMillis = System.currentTimeMillis() + lastTriggerMillis = clock.getTimeMillis() false } } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala index 640996da67bca..3747621b36089 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala @@ -562,6 +562,8 @@ private[kafka010] object KafkaSourceProvider extends Logging { "startingoffsetsbytimestampstrategy" private val GROUP_ID_PREFIX = "groupidprefix" private[kafka010] val INCLUDE_HEADERS = "includeheaders" + // This is only for internal testing and should not be used otherwise. 
+ private[kafka010] val MOCK_SYSTEM_TIME = "_mockSystemTime" private[kafka010] object StrategyOnNoMatchStartingOffset extends Enumeration { val ERROR, LATEST = Value diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 61be7dd6cd8ef..5037af16c28e4 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -458,6 +458,8 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { } test("compositeReadLimit") { + MockedSystemClock.reset() + val topic = newTopic() testUtils.createTopic(topic, partitions = 3) testUtils.sendMessages(topic, (100 to 120).map(_.toString).toArray, Some(0)) @@ -474,6 +476,9 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { .option("maxOffsetsPerTrigger", 20) .option("subscribe", topic) .option("startingOffsets", "earliest") + // mock system time to ensure deterministic behavior + // in determining if maxOffsetsPerTrigger is satisfied + .option("_mockSystemTime", "") val kafka = reader.load() .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") .as[(String, String)] @@ -481,6 +486,10 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { val clock = new StreamManualClock + def advanceSystemClock(mills: Long): ExternalAction = () => { + MockedSystemClock.advanceCurrentSystemTime(mills) + } + testStream(mapped)( StartStream(Trigger.ProcessingTime(100), clock), waitUntilBatchProcessed(clock), @@ -492,6 +501,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { // No data is processed for next batch as data is less than minOffsetsPerTrigger // and maxTriggerDelay is not expired AdvanceManualClock(100), + advanceSystemClock(100), waitUntilBatchProcessed(clock), CheckNewAnswer(), Assert { @@ -501,6 +511,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { true }, AdvanceManualClock(100), + advanceSystemClock(100), waitUntilBatchProcessed(clock), // Running batch now as number of new records is greater than minOffsetsPerTrigger // but reading limited data as per maxOffsetsPerTrigger @@ -512,14 +523,11 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { // Testing maxTriggerDelay // No data is processed for next batch till maxTriggerDelay is expired AdvanceManualClock(100), + advanceSystemClock(100), waitUntilBatchProcessed(clock), CheckNewAnswer(), - // Sleeping for 5s to let maxTriggerDelay expire - Assert { - Thread.sleep(5 * 1000) - true - }, AdvanceManualClock(100), + advanceSystemClock(5000), // Running batch as maxTriggerDelay is expired waitUntilBatchProcessed(clock), CheckAnswer(1, 10, 100, 101, 102, 103, 104, 105, 106, 107, From f62b36c6d3964c40336959b129b284edb8097f61 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 7 Feb 2022 21:18:04 +0900 Subject: [PATCH 164/513] [SPARK-38128][PYTHON][TESTS] Show full stacktrace in tests by default in PySpark tests ### What changes were proposed in this pull request? This PR proposes to show full stacktrace of Python worker and JVM in PySpark by controlling `spark.sql.pyspark.jvmStacktrace.enabled` and `spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled` only in tests. ### Why are the changes needed? 
[SPARK-33407](https://issues.apache.org/jira/browse/SPARK-33407) and [SPARK-31849](https://issues.apache.org/jira/browse/SPARK-31849) hide Java stacktrace and internal Python worker side traceback by default for simpler error messages to end users. However, specifically for unit tests, that makes a bit harder to debug the test failures. We should probably show the full stacktrace by default in tests. ### Does this PR introduce _any_ user-facing change? No, this is test only. ### How was this patch tested? Manually tested. Now the test failures show the logs as below: **Before:** ``` ===================================================================== ERROR [3.480s]: test (pyspark.sql.tests.test_functions.FunctionsTests) ---------------------------------------------------------------------- Traceback (most recent call last): ... pyspark.sql.utils.PythonException: An exception was thrown from the Python worker. Please see the stack trace below. Traceback (most recent call last): File "/.../pyspark/sql/tests/test_functions.py", line 60, in self.spark.range(1).select(udf(lambda x: x / 0)("id")).show() ZeroDivisionError: division by zero ---------------------------------------------------------------------- Ran 1 test in 12.468s FAILED (errors=1) ``` **After:** ``` ====================================================================== ERROR [3.259s]: test (pyspark.sql.tests.test_functions.FunctionsTests) ---------------------------------------------------------------------- Traceback (most recent call last): ... pyspark.sql.utils.PythonException: An exception was thrown from the Python worker. Please see the stack trace below. Traceback (most recent call last): File "/.../pyspark/worker.py", line 678, in main process() File "/.../pyspark/worker.py", line 670, in process serializer.dump_stream(out_iter, outfile) File "/.../lib/pyspark/serializers.py", line 217, in dump_stream self.serializer.dump_stream(self._batched(iterator), stream) ... ZeroDivisionError: division by zero JVM stacktrace: ... at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:558) at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:86) at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:68) at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:511) at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) ... Driver stacktrace: ... Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last): ... 1 more ---------------------------------------------------------------------- Ran 1 test in 12.610s FAILED (errors=1) ``` Closes #35423 from HyukjinKwon/SPARK-38128. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 42979a68d8578..59a896a29b6f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2383,7 +2383,8 @@ object SQLConf { "and shows a Python-friendly exception only.") .version("3.0.0") .booleanConf - .createWithDefault(false) + // show full stacktrace in tests but hide in production by default. 
+ .createWithDefault(Utils.isTesting) val ARROW_SPARKR_EXECUTION_ENABLED = buildConf("spark.sql.execution.arrow.sparkr.enabled") @@ -2440,7 +2441,8 @@ object SQLConf { "shows the exception messages from UDFs. Note that this works only with CPython 3.7+.") .version("3.1.0") .booleanConf - .createWithDefault(true) + // show full stacktrace in tests but hide in production by default. + .createWithDefault(!Utils.isTesting) val PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_NAME = buildConf("spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName") From 65c0bdf499b81c9febfc1591ec94ebf4c72c92c7 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 7 Feb 2022 19:58:52 +0300 Subject: [PATCH 165/513] [SPARK-38126][SQL][TESTS] Check the whole message of error classes ### What changes were proposed in this pull request? In the PR, I propose to check the whole error messages of error classes in the test suites `Query.*ErrorsSuite`. ### Why are the changes needed? 1. To catch changes in error classes. 2. To improve test coverage. 3. Set user expectations from Spark's errors. 4. The checks can be considered as documentation of error messages in tests. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "test:testOnly *QueryParsingErrorsSuite" $ build/sbt "test:testOnly *QueryCompilationErrorsSuite" $ build/sbt "test:testOnly *QueryCompilationErrorsDSv2Suite" $ build/sbt "test:testOnly *QueryExecutionErrorsSuite" ``` Closes #35416 from MaxGekk/error-classes-check-whole-msg. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../errors/QueryCompilationErrorsSuite.scala | 9 ++- .../errors/QueryExecutionErrorsSuite.scala | 12 ++-- .../sql/errors/QueryParsingErrorsSuite.scala | 64 +++++++++++++++---- 3 files changed, 62 insertions(+), 23 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index f41030bf3cbcc..673925865c06f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -37,7 +37,7 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { val msg1 = intercept[AnalysisException] { sql("select 'value1' as a, 1L as b").as[StringIntClass] }.message - assert(msg1 == + assert(msg1 === s""" |Cannot up cast b from bigint to int. |The type path of the target object is: @@ -51,7 +51,7 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { " named_struct('a', 'value1', 'b', cast(1.0 as decimal(38,18))) as b") .as[ComplexClass] }.message - assert(msg2 == + assert(msg2 === s""" |Cannot up cast b.`b` from decimal(38,18) to bigint. 
|The type path of the target object is: @@ -72,9 +72,8 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { Dataset.ofRows(spark, plan) }.message - assert(msg.contains("The feature is not supported: " + + assert(msg.matches("The feature is not supported: " + "UpCast only support DecimalType as AbstractDataType yet," + - " but got: org.apache.spark.sql.types.NumericType")) + """ but got: org.apache.spark.sql.types.NumericType\$\@\w+""")) } - } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 4b2564034344a..d241f6c3b768e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -50,9 +50,9 @@ class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { }.getCause.asInstanceOf[SparkRuntimeException] assert(e.getErrorClass === "INVALID_PARAMETER_VALUE") assert(e.getSqlState === "22023") - assert(e.getMessage.contains( - "The value of parameter(s) 'key' in the aes_encrypt/aes_decrypt function is invalid: " + - "expects a binary value with 16, 24 or 32 bytes, but got")) + assert(e.getMessage.matches( + "The value of parameter\\(s\\) 'key' in the aes_encrypt/aes_decrypt function is invalid: " + + "expects a binary value with 16, 24 or 32 bytes, but got \\d+ bytes.")) } // Encryption failure - invalid key length @@ -84,9 +84,11 @@ class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { }.getCause.asInstanceOf[SparkRuntimeException] assert(e.getErrorClass === "INVALID_PARAMETER_VALUE") assert(e.getSqlState === "22023") - assert(e.getMessage.contains( + assert(e.getMessage === "The value of parameter(s) 'expr, key' in the aes_encrypt/aes_decrypt function " + - "is invalid: Detail message:")) + "is invalid: Detail message: " + + "Given final block not properly padded. 
" + + "Such issues can arise if a bad key is used during decryption.") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala index 03117b9608d0f..466852dae7022 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -32,7 +32,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { } assert(e.getErrorClass === errorClass) assert(e.getSqlState === sqlState) - assert(e.getMessage.contains(message)) + assert(e.getMessage === message) } test("UNSUPPORTED_FEATURE: LATERAL join with NATURAL join not supported") { @@ -40,7 +40,14 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlText = "SELECT * FROM t1 NATURAL JOIN LATERAL (SELECT c1 + c2 AS c2)", errorClass = "UNSUPPORTED_FEATURE", sqlState = "0A000", - message = "The feature is not supported: LATERAL join with NATURAL join.") + message = + """ + |The feature is not supported: LATERAL join with NATURAL join.(line 1, pos 14) + | + |== SQL == + |SELECT * FROM t1 NATURAL JOIN LATERAL (SELECT c1 + c2 AS c2) + |--------------^^^ + |""".stripMargin) } test("UNSUPPORTED_FEATURE: LATERAL join with USING join not supported") { @@ -48,11 +55,19 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlText = "SELECT * FROM t1 JOIN LATERAL (SELECT c1 + c2 AS c2) USING (c2)", errorClass = "UNSUPPORTED_FEATURE", sqlState = "0A000", - message = "The feature is not supported: LATERAL join with USING join.") + message = + """ + |The feature is not supported: LATERAL join with USING join.(line 1, pos 14) + | + |== SQL == + |SELECT * FROM t1 JOIN LATERAL (SELECT c1 + c2 AS c2) USING (c2) + |--------------^^^ + |""".stripMargin) } test("UNSUPPORTED_FEATURE: Unsupported LATERAL join type") { - Seq(("RIGHT OUTER", "RightOuter"), + Seq( + ("RIGHT OUTER", "RightOuter"), ("FULL OUTER", "FullOuter"), ("LEFT SEMI", "LeftSemi"), ("LEFT ANTI", "LeftAnti")).foreach { pair => @@ -60,22 +75,38 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlText = s"SELECT * FROM t1 ${pair._1} JOIN LATERAL (SELECT c1 + c2 AS c3) ON c2 = c3", errorClass = "UNSUPPORTED_FEATURE", sqlState = "0A000", - message = s"The feature is not supported: LATERAL join type '${pair._2}'.") + message = + s""" + |The feature is not supported: LATERAL join type '${pair._2}'.(line 1, pos 14) + | + |== SQL == + |SELECT * FROM t1 ${pair._1} JOIN LATERAL (SELECT c1 + c2 AS c3) ON c2 = c3 + |--------------^^^ + |""".stripMargin) } } test("SPARK-35789: INVALID_SQL_SYNTAX - LATERAL can only be used with subquery") { - Seq("SELECT * FROM t1, LATERAL t2", - "SELECT * FROM t1 JOIN LATERAL t2", - "SELECT * FROM t1, LATERAL (t2 JOIN t3)", - "SELECT * FROM t1, LATERAL (LATERAL t2)", - "SELECT * FROM t1, LATERAL VALUES (0, 1)", - "SELECT * FROM t1, LATERAL RANGE(0, 1)").foreach { sqlText => + Seq( + "SELECT * FROM t1, LATERAL t2" -> 26, + "SELECT * FROM t1 JOIN LATERAL t2" -> 30, + "SELECT * FROM t1, LATERAL (t2 JOIN t3)" -> 26, + "SELECT * FROM t1, LATERAL (LATERAL t2)" -> 26, + "SELECT * FROM t1, LATERAL VALUES (0, 1)" -> 26, + "SELECT * FROM t1, LATERAL RANGE(0, 1)" -> 26 + ).foreach { case (sqlText, pos) => validateParsingError( sqlText = sqlText, errorClass = "INVALID_SQL_SYNTAX", sqlState = "42000", - message = "Invalid SQL syntax: LATERAL can only be used with 
subquery.") + message = + s""" + |Invalid SQL syntax: LATERAL can only be used with subquery.(line 1, pos $pos) + | + |== SQL == + |$sqlText + |${"-" * pos}^^^ + |""".stripMargin) } } @@ -84,6 +115,13 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlText = "SELECT * FROM a NATURAL CROSS JOIN b", errorClass = "UNSUPPORTED_FEATURE", sqlState = "0A000", - message = "The feature is not supported: NATURAL CROSS JOIN.") + message = + """ + |The feature is not supported: NATURAL CROSS JOIN.(line 1, pos 14) + | + |== SQL == + |SELECT * FROM a NATURAL CROSS JOIN b + |--------------^^^ + |""".stripMargin) } } From 0704e957266aac0fa4247eb2e53d019348188164 Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Mon, 7 Feb 2022 16:18:08 -0800 Subject: [PATCH 166/513] [SPARK-38132][SQL] Remove `NotPropagation` rule ### What changes were proposed in this pull request? This is a follow-up PR to mitigate the bug introduced by SPARK-36665. This PR removes `NotPropagation` optimization for now until we find a better approach. ### Why are the changes needed? `NotPropagation` optimization previously broke `RewritePredicateSubquery` so that it does not properly rewrite the predicate to a NULL-aware left anti join anymore. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #35428 from kazuyukitanimura/SPARK-36665-fix. Authored-by: Kazuyuki Tanimura Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/optimizer/Optimizer.scala | 1 - .../sql/catalyst/optimizer/expressions.scala | 47 ----- .../sql/catalyst/rules/RuleIdCollection.scala | 1 - .../optimizer/NotPropagationSuite.scala | 176 ------------------ .../optimizer/NullDownPropagationSuite.scala | 1 - .../org/apache/spark/sql/SubquerySuite.scala | 26 +++ 6 files changed, 26 insertions(+), 226 deletions(-) delete mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NotPropagationSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 8fba271524e85..61d6e3901cda5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -108,7 +108,6 @@ abstract class Optimizer(catalogManager: CatalogManager) EliminateAggregateFilter, ReorderAssociativeOperator, LikeSimplification, - NotPropagation, BooleanSimplification, SimplifyConditionals, PushFoldableIntoBranches, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index eda4217cd957d..c1e5783d63c6b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -447,53 +447,6 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { } -/** - * Move/Push `Not` operator if it's beneficial. - */ -object NotPropagation extends Rule[LogicalPlan] { - // Given argument x, return true if expression Not(x) can be simplified - // E.g. 
let x == Not(y), then canSimplifyNot(x) == true because Not(x) == Not(Not(y)) == y - // For the case of x = EqualTo(a, b), recursively check each child expression - // Extra nullable check is required for EqualNullSafe because - // Not(EqualNullSafe(e, null)) is different from EqualNullSafe(e, Not(null)) - private def canSimplifyNot(x: Expression): Boolean = x match { - case Literal(_, BooleanType) | Literal(_, NullType) => true - case _: Not | _: IsNull | _: IsNotNull | _: And | _: Or => true - case _: GreaterThan | _: GreaterThanOrEqual | _: LessThan | _: LessThanOrEqual => true - case EqualTo(a, b) if canSimplifyNot(a) || canSimplifyNot(b) => true - case EqualNullSafe(a, b) - if !a.nullable && !b.nullable && (canSimplifyNot(a) || canSimplifyNot(b)) => true - case _ => false - } - - def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning( - _.containsPattern(NOT), ruleId) { - case q: LogicalPlan => q.transformExpressionsDownWithPruning(_.containsPattern(NOT), ruleId) { - // Move `Not` from one side of `EqualTo`/`EqualNullSafe` to the other side if it's beneficial. - // E.g. `EqualTo(Not(a), b)` where `b = Not(c)`, it will become - // `EqualTo(a, Not(b))` => `EqualTo(a, Not(Not(c)))` => `EqualTo(a, c)` - // In addition, `if canSimplifyNot(b)` checks if the optimization can converge - // that avoids the situation two conditions are returning to each other. - case EqualTo(Not(a), b) if !canSimplifyNot(a) && canSimplifyNot(b) => EqualTo(a, Not(b)) - case EqualTo(a, Not(b)) if canSimplifyNot(a) && !canSimplifyNot(b) => EqualTo(Not(a), b) - case EqualNullSafe(Not(a), b) if !canSimplifyNot(a) && canSimplifyNot(b) => - EqualNullSafe(a, Not(b)) - case EqualNullSafe(a, Not(b)) if canSimplifyNot(a) && !canSimplifyNot(b) => - EqualNullSafe(Not(a), b) - - // Push `Not` to one side of `EqualTo`/`EqualNullSafe` if it's beneficial. - // E.g. Not(EqualTo(x, false)) => EqualTo(x, true) - case Not(EqualTo(a, b)) if canSimplifyNot(b) => EqualTo(a, Not(b)) - case Not(EqualTo(a, b)) if canSimplifyNot(a) => EqualTo(Not(a), b) - case Not(EqualNullSafe(a, b)) if !a.nullable && !b.nullable && canSimplifyNot(b) => - EqualNullSafe(a, Not(b)) - case Not(EqualNullSafe(a, b)) if !a.nullable && !b.nullable && canSimplifyNot(a) => - EqualNullSafe(Not(a), b) - } - } -} - - /** * Simplifies binary comparisons with semantically-equal expressions: * 1) Replace '<=>' with 'true' literal. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala index 66a6a890022ac..935e51cd4a3d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala @@ -116,7 +116,6 @@ object RuleIdCollection { "org.apache.spark.sql.catalyst.optimizer.LikeSimplification" :: "org.apache.spark.sql.catalyst.optimizer.LimitPushDown" :: "org.apache.spark.sql.catalyst.optimizer.LimitPushDownThroughWindow" :: - "org.apache.spark.sql.catalyst.optimizer.NotPropagation" :: "org.apache.spark.sql.catalyst.optimizer.NullDownPropagation" :: "org.apache.spark.sql.catalyst.optimizer.NullPropagation" :: "org.apache.spark.sql.catalyst.optimizer.ObjectSerializerPruning" :: diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NotPropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NotPropagationSuite.scala deleted file mode 100644 index d9506098b1d00..0000000000000 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NotPropagationSuite.scala +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.catalyst.optimizer - -import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.analysis._ -import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.types.BooleanType - -class NotPropagationSuite extends PlanTest with ExpressionEvalHelper { - - object Optimize extends RuleExecutor[LogicalPlan] { - val batches = - Batch("AnalysisNodes", Once, EliminateSubqueryAliases) :: - Batch("Not Propagation", FixedPoint(50), - NullPropagation, - NullDownPropagation, - ConstantFolding, - SimplifyConditionals, - BooleanSimplification, - NotPropagation, - PruneFilters) :: Nil - } - - val testRelation = LocalRelation('a.int, 'b.int, 'c.int, 'd.string, - 'e.boolean, 'f.boolean, 'g.boolean, 'h.boolean) - - val testRelationWithData = LocalRelation.fromExternalRows( - testRelation.output, Seq(Row(1, 2, 3, "abc")) - ) - - private def checkCondition(input: Expression, expected: LogicalPlan): Unit = { - val plan = testRelationWithData.where(input).analyze - val actual = Optimize.execute(plan) - comparePlans(actual, expected) - } - - private def checkCondition(input: Expression, expected: Expression): Unit = { - val plan = testRelation.where(input).analyze - val actual = Optimize.execute(plan) - val correctAnswer = testRelation.where(expected).analyze - comparePlans(actual, correctAnswer) - } - - test("Using (Not(a) === b) == (a === Not(b)), (Not(a) <=> b) == (a <=> Not(b)) rules") { - checkCondition(Not('e) === Literal(true), 'e === Literal(false)) - checkCondition(Not('e) === Literal(false), 'e === Literal(true)) - checkCondition(Not('e) === Literal(null, BooleanType), testRelation) - checkCondition(Literal(true) === Not('e), Literal(false) === 'e) - checkCondition(Literal(false) === Not('e), Literal(true) === 'e) - checkCondition(Literal(null, BooleanType) === Not('e), testRelation) - checkCondition(Not('e) <=> Literal(true), 'e <=> Literal(false)) - checkCondition(Not('e) <=> Literal(false), 'e <=> Literal(true)) - checkCondition(Not('e) <=> Literal(null, BooleanType), IsNull('e)) - checkCondition(Literal(true) <=> Not('e), Literal(false) <=> 'e) - checkCondition(Literal(false) <=> Not('e), Literal(true) <=> 'e) - checkCondition(Literal(null, BooleanType) <=> Not('e), IsNull('e)) - - checkCondition(Not('e) === Not('f), 'e === 'f) - checkCondition(Not('e) <=> Not('f), 'e <=> 'f) - - checkCondition(IsNull('e) === Not('f), IsNotNull('e) === 'f) - checkCondition(Not('e) === IsNull('f), 'e === IsNotNull('f)) - checkCondition(IsNull('e) <=> Not('f), IsNotNull('e) <=> 'f) - checkCondition(Not('e) <=> IsNull('f), 'e <=> IsNotNull('f)) - - checkCondition(IsNotNull('e) === Not('f), IsNull('e) === 'f) - checkCondition(Not('e) === IsNotNull('f), 'e === IsNull('f)) - checkCondition(IsNotNull('e) <=> Not('f), IsNull('e) <=> 'f) - checkCondition(Not('e) <=> IsNotNull('f), 'e <=> IsNull('f)) - - checkCondition(Not('e) === Not(And('f, 'g)), 'e === And('f, 'g)) - checkCondition(Not(And('e, 'f)) === Not('g), And('e, 'f) === 'g) - checkCondition(Not('e) <=> Not(And('f, 'g)), 'e <=> And('f, 'g)) - checkCondition(Not(And('e, 'f)) <=> Not('g), And('e, 'f) <=> 'g) - - checkCondition(Not('e) === Not(Or('f, 'g)), 'e === Or('f, 'g)) - checkCondition(Not(Or('e, 'f)) === Not('g), Or('e, 'f) === 'g) - 
checkCondition(Not('e) <=> Not(Or('f, 'g)), 'e <=> Or('f, 'g)) - checkCondition(Not(Or('e, 'f)) <=> Not('g), Or('e, 'f) <=> 'g) - - checkCondition(('a > 'b) === Not('f), ('a <= 'b) === 'f) - checkCondition(Not('e) === ('a > 'b), 'e === ('a <= 'b)) - checkCondition(('a > 'b) <=> Not('f), ('a <= 'b) <=> 'f) - checkCondition(Not('e) <=> ('a > 'b), 'e <=> ('a <= 'b)) - - checkCondition(('a >= 'b) === Not('f), ('a < 'b) === 'f) - checkCondition(Not('e) === ('a >= 'b), 'e === ('a < 'b)) - checkCondition(('a >= 'b) <=> Not('f), ('a < 'b) <=> 'f) - checkCondition(Not('e) <=> ('a >= 'b), 'e <=> ('a < 'b)) - - checkCondition(('a < 'b) === Not('f), ('a >= 'b) === 'f) - checkCondition(Not('e) === ('a < 'b), 'e === ('a >= 'b)) - checkCondition(('a < 'b) <=> Not('f), ('a >= 'b) <=> 'f) - checkCondition(Not('e) <=> ('a < 'b), 'e <=> ('a >= 'b)) - - checkCondition(('a <= 'b) === Not('f), ('a > 'b) === 'f) - checkCondition(Not('e) === ('a <= 'b), 'e === ('a > 'b)) - checkCondition(('a <= 'b) <=> Not('f), ('a > 'b) <=> 'f) - checkCondition(Not('e) <=> ('a <= 'b), 'e <=> ('a > 'b)) - } - - test("Using (a =!= b) == (a === Not(b)), Not(a <=> b) == (a <=> Not(b)) rules") { - checkCondition('e =!= Literal(true), 'e === Literal(false)) - checkCondition('e =!= Literal(false), 'e === Literal(true)) - checkCondition('e =!= Literal(null, BooleanType), testRelation) - checkCondition(Literal(true) =!= 'e, Literal(false) === 'e) - checkCondition(Literal(false) =!= 'e, Literal(true) === 'e) - checkCondition(Literal(null, BooleanType) =!= 'e, testRelation) - checkCondition(Not(('a <=> 'b) <=> Literal(true)), ('a <=> 'b) <=> Literal(false)) - checkCondition(Not(('a <=> 'b) <=> Literal(false)), ('a <=> 'b) <=> Literal(true)) - checkCondition(Not(('a <=> 'b) <=> Literal(null, BooleanType)), testRelationWithData) - checkCondition(Not(Literal(true) <=> ('a <=> 'b)), Literal(false) <=> ('a <=> 'b)) - checkCondition(Not(Literal(false) <=> ('a <=> 'b)), Literal(true) <=> ('a <=> 'b)) - checkCondition(Not(Literal(null, BooleanType) <=> IsNull('e)), testRelationWithData) - - checkCondition('e =!= Not('f), 'e === 'f) - checkCondition(Not('e) =!= 'f, 'e === 'f) - checkCondition(Not(('a <=> 'b) <=> Not(('b <=> 'c))), ('a <=> 'b) <=> ('b <=> 'c)) - checkCondition(Not(Not(('a <=> 'b)) <=> ('b <=> 'c)), ('a <=> 'b) <=> ('b <=> 'c)) - - checkCondition('e =!= IsNull('f), 'e === IsNotNull('f)) - checkCondition(IsNull('e) =!= 'f, IsNotNull('e) === 'f) - checkCondition(Not(('a <=> 'b) <=> IsNull('f)), ('a <=> 'b) <=> IsNotNull('f)) - checkCondition(Not(IsNull('e) <=> ('b <=> 'c)), IsNotNull('e) <=> ('b <=> 'c)) - - checkCondition('e =!= IsNotNull('f), 'e === IsNull('f)) - checkCondition(IsNotNull('e) =!= 'f, IsNull('e) === 'f) - checkCondition(Not(('a <=> 'b) <=> IsNotNull('f)), ('a <=> 'b) <=> IsNull('f)) - checkCondition(Not(IsNotNull('e) <=> ('b <=> 'c)), IsNull('e) <=> ('b <=> 'c)) - - checkCondition('e =!= Not(And('f, 'g)), 'e === And('f, 'g)) - checkCondition(Not(And('e, 'f)) =!= 'g, And('e, 'f) === 'g) - checkCondition('e =!= Not(Or('f, 'g)), 'e === Or('f, 'g)) - checkCondition(Not(Or('e, 'f)) =!= 'g, Or('e, 'f) === 'g) - - checkCondition(('a > 'b) =!= 'f, ('a <= 'b) === 'f) - checkCondition('e =!= ('a > 'b), 'e === ('a <= 'b)) - checkCondition(('a >= 'b) =!= 'f, ('a < 'b) === 'f) - checkCondition('e =!= ('a >= 'b), 'e === ('a < 'b)) - checkCondition(('a < 'b) =!= 'f, ('a >= 'b) === 'f) - checkCondition('e =!= ('a < 'b), 'e === ('a >= 'b)) - checkCondition(('a <= 'b) =!= 'f, ('a > 'b) === 'f) - checkCondition('e =!= ('a <= 'b), 'e === 
('a > 'b))
-
-    checkCondition('e =!= ('f === ('g === Not('h))), 'e === ('f === ('g === 'h)))
-
-  }
-
-  test("Properly avoid non optimize-able cases") {
-    checkCondition(Not(('a > 'b) <=> 'f), Not(('a > 'b) <=> 'f))
-    checkCondition(Not('e <=> ('a > 'b)), Not('e <=> ('a > 'b)))
-    checkCondition(('a === 'b) =!= ('a === 'c), ('a === 'b) =!= ('a === 'c))
-    checkCondition(('a === 'b) =!= ('c in(1, 2, 3)), ('a === 'b) =!= ('c in(1, 2, 3)))
-  }
-}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NullDownPropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NullDownPropagationSuite.scala
index c9d1f3357dc8a..7097ebd4c0c63 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NullDownPropagationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NullDownPropagationSuite.scala
@@ -36,7 +36,6 @@ class NullDownPropagationSuite extends PlanTest with ExpressionEvalHelper {
         ConstantFolding,
         SimplifyConditionals,
         BooleanSimplification,
-        NotPropagation,
         PruneFilters) :: Nil
   }

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
index a376c9ce1b09b..89157be3097a6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -1956,4 +1956,30 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
     assert(!nonDeterministicQueryPlan.deterministic)
   }

+  test("SPARK-38132: Not IN subquery correctness checks") {
+    val t = "test_table"
+    withTable(t) {
+      Seq[(Integer, Integer)](
+        (1, 1),
+        (2, 2),
+        (3, 3),
+        (4, null),
+        (null, 0))
+        .toDF("c1", "c2").write.saveAsTable(t)
+      val df = spark.table(t)
+
+      checkAnswer(df.where(s"(c1 NOT IN (SELECT c2 FROM $t)) = true"), Seq.empty)
+      checkAnswer(df.where(s"(c1 NOT IN (SELECT c2 FROM $t WHERE c2 IS NOT NULL)) = true"),
+        Row(4, null) :: Nil)
+      checkAnswer(df.where(s"(c1 NOT IN (SELECT c2 FROM $t)) <=> true"), Seq.empty)
+      checkAnswer(df.where(s"(c1 NOT IN (SELECT c2 FROM $t WHERE c2 IS NOT NULL)) <=> true"),
+        Row(4, null) :: Nil)
+      checkAnswer(df.where(s"(c1 NOT IN (SELECT c2 FROM $t)) != false"), Seq.empty)
+      checkAnswer(df.where(s"(c1 NOT IN (SELECT c2 FROM $t WHERE c2 IS NOT NULL)) != false"),
+        Row(4, null) :: Nil)
+      checkAnswer(df.where(s"NOT((c1 NOT IN (SELECT c2 FROM $t)) <=> false)"), Seq.empty)
+      checkAnswer(df.where(s"NOT((c1 NOT IN (SELECT c2 FROM $t WHERE c2 IS NOT NULL)) <=> false)"),
+        Row(4, null) :: Nil)
+    }
+  }
 }

From add939f2c852f49f18fd770382f375e00b76e679 Mon Sep 17 00:00:00 2001
From: zero323
Date: Tue, 8 Feb 2022 01:51:12 +0100
Subject: [PATCH 167/513] [SPARK-37409][PYTHON][ML] Inline hints for pyspark.ml.pipeline

### What changes were proposed in this pull request?

This PR migrates the `pyspark.ml.pipeline` type annotations from a stub file to inline type hints.

### Why are the changes needed?

Part of the ongoing migration of type hints.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing tests.

Closes #35408 from zero323/SPARK-37409.
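As background for the diff that follows, here is a minimal, hypothetical sketch (the `Greeter` class is invented for illustration and is not part of pyspark.ml) of what such a migration involves: signatures that previously lived only in a `.pyi` stub are written directly in the module, and methods that touch `Optional` attributes narrow them before use so that mypy accepts the bodies. The diff below applies the same narrowing with lines such as `assert gateway is not None and SparkContext._jvm is not None`.

```python
from typing import Optional

# Stub style (before): a separate greeter.pyi would carry only signatures, e.g.
#     class Greeter:
#         def greet(self, name: str) -> str: ...
# while the greeter.py implementation had no annotations at all.


class Greeter:
    """Inline-hint style (after): annotations live next to the implementation."""

    def __init__(self, greeting: Optional[str] = None) -> None:
        # An Optional attribute, analogous to the optional Java object handles
        # held by the pyspark.ml wrappers.
        self._greeting = greeting

    def greet(self, name: str) -> str:
        # mypy now checks this body, so the Optional attribute must be
        # narrowed before it is used.
        assert self._greeting is not None
        return f"{self._greeting}, {name}!"


greeter = Greeter("Hello")
print(greeter.greet("Spark"))  # Hello, Spark!
```

With inline hints the stub file becomes redundant, which is why the corresponding `pipeline.pyi` is deleted at the end of the diff below.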
Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/ml/param/__init__.py | 2 +- python/pyspark/ml/pipeline.py | 131 +++++++++++++++++----------- python/pyspark/ml/pipeline.pyi | 98 --------------------- python/pyspark/ml/util.py | 7 +- 4 files changed, 84 insertions(+), 154 deletions(-) delete mode 100644 python/pyspark/ml/pipeline.pyi diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index fd5ed63ca944a..ee2c289cd0bce 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -569,7 +569,7 @@ def _copyValues(self, to: P, extra: Optional["ParamMap"] = None) -> P: to._set(**{param.name: paramMap[param]}) return to - def _resetUid(self, newUid: Any) -> "Params": + def _resetUid(self: "P", newUid: Any) -> "P": """ Changes the uid of this instance. This updates both the stored uid and the parent uid of params and param maps. diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py index 57ca47ec3bca8..1da86ec2bd83d 100644 --- a/python/pyspark/ml/pipeline.py +++ b/python/pyspark/ml/pipeline.py @@ -16,6 +16,8 @@ # import os +from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast, TYPE_CHECKING + from pyspark import keyword_only, since, SparkContext from pyspark.ml.base import Estimator, Model, Transformer from pyspark.ml.param import Param, Params @@ -28,14 +30,20 @@ DefaultParamsWriter, MLWriter, MLReader, + JavaMLReadable, JavaMLWritable, ) from pyspark.ml.wrapper import JavaParams from pyspark.ml.common import inherit_doc +from pyspark.sql.dataframe import DataFrame + +if TYPE_CHECKING: + from pyspark.ml._typing import ParamMap, PipelineStage + from py4j.java_gateway import JavaObject # type: ignore[import] @inherit_doc -class Pipeline(Estimator, MLReadable, MLWritable): +class Pipeline(Estimator["PipelineModel"], MLReadable["Pipeline"], MLWritable): """ A simple pipeline, which acts as an estimator. A Pipeline consists of a sequence of stages, each of which is either an @@ -56,10 +64,14 @@ class Pipeline(Estimator, MLReadable, MLWritable): .. versionadded:: 1.3.0 """ - stages = Param(Params._dummy(), "stages", "a list of pipeline stages") + stages: Param[List["PipelineStage"]] = Param( + Params._dummy(), "stages", "a list of pipeline stages" + ) + + _input_kwargs: Dict[str, Any] @keyword_only - def __init__(self, *, stages=None): + def __init__(self, *, stages: Optional[List["PipelineStage"]] = None): """ __init__(self, \\*, stages=None) """ @@ -67,7 +79,7 @@ def __init__(self, *, stages=None): kwargs = self._input_kwargs self.setParams(**kwargs) - def setStages(self, value): + def setStages(self, value: List["PipelineStage"]) -> "Pipeline": """ Set pipeline stages. @@ -87,7 +99,7 @@ def setStages(self, value): return self._set(stages=value) @since("1.3.0") - def getStages(self): + def getStages(self) -> List["PipelineStage"]: """ Get pipeline stages. """ @@ -95,7 +107,7 @@ def getStages(self): @keyword_only @since("1.3.0") - def setParams(self, *, stages=None): + def setParams(self, *, stages: Optional[List["PipelineStage"]] = None) -> "Pipeline": """ setParams(self, \\*, stages=None) Sets params for Pipeline. 
@@ -103,7 +115,7 @@ def setParams(self, *, stages=None): kwargs = self._input_kwargs return self._set(**kwargs) - def _fit(self, dataset): + def _fit(self, dataset: DataFrame) -> "PipelineModel": stages = self.getStages() for stage in stages: if not (isinstance(stage, Estimator) or isinstance(stage, Transformer)): @@ -112,7 +124,7 @@ def _fit(self, dataset): for i, stage in enumerate(stages): if isinstance(stage, Estimator): indexOfLastEstimator = i - transformers = [] + transformers: List[Transformer] = [] for i, stage in enumerate(stages): if i <= indexOfLastEstimator: if isinstance(stage, Transformer): @@ -124,10 +136,10 @@ def _fit(self, dataset): if i < indexOfLastEstimator: dataset = model.transform(dataset) else: - transformers.append(stage) + transformers.append(cast(Transformer, stage)) return PipelineModel(transformers) - def copy(self, extra=None): + def copy(self, extra: Optional["ParamMap"] = None) -> "Pipeline": """ Creates a copy of this instance. @@ -150,21 +162,21 @@ def copy(self, extra=None): return that.setStages(stages) @since("2.0.0") - def write(self): + def write(self) -> MLWriter: """Returns an MLWriter instance for this ML instance.""" allStagesAreJava = PipelineSharedReadWrite.checkStagesForJava(self.getStages()) if allStagesAreJava: - return JavaMLWriter(self) + return JavaMLWriter(self) # type: ignore[arg-type] return PipelineWriter(self) @classmethod @since("2.0.0") - def read(cls): + def read(cls) -> "PipelineReader": """Returns an MLReader instance for this class.""" return PipelineReader(cls) @classmethod - def _from_java(cls, java_stage): + def _from_java(cls, java_stage: "JavaObject") -> "Pipeline": """ Given a Java Pipeline, create and return a Python wrapper of it. Used for ML persistence. @@ -172,12 +184,14 @@ def _from_java(cls, java_stage): # Create a new instance of this stage. py_stage = cls() # Load information from java_stage to the instance. - py_stages = [JavaParams._from_java(s) for s in java_stage.getStages()] + py_stages: List["PipelineStage"] = [ + JavaParams._from_java(s) for s in java_stage.getStages() + ] py_stage.setStages(py_stages) py_stage._resetUid(java_stage.uid()) return py_stage - def _to_java(self): + def _to_java(self) -> "JavaObject": """ Transfer this instance to a Java Pipeline. Used for ML persistence. 
@@ -188,10 +202,12 @@ def _to_java(self): """ gateway = SparkContext._gateway + assert gateway is not None and SparkContext._jvm is not None + cls = SparkContext._jvm.org.apache.spark.ml.PipelineStage java_stages = gateway.new_array(cls, len(self.getStages())) for idx, stage in enumerate(self.getStages()): - java_stages[idx] = stage._to_java() + java_stages[idx] = cast(JavaParams, stage)._to_java() _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.Pipeline", self.uid) _java_obj.setStages(java_stages) @@ -205,30 +221,30 @@ class PipelineWriter(MLWriter): (Private) Specialization of :py:class:`MLWriter` for :py:class:`Pipeline` types """ - def __init__(self, instance): + def __init__(self, instance: Pipeline): super(PipelineWriter, self).__init__() self.instance = instance - def saveImpl(self, path): + def saveImpl(self, path: str) -> None: stages = self.instance.getStages() PipelineSharedReadWrite.validateStages(stages) PipelineSharedReadWrite.saveImpl(self.instance, stages, self.sc, path) @inherit_doc -class PipelineReader(MLReader): +class PipelineReader(MLReader[Pipeline]): """ (Private) Specialization of :py:class:`MLReader` for :py:class:`Pipeline` types """ - def __init__(self, cls): + def __init__(self, cls: Type[Pipeline]): super(PipelineReader, self).__init__() self.cls = cls - def load(self, path): + def load(self, path: str) -> Pipeline: metadata = DefaultParamsReader.loadMetadata(path, self.sc) if "language" not in metadata["paramMap"] or metadata["paramMap"]["language"] != "Python": - return JavaMLReader(self.cls).load(path) + return JavaMLReader(cast(Type["JavaMLReadable[Pipeline]"], self.cls)).load(path) else: uid, stages = PipelineSharedReadWrite.load(metadata, self.sc, path) return Pipeline(stages=stages)._resetUid(uid) @@ -240,53 +256,55 @@ class PipelineModelWriter(MLWriter): (Private) Specialization of :py:class:`MLWriter` for :py:class:`PipelineModel` types """ - def __init__(self, instance): + def __init__(self, instance: "PipelineModel"): super(PipelineModelWriter, self).__init__() self.instance = instance - def saveImpl(self, path): + def saveImpl(self, path: str) -> None: stages = self.instance.stages - PipelineSharedReadWrite.validateStages(stages) - PipelineSharedReadWrite.saveImpl(self.instance, stages, self.sc, path) + PipelineSharedReadWrite.validateStages(cast(List["PipelineStage"], stages)) + PipelineSharedReadWrite.saveImpl( + self.instance, cast(List["PipelineStage"], stages), self.sc, path + ) @inherit_doc -class PipelineModelReader(MLReader): +class PipelineModelReader(MLReader["PipelineModel"]): """ (Private) Specialization of :py:class:`MLReader` for :py:class:`PipelineModel` types """ - def __init__(self, cls): + def __init__(self, cls: Type["PipelineModel"]): super(PipelineModelReader, self).__init__() self.cls = cls - def load(self, path): + def load(self, path: str) -> "PipelineModel": metadata = DefaultParamsReader.loadMetadata(path, self.sc) if "language" not in metadata["paramMap"] or metadata["paramMap"]["language"] != "Python": - return JavaMLReader(self.cls).load(path) + return JavaMLReader(cast(Type["JavaMLReadable[PipelineModel]"], self.cls)).load(path) else: uid, stages = PipelineSharedReadWrite.load(metadata, self.sc, path) - return PipelineModel(stages=stages)._resetUid(uid) + return PipelineModel(stages=cast(List[Transformer], stages))._resetUid(uid) @inherit_doc -class PipelineModel(Model, MLReadable, MLWritable): +class PipelineModel(Model, MLReadable["PipelineModel"], MLWritable): """ Represents a compiled pipeline with 
transformers and fitted models. .. versionadded:: 1.3.0 """ - def __init__(self, stages): + def __init__(self, stages: List[Transformer]): super(PipelineModel, self).__init__() self.stages = stages - def _transform(self, dataset): + def _transform(self, dataset: DataFrame) -> DataFrame: for t in self.stages: dataset = t.transform(dataset) return dataset - def copy(self, extra=None): + def copy(self, extra: Optional["ParamMap"] = None) -> "PipelineModel": """ Creates a copy of this instance. @@ -301,33 +319,35 @@ def copy(self, extra=None): return PipelineModel(stages) @since("2.0.0") - def write(self): + def write(self) -> MLWriter: """Returns an MLWriter instance for this ML instance.""" - allStagesAreJava = PipelineSharedReadWrite.checkStagesForJava(self.stages) + allStagesAreJava = PipelineSharedReadWrite.checkStagesForJava( + cast(List["PipelineStage"], self.stages) + ) if allStagesAreJava: - return JavaMLWriter(self) + return JavaMLWriter(self) # type: ignore[arg-type] return PipelineModelWriter(self) @classmethod @since("2.0.0") - def read(cls): + def read(cls) -> PipelineModelReader: """Returns an MLReader instance for this class.""" return PipelineModelReader(cls) @classmethod - def _from_java(cls, java_stage): + def _from_java(cls, java_stage: "JavaObject") -> "PipelineModel": """ Given a Java PipelineModel, create and return a Python wrapper of it. Used for ML persistence. """ # Load information from java_stage to the instance. - py_stages = [JavaParams._from_java(s) for s in java_stage.stages()] + py_stages: List[Transformer] = [JavaParams._from_java(s) for s in java_stage.stages()] # Create a new instance of this stage. py_stage = cls(py_stages) py_stage._resetUid(java_stage.uid()) return py_stage - def _to_java(self): + def _to_java(self) -> "JavaObject": """ Transfer this instance to a Java PipelineModel. Used for ML persistence. 
@@ -335,10 +355,12 @@ def _to_java(self): """ gateway = SparkContext._gateway + assert gateway is not None and SparkContext._jvm is not None + cls = SparkContext._jvm.org.apache.spark.ml.Transformer java_stages = gateway.new_array(cls, len(self.stages)) for idx, stage in enumerate(self.stages): - java_stages[idx] = stage._to_java() + java_stages[idx] = cast(JavaParams, stage)._to_java() _java_obj = JavaParams._new_java_obj( "org.apache.spark.ml.PipelineModel", self.uid, java_stages @@ -357,11 +379,11 @@ class PipelineSharedReadWrite: """ @staticmethod - def checkStagesForJava(stages): + def checkStagesForJava(stages: List["PipelineStage"]) -> bool: return all(isinstance(stage, JavaMLWritable) for stage in stages) @staticmethod - def validateStages(stages): + def validateStages(stages: List["PipelineStage"]) -> None: """ Check that all stages are Writable """ @@ -375,7 +397,12 @@ def validateStages(stages): ) @staticmethod - def saveImpl(instance, stages, sc, path): + def saveImpl( + instance: Union[Pipeline, PipelineModel], + stages: List["PipelineStage"], + sc: SparkContext, + path: str, + ) -> None: """ Save metadata and stages for a :py:class:`Pipeline` or :py:class:`PipelineModel` - save metadata to path/metadata @@ -386,12 +413,14 @@ def saveImpl(instance, stages, sc, path): DefaultParamsWriter.saveMetadata(instance, path, sc, paramMap=jsonParams) stagesDir = os.path.join(path, "stages") for index, stage in enumerate(stages): - stage.write().save( + cast(MLWritable, stage).write().save( PipelineSharedReadWrite.getStagePath(stage.uid, index, len(stages), stagesDir) ) @staticmethod - def load(metadata, sc, path): + def load( + metadata: Dict[str, Any], sc: SparkContext, path: str + ) -> Tuple[str, List["PipelineStage"]]: """ Load metadata and stages for a :py:class:`Pipeline` or :py:class:`PipelineModel` @@ -407,12 +436,12 @@ def load(metadata, sc, path): stagePath = PipelineSharedReadWrite.getStagePath( stageUid, index, len(stageUids), stagesDir ) - stage = DefaultParamsReader.loadParamsInstance(stagePath, sc) + stage: "PipelineStage" = DefaultParamsReader.loadParamsInstance(stagePath, sc) stages.append(stage) return (metadata["uid"], stages) @staticmethod - def getStagePath(stageUid, stageIdx, numStages, stagesDir): + def getStagePath(stageUid: str, stageIdx: int, numStages: int, stagesDir: str) -> str: """ Get path for saving the given stage. """ diff --git a/python/pyspark/ml/pipeline.pyi b/python/pyspark/ml/pipeline.pyi deleted file mode 100644 index 7b3890058294f..0000000000000 --- a/python/pyspark/ml/pipeline.pyi +++ /dev/null @@ -1,98 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from typing import Any, Dict, List, Optional, Tuple, Type, Union - -from pyspark.ml._typing import PipelineStage -from pyspark.context import SparkContext -from pyspark.ml.base import Estimator, Model, Transformer -from pyspark.ml.param import Param -from pyspark.ml.util import ( # noqa: F401 - DefaultParamsReader as DefaultParamsReader, - DefaultParamsWriter as DefaultParamsWriter, - JavaMLReader as JavaMLReader, - JavaMLWritable as JavaMLWritable, - JavaMLWriter as JavaMLWriter, - MLReadable as MLReadable, - MLReader as MLReader, - MLWritable as MLWritable, - MLWriter as MLWriter, -) -from pyspark.sql.dataframe import DataFrame - -class Pipeline(Estimator[PipelineModel], MLReadable[Pipeline], MLWritable): - stages: List[PipelineStage] - def __init__(self, *, stages: Optional[List[PipelineStage]] = ...) -> None: ... - def _fit(self, dataset: DataFrame) -> PipelineModel: ... - def setStages(self, stages: List[PipelineStage]) -> Pipeline: ... - def getStages(self) -> List[PipelineStage]: ... - def setParams(self, *, stages: Optional[List[PipelineStage]] = ...) -> Pipeline: ... - def copy(self, extra: Optional[Dict[Param, str]] = ...) -> Pipeline: ... - def write(self) -> JavaMLWriter: ... - def save(self, path: str) -> None: ... - @classmethod - def read(cls) -> PipelineReader: ... - -class PipelineWriter(MLWriter): - instance: Pipeline - def __init__(self, instance: Pipeline) -> None: ... - def saveImpl(self, path: str) -> None: ... - -class PipelineReader(MLReader[Pipeline]): - cls: Type[Pipeline] - def __init__(self, cls: Type[Pipeline]) -> None: ... - def load(self, path: str) -> Pipeline: ... - -class PipelineModelWriter(MLWriter): - instance: PipelineModel - def __init__(self, instance: PipelineModel) -> None: ... - def saveImpl(self, path: str) -> None: ... - -class PipelineModelReader(MLReader[PipelineModel]): - cls: Type[PipelineModel] - def __init__(self, cls: Type[PipelineModel]) -> None: ... - def load(self, path: str) -> PipelineModel: ... - -class PipelineModel(Model, MLReadable[PipelineModel], MLWritable): - stages: List[PipelineStage] - def __init__(self, stages: List[Transformer]) -> None: ... - def _transform(self, dataset: DataFrame) -> DataFrame: ... - def copy(self, extra: Optional[Dict[Param, Any]] = ...) -> PipelineModel: ... - def write(self) -> JavaMLWriter: ... - def save(self, path: str) -> None: ... - @classmethod - def read(cls) -> PipelineModelReader: ... - -class PipelineSharedReadWrite: - @staticmethod - def checkStagesForJava(stages: List[PipelineStage]) -> bool: ... - @staticmethod - def validateStages(stages: List[PipelineStage]) -> None: ... - @staticmethod - def saveImpl( - instance: Union[Pipeline, PipelineModel], - stages: List[PipelineStage], - sc: SparkContext, - path: str, - ) -> None: ... - @staticmethod - def load( - metadata: Dict[str, Any], sc: SparkContext, path: str - ) -> Tuple[str, List[PipelineStage]]: ... - @staticmethod - def getStagePath(stageUid: str, stageIdx: int, numStages: int, stagesDir: str) -> str: ... 
diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py index 1dacffcb1122b..019420bc3684e 100644 --- a/python/pyspark/ml/util.py +++ b/python/pyspark/ml/util.py @@ -32,7 +32,6 @@ from py4j.java_gateway import JavaGateway, JavaObject from pyspark.ml._typing import PipelineStage - from pyspark.ml.param import Param from pyspark.ml.base import Params from pyspark.ml.wrapper import JavaWrapper @@ -429,7 +428,7 @@ def saveMetadata( path: str, sc: SparkContext, extraMetadata: Optional[Dict[str, Any]] = None, - paramMap: Optional[Dict[str, "Param"]] = None, + paramMap: Optional[Dict[str, Any]] = None, ) -> None: """ Saves metadata + Params to: path + "/metadata" @@ -460,7 +459,7 @@ def _get_metadata_to_save( instance: "Params", sc: SparkContext, extraMetadata: Optional[Dict[str, Any]] = None, - paramMap: Optional[Dict[str, "Param"]] = None, + paramMap: Optional[Dict[str, Any]] = None, ) -> str: """ Helper for :py:meth:`DefaultParamsWriter.saveMetadata` which extracts the JSON to save. @@ -696,7 +695,7 @@ def getAllNestedStages(pyInstance: Any) -> List["PipelineStage"]: if isinstance(pyInstance, Pipeline): pySubStages = pyInstance.getStages() elif isinstance(pyInstance, PipelineModel): - pySubStages = pyInstance.stages + pySubStages = cast(List["PipelineStage"], pyInstance.stages) elif isinstance(pyInstance, _ValidatorParams): raise ValueError("PySpark does not support nested validator.") elif isinstance(pyInstance, OneVsRest): From 9d0563733bacc39ffbb7a07e5d3fc6de71d0cfb3 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 8 Feb 2022 03:14:47 +0100 Subject: [PATCH 168/513] [SPARK-37412][PYTHON][ML] Inline typehints for pyspark.ml.stat ### What changes were proposed in this pull request? This PR migrates type `pyspark.ml.stat` annotations from stub file to inline type hints. ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35401 from zero323/SPARK-37412. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/ml/stat.py | 68 ++++++++++++++++++++++------------- python/pyspark/ml/stat.pyi | 73 -------------------------------------- 2 files changed, 44 insertions(+), 97 deletions(-) delete mode 100644 python/pyspark/ml/stat.pyi diff --git a/python/pyspark/ml/stat.py b/python/pyspark/ml/stat.py index 15bb6ca93f179..ad8a3ad85a233 100644 --- a/python/pyspark/ml/stat.py +++ b/python/pyspark/ml/stat.py @@ -17,12 +17,20 @@ import sys +from typing import Optional, Tuple, TYPE_CHECKING + + from pyspark import since, SparkContext from pyspark.ml.common import _java2py, _py2java -from pyspark.ml.wrapper import JavaWrapper, _jvm +from pyspark.ml.linalg import Matrix, Vector +from pyspark.ml.wrapper import JavaWrapper, _jvm # type: ignore[attr-defined] from pyspark.sql.column import Column, _to_seq +from pyspark.sql.dataframe import DataFrame from pyspark.sql.functions import lit +if TYPE_CHECKING: + from py4j.java_gateway import JavaObject # type: ignore[import] + class ChiSquareTest: """ @@ -37,7 +45,9 @@ class ChiSquareTest: """ @staticmethod - def test(dataset, featuresCol, labelCol, flatten=False): + def test( + dataset: DataFrame, featuresCol: str, labelCol: str, flatten: bool = False + ) -> DataFrame: """ Perform a Pearson's independence test using dataset. 
@@ -95,6 +105,8 @@ def test(dataset, featuresCol, labelCol, flatten=False): 4.0 """ sc = SparkContext._active_spark_context + assert sc is not None + javaTestObj = _jvm().org.apache.spark.ml.stat.ChiSquareTest args = [_py2java(sc, arg) for arg in (dataset, featuresCol, labelCol, flatten)] return _java2py(sc, javaTestObj.test(*args)) @@ -116,7 +128,7 @@ class Correlation: """ @staticmethod - def corr(dataset, column, method="pearson"): + def corr(dataset: DataFrame, column: str, method: str = "pearson") -> DataFrame: """ Compute the correlation matrix with specified method using dataset. @@ -162,6 +174,8 @@ def corr(dataset, column, method="pearson"): [ 0.4 , 0.9486... , NaN, 1. ]]) """ sc = SparkContext._active_spark_context + assert sc is not None + javaCorrObj = _jvm().org.apache.spark.ml.stat.Correlation args = [_py2java(sc, arg) for arg in (dataset, column, method)] return _java2py(sc, javaCorrObj.corr(*args)) @@ -181,7 +195,7 @@ class KolmogorovSmirnovTest: """ @staticmethod - def test(dataset, sampleCol, distName, *params): + def test(dataset: DataFrame, sampleCol: str, distName: str, *params: float) -> DataFrame: """ Conduct a one-sample, two-sided Kolmogorov-Smirnov test for probability distribution equality. Currently supports the normal distribution, taking as parameters the mean and @@ -228,9 +242,11 @@ def test(dataset, sampleCol, distName, *params): 0.175 """ sc = SparkContext._active_spark_context + assert sc is not None + javaTestObj = _jvm().org.apache.spark.ml.stat.KolmogorovSmirnovTest dataset = _py2java(sc, dataset) - params = [float(param) for param in params] + params = [float(param) for param in params] # type: ignore[assignment] return _java2py( sc, javaTestObj.test(dataset, sampleCol, distName, _jvm().PythonUtils.toSeq(params)) ) @@ -284,7 +300,7 @@ class Summarizer: @staticmethod @since("2.4.0") - def mean(col, weightCol=None): + def mean(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of mean summary """ @@ -292,7 +308,7 @@ def mean(col, weightCol=None): @staticmethod @since("3.0.0") - def sum(col, weightCol=None): + def sum(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of sum summary """ @@ -300,7 +316,7 @@ def sum(col, weightCol=None): @staticmethod @since("2.4.0") - def variance(col, weightCol=None): + def variance(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of variance summary """ @@ -308,7 +324,7 @@ def variance(col, weightCol=None): @staticmethod @since("3.0.0") - def std(col, weightCol=None): + def std(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of std summary """ @@ -316,7 +332,7 @@ def std(col, weightCol=None): @staticmethod @since("2.4.0") - def count(col, weightCol=None): + def count(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of count summary """ @@ -324,7 +340,7 @@ def count(col, weightCol=None): @staticmethod @since("2.4.0") - def numNonZeros(col, weightCol=None): + def numNonZeros(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of numNonZero summary """ @@ -332,7 +348,7 @@ def numNonZeros(col, weightCol=None): @staticmethod @since("2.4.0") - def max(col, weightCol=None): + def max(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of max summary """ @@ -340,7 +356,7 @@ def max(col, weightCol=None): @staticmethod @since("2.4.0") - def min(col, weightCol=None): + def min(col: Column, weightCol: Optional[Column] = None) 
-> Column: """ return a column of min summary """ @@ -348,7 +364,7 @@ def min(col, weightCol=None): @staticmethod @since("2.4.0") - def normL1(col, weightCol=None): + def normL1(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of normL1 summary """ @@ -356,14 +372,14 @@ def normL1(col, weightCol=None): @staticmethod @since("2.4.0") - def normL2(col, weightCol=None): + def normL2(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of normL2 summary """ return Summarizer._get_single_metric(col, weightCol, "normL2") @staticmethod - def _check_param(featuresCol, weightCol): + def _check_param(featuresCol: Column, weightCol: Optional[Column]) -> Tuple[Column, Column]: if weightCol is None: weightCol = lit(1.0) if not isinstance(featuresCol, Column) or not isinstance(weightCol, Column): @@ -371,16 +387,16 @@ def _check_param(featuresCol, weightCol): return featuresCol, weightCol @staticmethod - def _get_single_metric(col, weightCol, metric): + def _get_single_metric(col: Column, weightCol: Optional[Column], metric: str) -> Column: col, weightCol = Summarizer._check_param(col, weightCol) return Column( - JavaWrapper._new_java_obj( + JavaWrapper._new_java_obj( # type: ignore[attr-defined] "org.apache.spark.ml.stat.Summarizer." + metric, col._jc, weightCol._jc ) ) @staticmethod - def metrics(*metrics): + def metrics(*metrics: str) -> "SummaryBuilder": """ Given a list of metrics, provides a builder that it turns computes metrics from a column. @@ -415,7 +431,9 @@ def metrics(*metrics): :py:class:`pyspark.ml.stat.SummaryBuilder` """ sc = SparkContext._active_spark_context - js = JavaWrapper._new_java_obj( + assert sc is not None + + js = JavaWrapper._new_java_obj( # type: ignore[attr-defined] "org.apache.spark.ml.stat.Summarizer.metrics", _to_seq(sc, metrics) ) return SummaryBuilder(js) @@ -432,10 +450,10 @@ class SummaryBuilder(JavaWrapper): """ - def __init__(self, jSummaryBuilder): + def __init__(self, jSummaryBuilder: "JavaObject"): super(SummaryBuilder, self).__init__(jSummaryBuilder) - def summary(self, featuresCol, weightCol=None): + def summary(self, featuresCol: Column, weightCol: Optional[Column] = None) -> Column: """ Returns an aggregate object that contains the summary of the column with the requested metrics. @@ -456,7 +474,9 @@ def summary(self, featuresCol, weightCol=None): structure is determined during the creation of the builder. """ featuresCol, weightCol = Summarizer._check_param(featuresCol, weightCol) - return Column(self._java_obj.summary(featuresCol._jc, weightCol._jc)) + return Column( + self._java_obj.summary(featuresCol._jc, weightCol._jc) # type: ignore[attr-defined] + ) class MultivariateGaussian: @@ -474,7 +494,7 @@ class MultivariateGaussian: [ 3., 2.]])) """ - def __init__(self, mean, cov): + def __init__(self, mean: Vector, cov: Matrix): self.mean = mean self.cov = cov diff --git a/python/pyspark/ml/stat.pyi b/python/pyspark/ml/stat.pyi deleted file mode 100644 index 90b0686b1c746..0000000000000 --- a/python/pyspark/ml/stat.pyi +++ /dev/null @@ -1,73 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Optional - -from pyspark.ml.linalg import Matrix, Vector -from pyspark.ml.wrapper import JavaWrapper -from pyspark.sql.column import Column -from pyspark.sql.dataframe import DataFrame - -from py4j.java_gateway import JavaObject # type: ignore[import] - -class ChiSquareTest: - @staticmethod - def test( - dataset: DataFrame, featuresCol: str, labelCol: str, flatten: bool = ... - ) -> DataFrame: ... - -class Correlation: - @staticmethod - def corr(dataset: DataFrame, column: str, method: str = ...) -> DataFrame: ... - -class KolmogorovSmirnovTest: - @staticmethod - def test(dataset: DataFrame, sampleCol: str, distName: str, *params: float) -> DataFrame: ... - -class Summarizer: - @staticmethod - def mean(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def sum(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def variance(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def std(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def count(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def numNonZeros(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def max(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def min(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def normL1(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def normL2(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def metrics(*metrics: str) -> SummaryBuilder: ... - -class SummaryBuilder(JavaWrapper): - def __init__(self, jSummaryBuilder: JavaObject) -> None: ... - def summary(self, featuresCol: Column, weightCol: Optional[Column] = ...) -> Column: ... - -class MultivariateGaussian: - mean: Vector - cov: Matrix - def __init__(self, mean: Vector, cov: Matrix) -> None: ... From e34d8eec019a0e60576fa7d6d2193d8a3c5bedab Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 8 Feb 2022 11:58:51 +0900 Subject: [PATCH 169/513] Revert "[SPARK-37412][PYTHON][ML] Inline typehints for pyspark.ml.stat" This reverts commit 9d0563733bacc39ffbb7a07e5d3fc6de71d0cfb3. 
--- python/pyspark/ml/stat.py | 68 +++++++++++++---------------------- python/pyspark/ml/stat.pyi | 73 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 44 deletions(-) create mode 100644 python/pyspark/ml/stat.pyi diff --git a/python/pyspark/ml/stat.py b/python/pyspark/ml/stat.py index ad8a3ad85a233..15bb6ca93f179 100644 --- a/python/pyspark/ml/stat.py +++ b/python/pyspark/ml/stat.py @@ -17,20 +17,12 @@ import sys -from typing import Optional, Tuple, TYPE_CHECKING - - from pyspark import since, SparkContext from pyspark.ml.common import _java2py, _py2java -from pyspark.ml.linalg import Matrix, Vector -from pyspark.ml.wrapper import JavaWrapper, _jvm # type: ignore[attr-defined] +from pyspark.ml.wrapper import JavaWrapper, _jvm from pyspark.sql.column import Column, _to_seq -from pyspark.sql.dataframe import DataFrame from pyspark.sql.functions import lit -if TYPE_CHECKING: - from py4j.java_gateway import JavaObject # type: ignore[import] - class ChiSquareTest: """ @@ -45,9 +37,7 @@ class ChiSquareTest: """ @staticmethod - def test( - dataset: DataFrame, featuresCol: str, labelCol: str, flatten: bool = False - ) -> DataFrame: + def test(dataset, featuresCol, labelCol, flatten=False): """ Perform a Pearson's independence test using dataset. @@ -105,8 +95,6 @@ def test( 4.0 """ sc = SparkContext._active_spark_context - assert sc is not None - javaTestObj = _jvm().org.apache.spark.ml.stat.ChiSquareTest args = [_py2java(sc, arg) for arg in (dataset, featuresCol, labelCol, flatten)] return _java2py(sc, javaTestObj.test(*args)) @@ -128,7 +116,7 @@ class Correlation: """ @staticmethod - def corr(dataset: DataFrame, column: str, method: str = "pearson") -> DataFrame: + def corr(dataset, column, method="pearson"): """ Compute the correlation matrix with specified method using dataset. @@ -174,8 +162,6 @@ def corr(dataset: DataFrame, column: str, method: str = "pearson") -> DataFrame: [ 0.4 , 0.9486... , NaN, 1. ]]) """ sc = SparkContext._active_spark_context - assert sc is not None - javaCorrObj = _jvm().org.apache.spark.ml.stat.Correlation args = [_py2java(sc, arg) for arg in (dataset, column, method)] return _java2py(sc, javaCorrObj.corr(*args)) @@ -195,7 +181,7 @@ class KolmogorovSmirnovTest: """ @staticmethod - def test(dataset: DataFrame, sampleCol: str, distName: str, *params: float) -> DataFrame: + def test(dataset, sampleCol, distName, *params): """ Conduct a one-sample, two-sided Kolmogorov-Smirnov test for probability distribution equality. 
Currently supports the normal distribution, taking as parameters the mean and @@ -242,11 +228,9 @@ def test(dataset: DataFrame, sampleCol: str, distName: str, *params: float) -> D 0.175 """ sc = SparkContext._active_spark_context - assert sc is not None - javaTestObj = _jvm().org.apache.spark.ml.stat.KolmogorovSmirnovTest dataset = _py2java(sc, dataset) - params = [float(param) for param in params] # type: ignore[assignment] + params = [float(param) for param in params] return _java2py( sc, javaTestObj.test(dataset, sampleCol, distName, _jvm().PythonUtils.toSeq(params)) ) @@ -300,7 +284,7 @@ class Summarizer: @staticmethod @since("2.4.0") - def mean(col: Column, weightCol: Optional[Column] = None) -> Column: + def mean(col, weightCol=None): """ return a column of mean summary """ @@ -308,7 +292,7 @@ def mean(col: Column, weightCol: Optional[Column] = None) -> Column: @staticmethod @since("3.0.0") - def sum(col: Column, weightCol: Optional[Column] = None) -> Column: + def sum(col, weightCol=None): """ return a column of sum summary """ @@ -316,7 +300,7 @@ def sum(col: Column, weightCol: Optional[Column] = None) -> Column: @staticmethod @since("2.4.0") - def variance(col: Column, weightCol: Optional[Column] = None) -> Column: + def variance(col, weightCol=None): """ return a column of variance summary """ @@ -324,7 +308,7 @@ def variance(col: Column, weightCol: Optional[Column] = None) -> Column: @staticmethod @since("3.0.0") - def std(col: Column, weightCol: Optional[Column] = None) -> Column: + def std(col, weightCol=None): """ return a column of std summary """ @@ -332,7 +316,7 @@ def std(col: Column, weightCol: Optional[Column] = None) -> Column: @staticmethod @since("2.4.0") - def count(col: Column, weightCol: Optional[Column] = None) -> Column: + def count(col, weightCol=None): """ return a column of count summary """ @@ -340,7 +324,7 @@ def count(col: Column, weightCol: Optional[Column] = None) -> Column: @staticmethod @since("2.4.0") - def numNonZeros(col: Column, weightCol: Optional[Column] = None) -> Column: + def numNonZeros(col, weightCol=None): """ return a column of numNonZero summary """ @@ -348,7 +332,7 @@ def numNonZeros(col: Column, weightCol: Optional[Column] = None) -> Column: @staticmethod @since("2.4.0") - def max(col: Column, weightCol: Optional[Column] = None) -> Column: + def max(col, weightCol=None): """ return a column of max summary """ @@ -356,7 +340,7 @@ def max(col: Column, weightCol: Optional[Column] = None) -> Column: @staticmethod @since("2.4.0") - def min(col: Column, weightCol: Optional[Column] = None) -> Column: + def min(col, weightCol=None): """ return a column of min summary """ @@ -364,7 +348,7 @@ def min(col: Column, weightCol: Optional[Column] = None) -> Column: @staticmethod @since("2.4.0") - def normL1(col: Column, weightCol: Optional[Column] = None) -> Column: + def normL1(col, weightCol=None): """ return a column of normL1 summary """ @@ -372,14 +356,14 @@ def normL1(col: Column, weightCol: Optional[Column] = None) -> Column: @staticmethod @since("2.4.0") - def normL2(col: Column, weightCol: Optional[Column] = None) -> Column: + def normL2(col, weightCol=None): """ return a column of normL2 summary """ return Summarizer._get_single_metric(col, weightCol, "normL2") @staticmethod - def _check_param(featuresCol: Column, weightCol: Optional[Column]) -> Tuple[Column, Column]: + def _check_param(featuresCol, weightCol): if weightCol is None: weightCol = lit(1.0) if not isinstance(featuresCol, Column) or not isinstance(weightCol, Column): @@ -387,16 
+371,16 @@ def _check_param(featuresCol: Column, weightCol: Optional[Column]) -> Tuple[Colu return featuresCol, weightCol @staticmethod - def _get_single_metric(col: Column, weightCol: Optional[Column], metric: str) -> Column: + def _get_single_metric(col, weightCol, metric): col, weightCol = Summarizer._check_param(col, weightCol) return Column( - JavaWrapper._new_java_obj( # type: ignore[attr-defined] + JavaWrapper._new_java_obj( "org.apache.spark.ml.stat.Summarizer." + metric, col._jc, weightCol._jc ) ) @staticmethod - def metrics(*metrics: str) -> "SummaryBuilder": + def metrics(*metrics): """ Given a list of metrics, provides a builder that it turns computes metrics from a column. @@ -431,9 +415,7 @@ def metrics(*metrics: str) -> "SummaryBuilder": :py:class:`pyspark.ml.stat.SummaryBuilder` """ sc = SparkContext._active_spark_context - assert sc is not None - - js = JavaWrapper._new_java_obj( # type: ignore[attr-defined] + js = JavaWrapper._new_java_obj( "org.apache.spark.ml.stat.Summarizer.metrics", _to_seq(sc, metrics) ) return SummaryBuilder(js) @@ -450,10 +432,10 @@ class SummaryBuilder(JavaWrapper): """ - def __init__(self, jSummaryBuilder: "JavaObject"): + def __init__(self, jSummaryBuilder): super(SummaryBuilder, self).__init__(jSummaryBuilder) - def summary(self, featuresCol: Column, weightCol: Optional[Column] = None) -> Column: + def summary(self, featuresCol, weightCol=None): """ Returns an aggregate object that contains the summary of the column with the requested metrics. @@ -474,9 +456,7 @@ def summary(self, featuresCol: Column, weightCol: Optional[Column] = None) -> Co structure is determined during the creation of the builder. """ featuresCol, weightCol = Summarizer._check_param(featuresCol, weightCol) - return Column( - self._java_obj.summary(featuresCol._jc, weightCol._jc) # type: ignore[attr-defined] - ) + return Column(self._java_obj.summary(featuresCol._jc, weightCol._jc)) class MultivariateGaussian: @@ -494,7 +474,7 @@ class MultivariateGaussian: [ 3., 2.]])) """ - def __init__(self, mean: Vector, cov: Matrix): + def __init__(self, mean, cov): self.mean = mean self.cov = cov diff --git a/python/pyspark/ml/stat.pyi b/python/pyspark/ml/stat.pyi new file mode 100644 index 0000000000000..90b0686b1c746 --- /dev/null +++ b/python/pyspark/ml/stat.pyi @@ -0,0 +1,73 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Optional + +from pyspark.ml.linalg import Matrix, Vector +from pyspark.ml.wrapper import JavaWrapper +from pyspark.sql.column import Column +from pyspark.sql.dataframe import DataFrame + +from py4j.java_gateway import JavaObject # type: ignore[import] + +class ChiSquareTest: + @staticmethod + def test( + dataset: DataFrame, featuresCol: str, labelCol: str, flatten: bool = ... + ) -> DataFrame: ... 
+ +class Correlation: + @staticmethod + def corr(dataset: DataFrame, column: str, method: str = ...) -> DataFrame: ... + +class KolmogorovSmirnovTest: + @staticmethod + def test(dataset: DataFrame, sampleCol: str, distName: str, *params: float) -> DataFrame: ... + +class Summarizer: + @staticmethod + def mean(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def sum(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def variance(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def std(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def count(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def numNonZeros(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def max(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def min(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def normL1(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def normL2(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def metrics(*metrics: str) -> SummaryBuilder: ... + +class SummaryBuilder(JavaWrapper): + def __init__(self, jSummaryBuilder: JavaObject) -> None: ... + def summary(self, featuresCol: Column, weightCol: Optional[Column] = ...) -> Column: ... + +class MultivariateGaussian: + mean: Vector + cov: Matrix + def __init__(self, mean: Vector, cov: Matrix) -> None: ... From 28a2f62a1de55dca14071986194a0a57a3c01bb0 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Mon, 7 Feb 2022 19:36:37 -0800 Subject: [PATCH 170/513] [SPARK-38133][SQL] UnsafeRow should treat TIMESTAMP_NTZ as mutable and fixed width ### What changes were proposed in this pull request? Add TimestampNTZType to UnsafeRow's list of mutable fields. ### Why are the changes needed? Assume this data: ``` create or replace temp view v1 as select * from values (1, timestamp_ntz'2012-01-01 00:00:00', 10000), (2, timestamp_ntz'2012-01-01 00:00:00', 20000), (1, timestamp_ntz'2012-01-01 00:00:00', 5000), (1, timestamp_ntz'2013-01-01 00:00:00', 48000), (2, timestamp_ntz'2013-01-01 00:00:00', 30000) as data(a, b, c); ``` The following query produces incorrect results: ``` select * from v1 pivot ( sum(c) for a in (1, 2) ); ``` The timestamp_ntz values are corrupted: ``` 2012-01-01 19:05:19.476736 15000 20000 2013-01-01 19:05:19.476736 48000 30000 ``` Because `UnsafeRow.isFixedLength` returns `false` for data type `TimestampNTZType`, `GenerateUnsafeRowJoiner` generates code for the `TIMESTAMP_NTZ` field as though it was a variable length field (it adds an offset to the Long value, thus corrupting the timestamp value). By adding `TimestampNTZType` to `UnsafeRow`'s list of mutable fields, `UnsafeRow.isFixedLength` returns `true`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New unit test. Closes #35430 from bersprockets/isfixedlength_issue. 
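
Not part of the patch itself: a minimal sketch of the invariant this change restores, assuming the existing `UnsafeRow.isFixedLength` helper described above and that `UnsafeRow.isMutable` is exposed alongside it, with the `TimestampType`/`TimestampNTZType` singletons from `org.apache.spark.sql.types`.

```scala
import org.apache.spark.sql.catalyst.expressions.UnsafeRow
import org.apache.spark.sql.types.{TimestampNTZType, TimestampType}

object TimestampNtzLayoutCheck {
  def main(args: Array[String]): Unit = {
    // TIMESTAMP is already an 8-byte, fixed-width, mutable UnsafeRow field.
    assert(UnsafeRow.isFixedLength(TimestampType))
    // With TimestampNTZType added to the mutable field types, TIMESTAMP_NTZ gets
    // the same treatment; before this patch isFixedLength returned false, so
    // GenerateUnsafeRowJoiner treated the column as variable-length and corrupted it.
    assert(UnsafeRow.isFixedLength(TimestampNTZType))
    assert(UnsafeRow.isMutable(TimestampNTZType))
  }
}
```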
Authored-by: Bruce Robbins Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/expressions/UnsafeRow.java | 3 ++- .../org/apache/spark/sql/DataFramePivotSuite.scala | 13 +++++++++++++ .../sql/hive/execution/AggregationQuerySuite.scala | 5 ++++- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java index 5088d06de9b32..7b11ab20966e1 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java @@ -90,7 +90,8 @@ public static int calculateBitSetWidthInBytes(int numFields) { FloatType, DoubleType, DateType, - TimestampType + TimestampType, + TimestampNTZType }))); } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala index bbdae29fa3b05..1a0c95beb18b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import java.time.LocalDateTime import java.util.Locale import org.apache.spark.sql.catalyst.expressions.aggregate.PivotFirst @@ -341,4 +342,16 @@ class DataFramePivotSuite extends QueryTest with SharedSparkSession { percentile_approx(col("value"), array(lit(0.5)), lit(10000))) checkAnswer(actual, Row(Array(2.5), Array(3.0))) } + + test("SPARK-38133: Grouping by TIMESTAMP_NTZ should not corrupt results") { + checkAnswer( + courseSales.withColumn("ts", $"year".cast("string").cast("timestamp_ntz")) + .groupBy("ts") + .pivot("course", Seq("dotNET", "Java")) + .agg(sum($"earnings")) + .select("ts", "dotNET", "Java"), + Row(LocalDateTime.of(2012, 1, 1, 0, 0, 0, 0), 15000.0, 20000.0) :: + Row(LocalDateTime.of(2013, 1, 1, 0, 0, 0, 0), 48000.0, 30000.0) :: Nil + ) + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala index e560c2ea32afa..c5a75acbd91b8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala @@ -896,7 +896,10 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te // UnsafeRow.mutableFieldTypes.asScala.toSeq will trigger SortAggregate to use // UnsafeRow as the aggregation buffer. While, dataTypes will trigger // SortAggregate to use a safe row as the aggregation buffer. - Seq(dataTypes, UnsafeRow.mutableFieldTypes.asScala.toSeq).foreach { dataTypes => + // udaf cannot yet handle TimestampNTZType + val mutableFieldTypes = UnsafeRow.mutableFieldTypes + .asScala.filterNot(_.isInstanceOf[TimestampNTZType]).toSeq + Seq(dataTypes, mutableFieldTypes).foreach { dataTypes => val fields = dataTypes.zipWithIndex.map { case (dataType, index) => StructField(s"col$index", dataType, nullable = true) } From d4f275b1d81a5f855412613c30c39cf300cb013d Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 7 Feb 2022 21:04:11 -0800 Subject: [PATCH 171/513] [SPARK-38127][SQL][TESTS] Fix bug of `EnumTypeSetBenchmark` and update benchmark result ### What changes were proposed in this pull request? 
`EnumTypeSetBenchmark` use to compare the `create` and `contains` performance of `EnumSet` and `HashSet`.Before this PR, there are some logical errors in the bench case, for example `testContainsOperation `: https://github.com/apache/spark/blob/9d0563733bacc39ffbb7a07e5d3fc6de71d0cfb3/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/EnumTypeSetBenchmark.scala#L102-L110 `Use HashSet` circulates 100000 times(`valuesPerIteration `is 100000) and `Use EnumSet` circulates only once. So this pr fix this bug and update benchmark result. ### Why are the changes needed? Bug fix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA Closes #35418 from LuciferYang/Fix-EnumTypeSetBenchmark. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../EnumTypeSetBenchmark-jdk11-results.txt | 120 +++++++++--------- .../EnumTypeSetBenchmark-jdk17-results.txt | 120 +++++++++--------- .../EnumTypeSetBenchmark-results.txt | 120 +++++++++--------- .../catalog/EnumTypeSetBenchmark.scala | 8 +- 4 files changed, 186 insertions(+), 182 deletions(-) diff --git a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk11-results.txt b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk11-results.txt index 956db2edfbc02..d26da81a2514e 100644 --- a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk11-results.txt +++ b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk11-results.txt @@ -1,105 +1,105 @@ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 1 1 0 1722.9 0.6 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 1 1 0 1120.1 0.9 1.0X +Use EnumSet 2 2 0 550.8 1.8 0.5X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 10 11 1 97.5 10.3 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 8 8 1 126.0 7.9 1.0X +Use EnumSet 2 2 0 590.4 1.7 4.7X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 22 23 1 46.1 21.7 1.0X -Use EnumSet 0 0 0 10000000.0 0.0 216928.7X +Use HashSet 15 15 1 67.4 14.8 1.0X +Use EnumSet 2 2 0 652.3 1.5 9.7X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Use HashSet 18 20 2 57.0 17.6 1.0X -Use EnumSet 0 0 0 10000000.0 0.0 175588.1X +Use HashSet 17 18 1 57.5 17.4 1.0X +Use EnumSet 2 2 0 591.2 1.7 10.3X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 20 22 2 50.2 19.9 1.0X -Use EnumSet 0 0 0 10000000.0 0.0 199224.4X +Use HashSet 18 18 0 54.8 18.2 1.0X +Use EnumSet 2 2 0 591.4 1.7 10.8X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test create empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 1 1 0 147.1 6.8 1.0X -Use EnumSet 2 2 0 57.9 17.3 0.4X +Use HashSet 1 1 0 95.0 10.5 1.0X +Use EnumSet 2 2 0 54.4 18.4 0.6X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test create 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 15 16 2 6.7 149.6 1.0X -Use EnumSet 2 3 1 42.6 23.5 6.4X +Use HashSet 31 32 2 3.2 310.3 1.0X +Use EnumSet 3 3 0 38.0 26.3 11.8X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test create 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 45 47 2 2.2 450.9 1.0X -Use EnumSet 2 3 1 41.2 24.3 18.6X +Use HashSet 75 75 0 1.3 751.6 1.0X +Use EnumSet 3 3 0 36.1 27.7 27.2X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test create 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 104 108 5 1.0 1036.7 1.0X -Use EnumSet 2 3 1 44.3 22.6 46.0X +Use HashSet 122 123 0 0.8 1225.0 1.0X +Use EnumSet 2 2 0 41.8 23.9 51.2X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test create 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Use HashSet 147 154 5 0.7 1474.0 1.0X -Use EnumSet 2 2 1 56.9 17.6 83.8X +Use HashSet 161 162 0 0.6 1614.9 1.0X +Use EnumSet 2 2 0 52.1 19.2 84.2X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test create and contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 1 1 0 798.4 1.3 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 2 2 0 608.4 1.6 1.0X +Use EnumSet 3 4 0 295.5 3.4 0.5X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test create and contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 39 42 3 25.5 39.2 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 57 58 2 17.6 56.8 1.0X +Use EnumSet 4 4 0 284.2 3.5 16.2X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test create and contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 73 75 2 13.7 73.2 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 97 97 0 10.3 96.7 1.0X +Use EnumSet 4 4 0 263.3 3.8 25.5X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test create and contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 157 162 3 6.4 157.3 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 174 175 2 5.8 173.6 1.0X +Use EnumSet 4 4 0 240.7 4.2 41.8X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Test create and contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Use HashSet 197 206 6 5.1 197.4 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 211 214 5 4.7 211.4 1.0X +Use EnumSet 4 4 0 272.3 3.7 57.6X diff --git a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk17-results.txt b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk17-results.txt index 982ad076e100d..d110a292f8e66 100644 --- a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk17-results.txt +++ 
b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk17-results.txt @@ -1,105 +1,105 @@ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 0 1 0 2155.1 0.5 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 5 6 0 194.8 5.1 1.0X +Use EnumSet 1 1 0 879.0 1.1 4.5X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 10 10 0 100.7 9.9 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 8 10 1 117.8 8.5 1.0X +Use EnumSet 1 1 0 904.7 1.1 7.7X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 18 18 0 57.0 17.5 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 16 18 1 60.8 16.4 1.0X +Use EnumSet 1 1 0 965.2 1.0 15.9X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 15 15 0 68.0 14.7 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 16 17 1 63.7 15.7 1.0X +Use EnumSet 1 1 0 933.1 1.1 14.7X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 16 17 1 61.0 16.4 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 16 19 2 60.7 16.5 1.0X +Use EnumSet 1 1 0 831.7 1.2 13.7X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test create empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 0 1 0 218.9 4.6 1.0X -Use EnumSet 1 1 0 83.2 12.0 0.4X +Use 
HashSet 1 1 0 99.7 10.0 1.0X +Use EnumSet 1 1 0 82.8 12.1 0.8X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test create 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 17 17 1 5.9 168.4 1.0X -Use EnumSet 2 2 0 56.2 17.8 9.5X +Use HashSet 13 14 1 7.6 132.1 1.0X +Use EnumSet 2 2 0 46.9 21.3 6.2X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test create 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 49 50 1 2.0 493.3 1.0X -Use EnumSet 1 1 0 89.7 11.1 44.2X +Use HashSet 45 46 1 2.2 446.6 1.0X +Use EnumSet 1 2 0 68.6 14.6 30.7X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test create 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 116 118 1 0.9 1164.2 1.0X -Use EnumSet 1 1 0 89.7 11.2 104.4X +Use HashSet 127 128 1 0.8 1268.6 1.0X +Use EnumSet 1 2 0 80.3 12.5 101.9X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test create 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 168 170 2 0.6 1681.4 1.0X -Use EnumSet 1 1 0 83.6 12.0 140.6X +Use HashSet 148 158 6 0.7 1479.8 1.0X +Use EnumSet 1 1 0 87.4 11.4 129.4X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test create and contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 1 1 0 904.5 1.1 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 1 1 1 870.5 1.1 1.0X +Use EnumSet 2 2 0 497.6 2.0 0.6X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test create and contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 38 38 2 26.5 37.8 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 27 30 2 36.9 
27.1 1.0X +Use EnumSet 2 3 0 457.0 2.2 12.4X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test create and contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 67 68 2 14.9 67.2 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 60 64 3 16.6 60.1 1.0X +Use EnumSet 2 2 0 460.9 2.2 27.7X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test create and contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 135 137 3 7.4 134.6 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 146 151 4 6.9 145.6 1.0X +Use EnumSet 2 2 0 645.0 1.6 93.9X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz Test create and contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Use HashSet 187 190 3 5.3 187.2 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 193 200 5 5.2 192.6 1.0X +Use EnumSet 2 2 0 602.8 1.7 116.1X diff --git a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-results.txt b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-results.txt index e2c0e3d5dc22d..4d4eb0269ebf3 100644 --- a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-results.txt +++ b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-results.txt @@ -1,105 +1,105 @@ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 0 1 1 2192.0 0.5 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 1 1 0 1709.1 0.6 1.0X +Use EnumSet 2 2 0 554.8 1.8 0.3X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 10 11 1 102.0 9.8 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 8 8 0 124.2 8.1 1.0X +Use EnumSet 2 2 0 423.8 2.4 3.4X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) 
Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 19 21 1 53.2 18.8 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 16 16 0 62.6 16.0 1.0X +Use EnumSet 2 2 0 423.8 2.4 6.8X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 16 17 1 61.5 16.3 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 15 15 0 66.3 15.1 1.0X +Use EnumSet 2 4 2 423.8 2.4 6.4X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 18 20 1 56.6 17.7 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 15 16 0 65.3 15.3 1.0X +Use EnumSet 2 3 0 423.8 2.4 6.5X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test create empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 1 1 0 136.6 7.3 1.0X -Use EnumSet 2 2 0 65.5 15.3 0.5X +Use HashSet 1 1 0 132.0 7.6 1.0X +Use EnumSet 2 2 0 62.4 16.0 0.5X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test create 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 13 14 1 7.9 127.3 1.0X -Use EnumSet 2 2 0 54.9 18.2 7.0X +Use HashSet 16 17 1 6.4 156.6 1.0X +Use EnumSet 2 2 0 59.7 16.7 9.4X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test create 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 41 43 2 2.4 408.2 1.0X -Use EnumSet 2 2 0 66.0 15.1 27.0X +Use HashSet 51 51 1 2.0 510.7 1.0X +Use EnumSet 2 2 0 62.9 15.9 32.1X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test 
create 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 101 106 3 1.0 1010.4 1.0X -Use EnumSet 2 2 0 60.4 16.6 61.0X +Use HashSet 110 118 7 0.9 1099.9 1.0X +Use EnumSet 2 2 0 58.4 17.1 64.3X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test create 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 157 164 3 0.6 1571.3 1.0X -Use EnumSet 1 2 0 78.2 12.8 122.9X +Use HashSet 144 145 1 0.7 1442.6 1.0X +Use EnumSet 1 2 0 71.0 14.1 102.4X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test create and contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 1 2 0 714.6 1.4 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 1 1 0 816.8 1.2 1.0X +Use EnumSet 2 2 0 484.0 2.1 0.6X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test create and contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 38 42 2 26.5 37.7 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 33 33 0 30.7 32.6 1.0X +Use EnumSet 2 3 0 405.3 2.5 13.2X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test create and contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 68 72 2 14.8 67.5 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 76 76 1 13.2 75.6 1.0X +Use EnumSet 2 3 0 400.6 2.5 30.3X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test create and contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 151 160 4 6.6 150.8 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 170 170 1 5.9 169.6 1.0X +Use EnumSet 3 9 1 308.1 3.2 52.3X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure +Intel(R) 
Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test create and contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Use HashSet 209 223 12 4.8 208.5 1.0X -Use EnumSet 0 0 0 Infinity 0.0 InfinityX +Use HashSet 156 157 1 6.4 155.8 1.0X +Use EnumSet 9 9 0 110.2 9.1 17.2X diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/EnumTypeSetBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/EnumTypeSetBenchmark.scala index a23ff6eaa2a55..a918bae4a8402 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/EnumTypeSetBenchmark.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/EnumTypeSetBenchmark.scala @@ -106,7 +106,9 @@ object EnumTypeSetBenchmark extends BenchmarkBase { } benchmark.addCase("Use EnumSet") { _: Int => - capabilities.foreach(enumSet.contains) + for (_ <- 0L until valuesPerIteration) { + capabilities.foreach(enumSet.contains) + } } benchmark.run() } @@ -131,7 +133,9 @@ object EnumTypeSetBenchmark extends BenchmarkBase { } benchmark.addCase("Use EnumSet") { _: Int => - capabilities.foreach(creatEnumSetFunctions.apply().contains) + for (_ <- 0L until valuesPerIteration) { + capabilities.foreach(creatEnumSetFunctions.apply().contains) + } } benchmark.run() } From 2e703ae9d210d26573b849a9a88c55667b24127d Mon Sep 17 00:00:00 2001 From: Shardul Mahadik Date: Tue, 8 Feb 2022 13:55:34 +0800 Subject: [PATCH 172/513] [SPARK-38030][SQL] Canonicalization should not remove nullability of AttributeReference dataType ### What changes were proposed in this pull request? Canonicalization of AttributeReference should not remove nullability information of its dataType. ### Why are the changes needed? SPARK-38030 lists an issue where canonicalization of cast resulted in an unresolved expression, thus causing query failure. The issue was that the child AttributeReference's dataType was converted to nullable during canonicalization and hence the Cast's `checkInputDataTypes` fails. Although the exact repro listed in SPARK-38030 no longer works in master due to an unrelated change (details in the JIRA), some other codepaths which depend on canonicalized representations can trigger the same issue. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit test to ensure that canonicalization preserves nullability of AttributeReference and does not result in an unresolved cast Closes #35332 from shardulm94/SPARK-38030. 
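For illustration, a minimal standalone sketch (not part of the patch; the object name is ours, and it assumes only the catalyst classes on the classpath) of the behavior the new unit test guards: with the one-line fix above, canonicalizing an AttributeReference whose struct field is non-nullable keeps its dataType intact, so a Cast built on top of it stays resolved after canonicalization.

```scala
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Cast}
import org.apache.spark.sql.types.{StringType, StructField, StructType}

object CanonicalizationNullabilitySketch {
  def main(args: Array[String]): Unit = {
    // A struct column whose only field is explicitly non-nullable.
    val structType = StructType(Seq(StructField("name", StringType, nullable = false)))
    val attr = AttributeReference("col", structType)()

    // With the old dataType.asNullable, the canonicalized attribute became nullable and
    // Cast.checkInputDataTypes failed; with the fix the dataType is preserved as-is.
    assert(attr.canonicalized.dataType == structType)

    // The cast is resolved, and canonicalization no longer turns it into an
    // unresolved expression.
    val cast = Cast(attr, structType)
    assert(cast.resolved)
    assert(cast.canonicalized.resolved)
  }
}
```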
Authored-by: Shardul Mahadik Signed-off-by: Wenchen Fan --- .../catalyst/expressions/namedExpressions.scala | 2 +- .../catalyst/expressions/CanonicalizeSuite.scala | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index a099fadcec365..06ca139870804 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -294,7 +294,7 @@ case class AttributeReference( } override lazy val preCanonicalized: Expression = { - AttributeReference("none", dataType.asNullable)(exprId) + AttributeReference("none", dataType)(exprId) } override def newInstance(): AttributeReference = diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala index 1805189b268db..83307c9022dd2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.Range -import org.apache.spark.sql.types.{IntegerType, LongType, StructField, StructType} +import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructField, StructType} class CanonicalizeSuite extends SparkFunSuite { @@ -177,4 +177,17 @@ class CanonicalizeSuite extends SparkFunSuite { assert(expr.semanticEquals(attr)) assert(attr.semanticEquals(expr)) } + + test("SPARK-38030: Canonicalization should not remove nullability of AttributeReference" + + " dataType") { + val structType = StructType(Seq(StructField("name", StringType, nullable = false))) + val attr = AttributeReference("col", structType)() + // AttributeReference dataType should not be converted to nullable + assert(attr.canonicalized.dataType === structType) + + val cast = Cast(attr, structType) + assert(cast.resolved) + // canonicalization should not converted resolved cast to unresolved + assert(cast.canonicalized.resolved) + } } From 08c851dff98514ee3b93250b371c2b5aa35d6bf6 Mon Sep 17 00:00:00 2001 From: SaurabhChawla Date: Tue, 8 Feb 2022 11:57:56 +0300 Subject: [PATCH 173/513] [SPARK-37943][SQL] Use error classes in the compilation errors of grouping ### What changes were proposed in this pull request? Migrate the following errors in QueryCompilationErrors onto use error classes: groupingMustWithGroupingSetsOrCubeOrRollupError => UNSUPPORTED_GROUPING_EXPRESSION ### Why are the changes needed? Porting grouping /grouping Id errors to new error framework. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added the unit test in QueryCompilationErrorsSuite and tested the unit test Closes #35389 from SaurabhChawla100/SPARK-37943. 
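As a usage illustration, a small sketch (not from the PR; it assumes a local SparkSession and the patched error-classes.json on the classpath) of what callers now observe: the AnalysisException raised for a misplaced grouping() keeps its original message but additionally carries the UNSUPPORTED_GROUPING_EXPRESSION error class.

```scala
import org.apache.spark.sql.{AnalysisException, SparkSession}

object GroupingErrorClassSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("sketch").getOrCreate()
    import spark.implicits._
    try {
      val df = Seq((536361, 2, 17850), (536363, 6, 17851))
        .toDF("InvoiceNo", "Quantity", "CustomerID")
      // grouping() outside GROUPING SETS/CUBE/ROLLUP fails during analysis.
      df.groupBy("CustomerID").agg(Map("Quantity" -> "max"))
        .filter("grouping(CustomerID) = 17850")
    } catch {
      case e: AnalysisException =>
        // The new error class is exposed alongside the unchanged message.
        assert(e.errorClass.contains("UNSUPPORTED_GROUPING_EXPRESSION"))
        assert(e.message.contains(
          "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup"))
    } finally {
      spark.stop()
    }
  }
}
```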
Authored-by: SaurabhChawla Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 3 ++ .../sql/errors/QueryCompilationErrors.scala | 4 ++- .../errors/QueryCompilationErrorsSuite.scala | 35 +++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 686c4b63488b1..71909771f7228 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -153,6 +153,9 @@ "message" : [ "The feature is not supported: %s" ], "sqlState" : "0A000" }, + "UNSUPPORTED_GROUPING_EXPRESSION" : { + "message" : [ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup" ] + }, "WRITING_JOB_ABORTED" : { "message" : [ "Writing job aborted" ], "sqlState" : "40000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 331636618a502..726c490350291 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -200,7 +200,9 @@ object QueryCompilationErrors { } def groupingMustWithGroupingSetsOrCubeOrRollupError(): Throwable = { - new AnalysisException("grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") + new AnalysisException( + errorClass = "UNSUPPORTED_GROUPING_EXPRESSION", + messageParameters = Array.empty) } def pandasUDFAggregateNotSupportedInPivotError(): Throwable = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 673925865c06f..b6de5fb887854 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.{AnalysisException, Dataset, QueryTest} import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.expressions.{Alias, UpCast} import org.apache.spark.sql.catalyst.plans.logical.Project +import org.apache.spark.sql.functions.{grouping, grouping_id} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.NumericType @@ -76,4 +77,38 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { "UpCast only support DecimalType as AbstractDataType yet," + """ but got: org.apache.spark.sql.types.NumericType\$\@\w+""")) } + + test("UNSUPPORTED_GROUPING_EXPRESSION: filter with grouping/grouping_Id expression") { + val df = Seq( + (536361, "85123A", 2, 17850), + (536362, "85123B", 4, 17850), + (536363, "86123A", 6, 17851) + ).toDF("InvoiceNo", "StockCode", "Quantity", "CustomerID") + Seq("grouping", "grouping_id").foreach { grouping => + val errMsg = intercept[AnalysisException] { + df.groupBy("CustomerId").agg(Map("Quantity" -> "max")) + .filter(s"$grouping(CustomerId)=17850") + } + assert(errMsg.message === + "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") + assert(errMsg.errorClass === Some("UNSUPPORTED_GROUPING_EXPRESSION")) + } + } + + test("UNSUPPORTED_GROUPING_EXPRESSION: Sort with grouping/grouping_Id expression") { + val df = Seq( + (536361, "85123A", 2, 17850), + (536362, "85123B", 4, 
17850), + (536363, "86123A", 6, 17851) + ).toDF("InvoiceNo", "StockCode", "Quantity", "CustomerID") + Seq(grouping("CustomerId"), grouping_id("CustomerId")).foreach { grouping => + val errMsg = intercept[AnalysisException] { + df.groupBy("CustomerId").agg(Map("Quantity" -> "max")). + sort(grouping) + } + assert(errMsg.errorClass === Some("UNSUPPORTED_GROUPING_EXPRESSION")) + assert(errMsg.message === + "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") + } + } } From 6b62c30fa686dfca812b19f42e0333a0ddde2791 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 8 Feb 2022 18:53:12 +0900 Subject: [PATCH 174/513] [SPARK-38136][INFRA][TESTS] Update GitHub Action test image and PyArrow dependency ### What changes were proposed in this pull request? This PR aims to update `GitHub Action` test docker image to make the test environment up-to-date. For example, use `PyArrow 7.0.0` instead of `6.0.0`. In addition, `Python 3.8`'s `PyArrow` installation is also updated together to be consistent. Please note that this aims to upgrade the test infra instead of Spark itself. ### Why are the changes needed? | SW | 20211228 | 20220207 | | ----- | ----------- | ----------- | | OpenJDK | 1.8.0_292 | 1.8.0_312 | | numpy | 1.21.4 | 1.22.2 | | pandas | 1.3.4 | 1.3.5 | | pyarrow | 6.0.0 | 7.0.0 | | scipy | 1.7.2 | 1.8.0 | ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Pass the GitHub Action with new image. - Check the package list in the GitHub Action log. Or check the docker image directly. Closes #35434 from dongjoon-hyun/SPARK-38136. Authored-by: Dongjoon Hyun Signed-off-by: Hyukjin Kwon --- .github/workflows/build_and_test.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 4529cd9ba4c29..ae35f50d1d2a8 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -252,7 +252,7 @@ jobs: - name: Install Python packages (Python 3.8) if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) run: | - python3.8 -m pip install 'numpy>=1.20.0' 'pyarrow<7.0.0' pandas scipy xmlrunner + python3.8 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy xmlrunner python3.8 -m pip list # Run the tests. - name: Run tests @@ -287,7 +287,7 @@ jobs: name: "Build modules (${{ format('{0}, {1} job', needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}): ${{ matrix.modules }}" runs-on: ubuntu-20.04 container: - image: dongjoon/apache-spark-github-action-image:20211228 + image: dongjoon/apache-spark-github-action-image:20220207 strategy: fail-fast: false matrix: @@ -391,7 +391,7 @@ jobs: name: "Build modules: sparkr" runs-on: ubuntu-20.04 container: - image: dongjoon/apache-spark-github-action-image:20211228 + image: dongjoon/apache-spark-github-action-image:20220207 env: HADOOP_PROFILE: ${{ needs.configure-jobs.outputs.hadoop }} HIVE_PROFILE: hive2.3 @@ -462,7 +462,7 @@ jobs: PYSPARK_DRIVER_PYTHON: python3.9 PYSPARK_PYTHON: python3.9 container: - image: dongjoon/apache-spark-github-action-image:20211228 + image: dongjoon/apache-spark-github-action-image:20220207 steps: - name: Checkout Spark repository uses: actions/checkout@v2 @@ -530,7 +530,7 @@ jobs: # Jinja2 3.0.0+ causes error when building with Sphinx. # See also https://issues.apache.org/jira/browse/SPARK-35375. 
python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0' - python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' 'pyarrow<7.0.0' pandas 'plotly>=4.8' + python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' apt-get update -y apt-get install -y ruby ruby-dev Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')" From 3d736d978cdaf345d81833a6216d41464fa86c69 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 8 Feb 2022 12:46:30 +0100 Subject: [PATCH 175/513] [SPARK-37412][PYTHON][ML] Inline typehints for pyspark.ml.stat ### What changes were proposed in this pull request? This PR migrates type `pyspark.ml.stat` annotations from stub file to inline type hints. (second take, after issue resulting in reversion of #35401) ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35437 from zero323/SPARK-37412. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/ml/stat.py | 62 +++++++++++++++++++++----------- python/pyspark/ml/stat.pyi | 73 -------------------------------------- 2 files changed, 41 insertions(+), 94 deletions(-) delete mode 100644 python/pyspark/ml/stat.pyi diff --git a/python/pyspark/ml/stat.py b/python/pyspark/ml/stat.py index 15bb6ca93f179..3b3588c2af0d2 100644 --- a/python/pyspark/ml/stat.py +++ b/python/pyspark/ml/stat.py @@ -17,12 +17,20 @@ import sys +from typing import Optional, Tuple, TYPE_CHECKING + + from pyspark import since, SparkContext from pyspark.ml.common import _java2py, _py2java -from pyspark.ml.wrapper import JavaWrapper, _jvm +from pyspark.ml.linalg import Matrix, Vector +from pyspark.ml.wrapper import JavaWrapper, _jvm # type: ignore[attr-defined] from pyspark.sql.column import Column, _to_seq +from pyspark.sql.dataframe import DataFrame from pyspark.sql.functions import lit +if TYPE_CHECKING: + from py4j.java_gateway import JavaObject # type: ignore[import] + class ChiSquareTest: """ @@ -37,7 +45,9 @@ class ChiSquareTest: """ @staticmethod - def test(dataset, featuresCol, labelCol, flatten=False): + def test( + dataset: DataFrame, featuresCol: str, labelCol: str, flatten: bool = False + ) -> DataFrame: """ Perform a Pearson's independence test using dataset. @@ -95,6 +105,8 @@ def test(dataset, featuresCol, labelCol, flatten=False): 4.0 """ sc = SparkContext._active_spark_context + assert sc is not None + javaTestObj = _jvm().org.apache.spark.ml.stat.ChiSquareTest args = [_py2java(sc, arg) for arg in (dataset, featuresCol, labelCol, flatten)] return _java2py(sc, javaTestObj.test(*args)) @@ -116,7 +128,7 @@ class Correlation: """ @staticmethod - def corr(dataset, column, method="pearson"): + def corr(dataset: DataFrame, column: str, method: str = "pearson") -> DataFrame: """ Compute the correlation matrix with specified method using dataset. @@ -162,6 +174,8 @@ def corr(dataset, column, method="pearson"): [ 0.4 , 0.9486... , NaN, 1. 
]]) """ sc = SparkContext._active_spark_context + assert sc is not None + javaCorrObj = _jvm().org.apache.spark.ml.stat.Correlation args = [_py2java(sc, arg) for arg in (dataset, column, method)] return _java2py(sc, javaCorrObj.corr(*args)) @@ -181,7 +195,7 @@ class KolmogorovSmirnovTest: """ @staticmethod - def test(dataset, sampleCol, distName, *params): + def test(dataset: DataFrame, sampleCol: str, distName: str, *params: float) -> DataFrame: """ Conduct a one-sample, two-sided Kolmogorov-Smirnov test for probability distribution equality. Currently supports the normal distribution, taking as parameters the mean and @@ -228,9 +242,11 @@ def test(dataset, sampleCol, distName, *params): 0.175 """ sc = SparkContext._active_spark_context + assert sc is not None + javaTestObj = _jvm().org.apache.spark.ml.stat.KolmogorovSmirnovTest dataset = _py2java(sc, dataset) - params = [float(param) for param in params] + params = [float(param) for param in params] # type: ignore[assignment] return _java2py( sc, javaTestObj.test(dataset, sampleCol, distName, _jvm().PythonUtils.toSeq(params)) ) @@ -284,7 +300,7 @@ class Summarizer: @staticmethod @since("2.4.0") - def mean(col, weightCol=None): + def mean(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of mean summary """ @@ -292,7 +308,7 @@ def mean(col, weightCol=None): @staticmethod @since("3.0.0") - def sum(col, weightCol=None): + def sum(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of sum summary """ @@ -300,7 +316,7 @@ def sum(col, weightCol=None): @staticmethod @since("2.4.0") - def variance(col, weightCol=None): + def variance(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of variance summary """ @@ -308,7 +324,7 @@ def variance(col, weightCol=None): @staticmethod @since("3.0.0") - def std(col, weightCol=None): + def std(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of std summary """ @@ -316,7 +332,7 @@ def std(col, weightCol=None): @staticmethod @since("2.4.0") - def count(col, weightCol=None): + def count(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of count summary """ @@ -324,7 +340,7 @@ def count(col, weightCol=None): @staticmethod @since("2.4.0") - def numNonZeros(col, weightCol=None): + def numNonZeros(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of numNonZero summary """ @@ -332,7 +348,7 @@ def numNonZeros(col, weightCol=None): @staticmethod @since("2.4.0") - def max(col, weightCol=None): + def max(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of max summary """ @@ -340,7 +356,7 @@ def max(col, weightCol=None): @staticmethod @since("2.4.0") - def min(col, weightCol=None): + def min(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of min summary """ @@ -348,7 +364,7 @@ def min(col, weightCol=None): @staticmethod @since("2.4.0") - def normL1(col, weightCol=None): + def normL1(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of normL1 summary """ @@ -356,14 +372,14 @@ def normL1(col, weightCol=None): @staticmethod @since("2.4.0") - def normL2(col, weightCol=None): + def normL2(col: Column, weightCol: Optional[Column] = None) -> Column: """ return a column of normL2 summary """ return Summarizer._get_single_metric(col, weightCol, "normL2") @staticmethod - def _check_param(featuresCol, weightCol): + def _check_param(featuresCol: Column, weightCol: 
Optional[Column]) -> Tuple[Column, Column]: if weightCol is None: weightCol = lit(1.0) if not isinstance(featuresCol, Column) or not isinstance(weightCol, Column): @@ -371,7 +387,7 @@ def _check_param(featuresCol, weightCol): return featuresCol, weightCol @staticmethod - def _get_single_metric(col, weightCol, metric): + def _get_single_metric(col: Column, weightCol: Optional[Column], metric: str) -> Column: col, weightCol = Summarizer._check_param(col, weightCol) return Column( JavaWrapper._new_java_obj( @@ -380,7 +396,7 @@ def _get_single_metric(col, weightCol, metric): ) @staticmethod - def metrics(*metrics): + def metrics(*metrics: str) -> "SummaryBuilder": """ Given a list of metrics, provides a builder that it turns computes metrics from a column. @@ -415,6 +431,8 @@ def metrics(*metrics): :py:class:`pyspark.ml.stat.SummaryBuilder` """ sc = SparkContext._active_spark_context + assert sc is not None + js = JavaWrapper._new_java_obj( "org.apache.spark.ml.stat.Summarizer.metrics", _to_seq(sc, metrics) ) @@ -432,10 +450,10 @@ class SummaryBuilder(JavaWrapper): """ - def __init__(self, jSummaryBuilder): + def __init__(self, jSummaryBuilder: "JavaObject"): super(SummaryBuilder, self).__init__(jSummaryBuilder) - def summary(self, featuresCol, weightCol=None): + def summary(self, featuresCol: Column, weightCol: Optional[Column] = None) -> Column: """ Returns an aggregate object that contains the summary of the column with the requested metrics. @@ -456,6 +474,8 @@ def summary(self, featuresCol, weightCol=None): structure is determined during the creation of the builder. """ featuresCol, weightCol = Summarizer._check_param(featuresCol, weightCol) + assert self._java_obj is not None + return Column(self._java_obj.summary(featuresCol._jc, weightCol._jc)) @@ -474,7 +494,7 @@ class MultivariateGaussian: [ 3., 2.]])) """ - def __init__(self, mean, cov): + def __init__(self, mean: Vector, cov: Matrix): self.mean = mean self.cov = cov diff --git a/python/pyspark/ml/stat.pyi b/python/pyspark/ml/stat.pyi deleted file mode 100644 index 90b0686b1c746..0000000000000 --- a/python/pyspark/ml/stat.pyi +++ /dev/null @@ -1,73 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Optional - -from pyspark.ml.linalg import Matrix, Vector -from pyspark.ml.wrapper import JavaWrapper -from pyspark.sql.column import Column -from pyspark.sql.dataframe import DataFrame - -from py4j.java_gateway import JavaObject # type: ignore[import] - -class ChiSquareTest: - @staticmethod - def test( - dataset: DataFrame, featuresCol: str, labelCol: str, flatten: bool = ... - ) -> DataFrame: ... - -class Correlation: - @staticmethod - def corr(dataset: DataFrame, column: str, method: str = ...) -> DataFrame: ... 
- -class KolmogorovSmirnovTest: - @staticmethod - def test(dataset: DataFrame, sampleCol: str, distName: str, *params: float) -> DataFrame: ... - -class Summarizer: - @staticmethod - def mean(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def sum(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def variance(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def std(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def count(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def numNonZeros(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def max(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def min(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def normL1(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def normL2(col: Column, weightCol: Optional[Column] = ...) -> Column: ... - @staticmethod - def metrics(*metrics: str) -> SummaryBuilder: ... - -class SummaryBuilder(JavaWrapper): - def __init__(self, jSummaryBuilder: JavaObject) -> None: ... - def summary(self, featuresCol: Column, weightCol: Optional[Column] = ...) -> Column: ... - -class MultivariateGaussian: - mean: Vector - cov: Matrix - def __init__(self, mean: Vector, cov: Matrix) -> None: ... From 6115f5806db2de7fa0defe679724141354ebfe2b Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 9 Feb 2022 01:21:35 +0900 Subject: [PATCH 176/513] [MINOR][SQL] Remove redundant array creation in UnsafeRow ### What changes were proposed in this pull request? `j.u.Arrays.asList` is a varargs api, this pr remove the redundant array creation to simplify the code. ### Why are the changes needed? Code simplification. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35435 from LuciferYang/remove-redundant-array-creation. Authored-by: yangjie01 Signed-off-by: Kousuke Saruta --- .../org/apache/spark/sql/catalyst/expressions/UnsafeRow.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java index 7b11ab20966e1..476201c9a8d8e 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java @@ -80,7 +80,7 @@ public static int calculateBitSetWidthInBytes(int numFields) { static { mutableFieldTypes = Collections.unmodifiableSet( new HashSet<>( - Arrays.asList(new DataType[] { + Arrays.asList( NullType, BooleanType, ByteType, @@ -92,7 +92,7 @@ public static int calculateBitSetWidthInBytes(int numFields) { DateType, TimestampType, TimestampNTZType - }))); + ))); } public static boolean isFixedLength(DataType dt) { From c69f08f81042c3ecca4b5dfa5511c1217ae88096 Mon Sep 17 00:00:00 2001 From: Venkata krishnan Sowrirajan Date: Tue, 8 Feb 2022 11:24:15 -0600 Subject: [PATCH 177/513] [SPARK-34826][SHUFFLE] Adaptively fetch shuffle mergers for push based shuffle ### What changes were proposed in this pull request? Currently shuffle mergers are fetched before the start of the ShuffleMapStage. 
But for initial stages this can be problematic as shuffle mergers are nothing but unique hosts with shuffle services running which could be very few based on executors and this can cause merge ratio to be low. With this approach, `ShuffleMapTask` query for merger locations if not available and if available and start using this for pushing the blocks. Since partitions are mapped uniquely to a merger location, it should be fine to not push for the earlier set of tasks. This should improve the merge ratio for even initial stages. ### Why are the changes needed? Performance improvement. No new APIs change. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit tests and also has been working in our internal production environment for a while now. Closes #34122 from venkata91/SPARK-34826. Authored-by: Venkata krishnan Sowrirajan Signed-off-by: Mridul Muralidharan gmail.com> --- .../scala/org/apache/spark/Dependency.scala | 39 +-- .../org/apache/spark/MapOutputTracker.scala | 88 ++++++- .../apache/spark/scheduler/DAGScheduler.scala | 86 +++++-- .../apache/spark/scheduler/StageInfo.scala | 16 +- .../spark/shuffle/ShuffleWriteProcessor.scala | 14 +- .../shuffle/sort/SortShuffleManager.scala | 2 +- .../apache/spark/MapOutputTrackerSuite.scala | 30 ++- .../spark/scheduler/DAGSchedulerSuite.scala | 224 +++++++++++++++++- 8 files changed, 440 insertions(+), 59 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/Dependency.scala b/core/src/main/scala/org/apache/spark/Dependency.scala index 8e348eefef6c2..fbb92b4b4e293 100644 --- a/core/src/main/scala/org/apache/spark/Dependency.scala +++ b/core/src/main/scala/org/apache/spark/Dependency.scala @@ -104,15 +104,17 @@ class ShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag]( private[this] val numPartitions = rdd.partitions.length - // By default, shuffle merge is enabled for ShuffleDependency if push based shuffle + // By default, shuffle merge is allowed for ShuffleDependency if push based shuffle // is enabled - private[this] var _shuffleMergeEnabled = canShuffleMergeBeEnabled() + private[this] var _shuffleMergeAllowed = canShuffleMergeBeEnabled() - private[spark] def setShuffleMergeEnabled(shuffleMergeEnabled: Boolean): Unit = { - _shuffleMergeEnabled = shuffleMergeEnabled + private[spark] def setShuffleMergeAllowed(shuffleMergeAllowed: Boolean): Unit = { + _shuffleMergeAllowed = shuffleMergeAllowed } - def shuffleMergeEnabled : Boolean = _shuffleMergeEnabled + def shuffleMergeEnabled : Boolean = shuffleMergeAllowed && mergerLocs.nonEmpty + + def shuffleMergeAllowed : Boolean = _shuffleMergeAllowed /** * Stores the location of the list of chosen external shuffle services for handling the @@ -124,7 +126,7 @@ class ShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag]( * Stores the information about whether the shuffle merge is finalized for the shuffle map stage * associated with this shuffle dependency */ - private[this] var _shuffleMergedFinalized: Boolean = false + private[this] var _shuffleMergeFinalized: Boolean = false /** * shuffleMergeId is used to uniquely identify merging process of shuffle @@ -135,31 +137,34 @@ class ShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag]( def shuffleMergeId: Int = _shuffleMergeId def setMergerLocs(mergerLocs: Seq[BlockManagerId]): Unit = { + assert(shuffleMergeAllowed) this.mergerLocs = mergerLocs } def getMergerLocs: Seq[BlockManagerId] = mergerLocs private[spark] def markShuffleMergeFinalized(): Unit = { - _shuffleMergedFinalized = true + 
_shuffleMergeFinalized = true + } + + private[spark] def isShuffleMergeFinalizedMarked: Boolean = { + _shuffleMergeFinalized } /** - * Returns true if push-based shuffle is disabled for this stage or empty RDD, - * or if the shuffle merge for this stage is finalized, i.e. the shuffle merge - * results for all partitions are available. + * Returns true if push-based shuffle is disabled or if the shuffle merge for + * this shuffle is finalized. */ def shuffleMergeFinalized: Boolean = { - // Empty RDD won't be computed therefore shuffle merge finalized should be true by default. - if (shuffleMergeEnabled && numPartitions > 0) { - _shuffleMergedFinalized + if (shuffleMergeEnabled) { + isShuffleMergeFinalizedMarked } else { true } } def newShuffleMergeState(): Unit = { - _shuffleMergedFinalized = false + _shuffleMergeFinalized = false mergerLocs = Nil _shuffleMergeId += 1 finalizeTask = None @@ -187,7 +192,7 @@ class ShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag]( * @param mapIndex Map task index * @return number of map tasks with block push completed */ - def incPushCompleted(mapIndex: Int): Int = { + private[spark] def incPushCompleted(mapIndex: Int): Int = { shufflePushCompleted.add(mapIndex) shufflePushCompleted.getCardinality } @@ -195,9 +200,9 @@ class ShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag]( // Only used by DAGScheduler to coordinate shuffle merge finalization @transient private[this] var finalizeTask: Option[ScheduledFuture[_]] = None - def getFinalizeTask: Option[ScheduledFuture[_]] = finalizeTask + private[spark] def getFinalizeTask: Option[ScheduledFuture[_]] = finalizeTask - def setFinalizeTask(task: ScheduledFuture[_]): Unit = { + private[spark] def setFinalizeTask(task: ScheduledFuture[_]): Unit = { finalizeTask = Option(task) } diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index d71fb09682924..9835695794d83 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -144,6 +144,8 @@ private class ShuffleStatus( */ private[this] var _numAvailableMergeResults: Int = 0 + private[this] var shufflePushMergerLocations: Seq[BlockManagerId] = Seq.empty + /** * Register a map output. If there is already a registered location for the map output then it * will be replaced by the new location. @@ -213,6 +215,16 @@ private class ShuffleStatus( mergeStatuses(reduceId) = status } + def registerShuffleMergerLocations(shuffleMergers: Seq[BlockManagerId]): Unit = withWriteLock { + if (shufflePushMergerLocations.isEmpty) { + shufflePushMergerLocations = shuffleMergers + } + } + + def removeShuffleMergerLocations(): Unit = withWriteLock { + shufflePushMergerLocations = Nil + } + // TODO support updateMergeResult for similar use cases as updateMapOutput /** @@ -392,6 +404,10 @@ private class ShuffleStatus( f(mergeStatuses) } + def getShufflePushMergerLocations: Seq[BlockManagerId] = withReadLock { + shufflePushMergerLocations + } + /** * Clears the cached serialized map output statuses. 
*/ @@ -429,6 +445,8 @@ private[spark] case class GetMapOutputStatuses(shuffleId: Int) extends MapOutputTrackerMessage private[spark] case class GetMapAndMergeResultStatuses(shuffleId: Int) extends MapOutputTrackerMessage +private[spark] case class GetShufflePushMergerLocations(shuffleId: Int) + extends MapOutputTrackerMessage private[spark] case object StopMapOutputTracker extends MapOutputTrackerMessage private[spark] sealed trait MapOutputTrackerMasterMessage @@ -436,6 +454,8 @@ private[spark] case class GetMapOutputMessage(shuffleId: Int, context: RpcCallContext) extends MapOutputTrackerMasterMessage private[spark] case class GetMapAndMergeOutputMessage(shuffleId: Int, context: RpcCallContext) extends MapOutputTrackerMasterMessage +private[spark] case class GetShufflePushMergersMessage(shuffleId: Int, + context: RpcCallContext) extends MapOutputTrackerMasterMessage private[spark] case class MapSizesByExecutorId( iter: Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])], enableBatchFetch: Boolean) @@ -457,6 +477,11 @@ private[spark] class MapOutputTrackerMasterEndpoint( logInfo(s"Asked to send map/merge result locations for shuffle $shuffleId to $hostPort") tracker.post(GetMapAndMergeOutputMessage(shuffleId, context)) + case GetShufflePushMergerLocations(shuffleId: Int) => + logInfo(s"Asked to send shuffle push merger locations for shuffle" + + s" $shuffleId to ${context.senderAddress.hostPort}") + tracker.post(GetShufflePushMergersMessage(shuffleId, context)) + case StopMapOutputTracker => logInfo("MapOutputTrackerMasterEndpoint stopped!") context.reply(true) @@ -596,6 +621,16 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging partitionId: Int, chunkBitmap: RoaringBitmap): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] + /** + * Called from executors whenever a task with push based shuffle is enabled doesn't have shuffle + * mergers available. This typically happens when the initial stages doesn't have enough shuffle + * mergers available since very few executors got registered. This is on a best effort basis, + * if there is not enough shuffle mergers available for this stage then an empty sequence would + * be returned indicating the task to avoid shuffle push. + * @param shuffleId + */ + def getShufflePushMergerLocations(shuffleId: Int): Seq[BlockManagerId] + /** * Deletes map output status information for the specified shuffle stage. 
*/ @@ -711,6 +746,11 @@ private[spark] class MapOutputTrackerMaster( handleStatusMessage(shuffleId, context, false) case GetMapAndMergeOutputMessage(shuffleId, context) => handleStatusMessage(shuffleId, context, true) + case GetShufflePushMergersMessage(shuffleId, context) => + logDebug(s"Handling request to send shuffle push merger locations for shuffle" + + s" $shuffleId to ${context.senderAddress.hostPort}") + context.reply(shuffleStatuses.get(shuffleId).map(_.getShufflePushMergerLocations) + .getOrElse(Seq.empty[BlockManagerId])) } } catch { case NonFatal(e) => logError(e.getMessage, e) @@ -772,6 +812,7 @@ private[spark] class MapOutputTrackerMaster( case Some(shuffleStatus) => shuffleStatus.removeOutputsByFilter(x => true) shuffleStatus.removeMergeResultsByFilter(x => true) + shuffleStatus.removeShuffleMergerLocations() incrementEpoch() case None => throw new SparkException( @@ -789,6 +830,12 @@ private[spark] class MapOutputTrackerMaster( } } + def registerShufflePushMergerLocations( + shuffleId: Int, + shuffleMergers: Seq[BlockManagerId]): Unit = { + shuffleStatuses(shuffleId).registerShuffleMergerLocations(shuffleMergers) + } + /** * Unregisters a merge result corresponding to the reduceId if present. If the optional mapIndex * is specified, it will only unregister the merge result if the mapIndex is part of that merge @@ -1142,6 +1189,11 @@ private[spark] class MapOutputTrackerMaster( Seq.empty.toIterator } + // This method is only called in local-mode. + override def getShufflePushMergerLocations(shuffleId: Int): Seq[BlockManagerId] = { + shuffleStatuses(shuffleId).getShufflePushMergerLocations + } + override def stop(): Unit = { mapOutputTrackerMasterMessages.offer(PoisonPill) threadpool.shutdown() @@ -1176,6 +1228,14 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr // instantiate a serializer. See the followup to SPARK-36705 for more details. private lazy val fetchMergeResult = Utils.isPushBasedShuffleEnabled(conf, isDriver = false) + /** + * [[shufflePushMergerLocations]] tracks shuffle push merger locations for the latest + * shuffle execution + * + * Exposed for testing + */ + val shufflePushMergerLocations = new ConcurrentHashMap[Int, Seq[BlockManagerId]]().asScala + /** * A [[KeyLock]] whose key is a shuffle id to ensure there is only one thread fetching * the same shuffle block. @@ -1213,10 +1273,10 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr useMergeResult: Boolean): MapSizesByExecutorId = { logDebug(s"Fetching outputs for shuffle $shuffleId") val (mapOutputStatuses, mergedOutputStatuses) = getStatuses(shuffleId, conf, - // EnableBatchFetch can be set to false during stage retry when the - // shuffleDependency.shuffleMergeEnabled is set to false, and Driver + // enableBatchFetch can be set to false during stage retry when the + // shuffleDependency.isShuffleMergeFinalizedMarked is set to false, and Driver // has already collected the mergedStatus for its shuffle dependency. - // In this case, boolean check helps to insure that the unnecessary + // In this case, boolean check helps to ensure that the unnecessary // mergeStatus won't be fetched, thus mergedOutputStatuses won't be // passed to convertMapStatuses. See details in [SPARK-37023]. 
if (useMergeResult) fetchMergeResult else false) @@ -1281,6 +1341,26 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr } } + override def getShufflePushMergerLocations(shuffleId: Int): Seq[BlockManagerId] = { + shufflePushMergerLocations.getOrElse(shuffleId, getMergerLocations(shuffleId)) + } + + private def getMergerLocations(shuffleId: Int): Seq[BlockManagerId] = { + fetchingLock.withLock(shuffleId) { + var fetchedMergers = shufflePushMergerLocations.get(shuffleId).orNull + if (null == fetchedMergers) { + fetchedMergers = + askTracker[Seq[BlockManagerId]](GetShufflePushMergerLocations(shuffleId)) + if (fetchedMergers.nonEmpty) { + shufflePushMergerLocations(shuffleId) = fetchedMergers + } else { + fetchedMergers = Seq.empty[BlockManagerId] + } + } + fetchedMergers + } + } + /** * Get or fetch the array of MapStatuses and MergeStatuses if push based shuffle enabled * for a given shuffle ID. NOTE: clients MUST synchronize @@ -1364,6 +1444,7 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr def unregisterShuffle(shuffleId: Int): Unit = { mapStatuses.remove(shuffleId) mergeStatuses.remove(shuffleId) + shufflePushMergerLocations.remove(shuffleId) } /** @@ -1378,6 +1459,7 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr epoch = newEpoch mapStatuses.clear() mergeStatuses.clear() + shufflePushMergerLocations.clear() } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index eed71038b3e33..ffaabba71e8cc 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -1369,24 +1369,37 @@ private[spark] class DAGScheduler( * locations for block push/merge by getting the historical locations of past executors. 
*/ private def prepareShuffleServicesForShuffleMapStage(stage: ShuffleMapStage): Unit = { - assert(stage.shuffleDep.shuffleMergeEnabled && !stage.shuffleDep.shuffleMergeFinalized) + assert(stage.shuffleDep.shuffleMergeAllowed && !stage.shuffleDep.isShuffleMergeFinalizedMarked) if (stage.shuffleDep.getMergerLocs.isEmpty) { - val mergerLocs = sc.schedulerBackend.getShufflePushMergerLocations( - stage.shuffleDep.partitioner.numPartitions, stage.resourceProfileId) - if (mergerLocs.nonEmpty) { - stage.shuffleDep.setMergerLocs(mergerLocs) - logInfo(s"Push-based shuffle enabled for $stage (${stage.name}) with" + - s" ${stage.shuffleDep.getMergerLocs.size} merger locations") - - logDebug("List of shuffle push merger locations " + - s"${stage.shuffleDep.getMergerLocs.map(_.host).mkString(", ")}") - } else { - stage.shuffleDep.setShuffleMergeEnabled(false) - logInfo(s"Push-based shuffle disabled for $stage (${stage.name})") - } + getAndSetShufflePushMergerLocations(stage) + } + + val shuffleId = stage.shuffleDep.shuffleId + val shuffleMergeId = stage.shuffleDep.shuffleMergeId + if (stage.shuffleDep.shuffleMergeEnabled) { + logInfo(s"Shuffle merge enabled before starting the stage for $stage with shuffle" + + s" $shuffleId and shuffle merge $shuffleMergeId with" + + s" ${stage.shuffleDep.getMergerLocs.size} merger locations") + } else { + logInfo(s"Shuffle merge disabled for $stage with shuffle $shuffleId" + + s" and shuffle merge $shuffleMergeId, but can get enabled later adaptively" + + s" once enough mergers are available") } } + private def getAndSetShufflePushMergerLocations(stage: ShuffleMapStage): Seq[BlockManagerId] = { + val mergerLocs = sc.schedulerBackend.getShufflePushMergerLocations( + stage.shuffleDep.partitioner.numPartitions, stage.resourceProfileId) + if (mergerLocs.nonEmpty) { + stage.shuffleDep.setMergerLocs(mergerLocs) + } + + logDebug(s"Shuffle merge locations for shuffle ${stage.shuffleDep.shuffleId} with" + + s" shuffle merge ${stage.shuffleDep.shuffleMergeId} is" + + s" ${stage.shuffleDep.getMergerLocs.map(_.host).mkString(", ")}") + mergerLocs + } + /** Called when stage's parents are available and we can now do its task. */ private def submitMissingTasks(stage: Stage, jobId: Int): Unit = { logDebug("submitMissingTasks(" + stage + ")") @@ -1418,15 +1431,15 @@ private[spark] class DAGScheduler( case s: ShuffleMapStage => outputCommitCoordinator.stageStart(stage = s.id, maxPartitionId = s.numPartitions - 1) // Only generate merger location for a given shuffle dependency once. - if (s.shuffleDep.shuffleMergeEnabled) { - if (!s.shuffleDep.shuffleMergeFinalized) { + if (s.shuffleDep.shuffleMergeAllowed) { + if (!s.shuffleDep.isShuffleMergeFinalizedMarked) { prepareShuffleServicesForShuffleMapStage(s) } else { // Disable Shuffle merge for the retry/reuse of the same shuffle dependency if it has // already been merge finalized. If the shuffle dependency was previously assigned // merger locations but the corresponding shuffle map stage did not complete // successfully, we would still enable push for its retry. 
- s.shuffleDep.setShuffleMergeEnabled(false) + s.shuffleDep.setShuffleMergeAllowed(false) logInfo(s"Push-based shuffle disabled for $stage (${stage.name}) since it" + " is already shuffle merge finalized") } @@ -1821,7 +1834,7 @@ private[spark] class DAGScheduler( } if (runningStages.contains(shuffleStage) && shuffleStage.pendingPartitions.isEmpty) { - if (!shuffleStage.shuffleDep.shuffleMergeFinalized && + if (!shuffleStage.shuffleDep.isShuffleMergeFinalizedMarked && shuffleStage.shuffleDep.getMergerLocs.nonEmpty) { checkAndScheduleShuffleMergeFinalize(shuffleStage) } else { @@ -2313,7 +2326,7 @@ private[spark] class DAGScheduler( // Register merge statuses if the stage is still running and shuffle merge is not finalized yet. // TODO: SPARK-35549: Currently merge statuses results which come after shuffle merge // TODO: is finalized is not registered. - if (runningStages.contains(stage) && !stage.shuffleDep.shuffleMergeFinalized) { + if (runningStages.contains(stage) && !stage.shuffleDep.isShuffleMergeFinalizedMarked) { mapOutputTracker.registerMergeResults(stage.shuffleDep.shuffleId, mergeStatuses) } } @@ -2350,7 +2363,7 @@ private[spark] class DAGScheduler( // This is required to prevent shuffle merge finalization by dangling tasks of a // previous attempt in the case of indeterminate stage. if (shuffleDep.shuffleMergeId == shuffleMergeId) { - if (!shuffleDep.shuffleMergeFinalized && + if (!shuffleDep.isShuffleMergeFinalizedMarked && shuffleDep.incPushCompleted(mapIndex).toDouble / shuffleDep.rdd.partitions.length >= shufflePushMinRatio) { scheduleShuffleMergeFinalize(mapStage, delay = 0) @@ -2487,6 +2500,23 @@ private[spark] class DAGScheduler( executorFailureEpoch -= execId } shuffleFileLostEpoch -= execId + + if (pushBasedShuffleEnabled) { + // Only set merger locations for stages that are not yet finished and have empty mergers + shuffleIdToMapStage.filter { case (_, stage) => + stage.shuffleDep.shuffleMergeAllowed && stage.shuffleDep.getMergerLocs.isEmpty && + runningStages.contains(stage) + }.foreach { case(_, stage: ShuffleMapStage) => + if (getAndSetShufflePushMergerLocations(stage).nonEmpty) { + logInfo(s"Shuffle merge enabled adaptively for $stage with shuffle" + + s" ${stage.shuffleDep.shuffleId} and shuffle merge" + + s" ${stage.shuffleDep.shuffleMergeId} with ${stage.shuffleDep.getMergerLocs.size}" + + s" merger locations") + mapOutputTracker.registerShufflePushMergerLocations(stage.shuffleDep.shuffleId, + stage.shuffleDep.getMergerLocs) + } + } + } } private[scheduler] def handleStageCancellation(stageId: Int, reason: Option[String]): Unit = { @@ -2540,7 +2570,7 @@ private[spark] class DAGScheduler( stage.latestInfo.stageFailed(errorMessage.get) logInfo(s"$stage (${stage.name}) failed in $serviceTime s due to ${errorMessage.get}") } - + updateStageInfoForPushBasedShuffle(stage) if (!willRetry) { outputCommitCoordinator.stageEnd(stage.id) } @@ -2563,6 +2593,7 @@ private[spark] class DAGScheduler( val dependentJobs: Seq[ActiveJob] = activeJobs.filter(job => stageDependsOn(job.finalStage, failedStage)).toSeq failedStage.latestInfo.completionTime = Some(clock.getTimeMillis()) + updateStageInfoForPushBasedShuffle(failedStage) for (job <- dependentJobs) { failJobAndIndependentStages(job, s"Job aborted due to stage failure: $reason", exception) } @@ -2571,6 +2602,19 @@ private[spark] class DAGScheduler( } } + private def updateStageInfoForPushBasedShuffle(stage: Stage): Unit = { + // With adaptive shuffle mergers, StageInfo's + // isPushBasedShuffleEnabled and shuffleMergers need 
to be updated at the end. + stage match { + case s: ShuffleMapStage => + stage.latestInfo.setPushBasedShuffleEnabled(s.shuffleDep.shuffleMergeEnabled) + if (s.shuffleDep.shuffleMergeEnabled) { + stage.latestInfo.setShuffleMergerCount(s.shuffleDep.getMergerLocs.size) + } + case _ => + } + } + /** Cancel all independent, running stages that are only used by this job. */ private def cancelRunningIndependentStages(job: ActiveJob, reason: String): Boolean = { var ableToCancelStages = true diff --git a/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala index 7b681bf0abfe8..29835c482dfa1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala @@ -39,7 +39,9 @@ class StageInfo( val taskMetrics: TaskMetrics = null, private[spark] val taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty, private[spark] val shuffleDepId: Option[Int] = None, - val resourceProfileId: Int) { + val resourceProfileId: Int, + private[spark] var isPushBasedShuffleEnabled: Boolean = false, + private[spark] var shuffleMergerCount: Int = 0) { /** When this stage was submitted from the DAGScheduler to a TaskScheduler. */ var submissionTime: Option[Long] = None /** Time when the stage completed or when the stage was cancelled. */ @@ -73,6 +75,14 @@ class StageInfo( "running" } } + + private[spark] def setShuffleMergerCount(mergers: Int): Unit = { + shuffleMergerCount = mergers + } + + private[spark] def setPushBasedShuffleEnabled(pushBasedShuffleEnabled: Boolean): Unit = { + isPushBasedShuffleEnabled = pushBasedShuffleEnabled + } } private[spark] object StageInfo { @@ -108,6 +118,8 @@ private[spark] object StageInfo { taskMetrics, taskLocalityPreferences, shuffleDepId, - resourceProfileId) + resourceProfileId, + false, + 0) } } diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala index 270d23efc1b2d..be5b8385f5e7e 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala @@ -59,13 +59,25 @@ private[spark] class ShuffleWriteProcessor extends Serializable with Logging { rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]]) val mapStatus = writer.stop(success = true) if (mapStatus.isDefined) { + // Check if sufficient shuffle mergers are available now for the ShuffleMapTask to push + if (dep.shuffleMergeAllowed && dep.getMergerLocs.isEmpty) { + val mapOutputTracker = SparkEnv.get.mapOutputTracker + val mergerLocs = + mapOutputTracker.getShufflePushMergerLocations(dep.shuffleId) + if (mergerLocs.nonEmpty) { + dep.setMergerLocs(mergerLocs) + } + } // Initiate shuffle push process if push based shuffle is enabled // The map task only takes care of converting the shuffle data file into multiple // block push requests. It delegates pushing the blocks to a different thread-pool - // ShuffleBlockPusher.BLOCK_PUSHER_POOL. 
- if (dep.shuffleMergeEnabled && dep.getMergerLocs.nonEmpty && !dep.shuffleMergeFinalized) { + if (!dep.shuffleMergeFinalized) { manager.shuffleBlockResolver match { case resolver: IndexShuffleBlockResolver => + logInfo(s"Shuffle merge enabled with ${dep.getMergerLocs.size} merger locations " + + s" for stage ${context.stageId()} with shuffle ID ${dep.shuffleId}") + logDebug(s"Starting pushing blocks for the task ${context.taskAttemptId()}") val dataFile = resolver.getDataFile(dep.shuffleId, mapId) new ShuffleBlockPusher(SparkEnv.get.conf) .initiateBlockPush(dataFile, writer.getPartitionLengths(), dep, partition.index) diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala index e8c7f1f4d91c3..46aca07ce43f6 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala @@ -131,7 +131,7 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C] = { val baseShuffleHandle = handle.asInstanceOf[BaseShuffleHandle[K, _, C]] val (blocksByAddress, canEnableBatchFetch) = - if (baseShuffleHandle.dependency.shuffleMergeEnabled) { + if (baseShuffleHandle.dependency.isShuffleMergeFinalizedMarked) { val res = SparkEnv.get.mapOutputTracker.getPushBasedShuffleMapSizesByExecutorId( handle.shuffleId, startMapIndex, endMapIndex, startPartition, endPartition) (res.iter, res.enableBatchFetch) diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala index 0ee2c77997973..5e502eb568759 100644 --- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala @@ -855,7 +855,7 @@ class MapOutputTrackerSuite extends SparkFunSuite with LocalSparkContext { rpcEnv.shutdown() } - test("SPARK-37023: Avoid fetching merge status when shuffleMergeEnabled is false") { + test("SPARK-37023: Avoid fetching merge status when useMergeResult is false") { val newConf = new SparkConf newConf.set(PUSH_BASED_SHUFFLE_ENABLED, true) newConf.set(IS_TESTING, true) @@ -910,4 +910,32 @@ class MapOutputTrackerSuite extends SparkFunSuite with LocalSparkContext { rpcEnv.shutdown() slaveRpcEnv.shutdown() } + + test("SPARK-34826: Adaptive shuffle mergers") { + val newConf = new SparkConf + newConf.set("spark.shuffle.push.based.enabled", "true") + newConf.set("spark.shuffle.service.enabled", "true") + + // needs TorrentBroadcast so need a SparkContext + withSpark(new SparkContext("local", "MapOutputTrackerSuite", newConf)) { sc => + val masterTracker = sc.env.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster] + val rpcEnv = sc.env.rpcEnv + val masterEndpoint = new MapOutputTrackerMasterEndpoint(rpcEnv, masterTracker, newConf) + rpcEnv.stop(masterTracker.trackerEndpoint) + rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, masterEndpoint) + + val worker = new MapOutputTrackerWorker(newConf) + worker.trackerEndpoint = + rpcEnv.setupEndpointRef(rpcEnv.address, MapOutputTracker.ENDPOINT_NAME) + + masterTracker.registerShuffle(20, 100, 100) + worker.updateEpoch(masterTracker.getEpoch) + val mergerLocs = (1 to 10).map(x => BlockManagerId(s"exec-$x", s"host-$x", 7337)) + masterTracker.registerShufflePushMergerLocations(20, mergerLocs) + + assert(worker.getShufflePushMergerLocations(20).size == 
10) + worker.unregisterShuffle(20) + assert(worker.shufflePushMergerLocations.isEmpty) + } + } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 76612cb605835..023e352ba1b02 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -3613,8 +3613,8 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti val shuffleStage2 = scheduler.stageIdToStage(1).asInstanceOf[ShuffleMapStage] assert(shuffleStage2.shuffleDep.getMergerLocs.nonEmpty) - assert(shuffleStage2.shuffleDep.shuffleMergeFinalized) - assert(shuffleStage1.shuffleDep.shuffleMergeFinalized) + assert(shuffleStage2.shuffleDep.isShuffleMergeFinalizedMarked) + assert(shuffleStage1.shuffleDep.isShuffleMergeFinalizedMarked) assert(mapOutputTracker.getNumAvailableMergeResults(shuffleDep1.shuffleId) == parts) assert(mapOutputTracker.getNumAvailableMergeResults(shuffleDep2.shuffleId) == parts) @@ -3671,7 +3671,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti completeShuffleMapStageSuccessfully(0, 0, parts) val shuffleStage = scheduler.stageIdToStage(0).asInstanceOf[ShuffleMapStage] - assert(!shuffleStage.shuffleDep.shuffleMergeEnabled) + assert(shuffleStage.shuffleDep.mergerLocs.isEmpty) completeNextResultStageWithSuccess(1, 0) @@ -3686,14 +3686,13 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti completeNextStageWithFetchFailure(3, 0, shuffleDep) scheduler.resubmitFailedStages() - // Make sure shuffle merge is disabled for the retry val stage2 = scheduler.stageIdToStage(2).asInstanceOf[ShuffleMapStage] - assert(!stage2.shuffleDep.shuffleMergeEnabled) + assert(stage2.shuffleDep.shuffleMergeEnabled) // the scheduler now creates a new task set to regenerate the missing map output, but this time // using a different stage, the "skipped" one assert(scheduler.stageIdToStage(2).latestInfo.taskMetrics != null) - completeShuffleMapStageSuccessfully(2, 1, 2) + completeShuffleMapStageSuccessfully(2, 1, parts) completeNextResultStageWithSuccess(3, 1, idx => idx + 1234) val expected = (0 until parts).map(idx => (idx, idx + 1234)) @@ -3798,7 +3797,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti submit(reduceRdd, (0 until parts).toArray) completeShuffleMapStageSuccessfully(0, 0, reduceRdd.partitions.length) val shuffleMapStage = scheduler.stageIdToStage(0).asInstanceOf[ShuffleMapStage] - assert(!shuffleMapStage.shuffleDep.shuffleMergeEnabled) + assert(!shuffleMapStage.shuffleDep.shuffleMergeAllowed) } test("SPARK-32920: metadata fetch failure should not unregister map status") { @@ -3926,7 +3925,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti val finalizeTask1 = shuffleStage1.shuffleDep.getFinalizeTask.get .asInstanceOf[DummyScheduledFuture] assert(finalizeTask1.delay == 10 && finalizeTask1.registerMergeResults) - assert(shuffleStage1.shuffleDep.shuffleMergeFinalized) + assert(shuffleStage1.shuffleDep.isShuffleMergeFinalizedMarked) complete(taskSets(1), taskSets(1).tasks.zipWithIndex.map { case (_, idx) => @@ -4051,8 +4050,8 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti runEvent(StageCancelled(0, Option("Explicit cancel check"))) scheduler.handleShuffleMergeFinalized(shuffleStage1, shuffleStage1.shuffleDep.shuffleMergeId) - 
assert(shuffleStage1.shuffleDep.shuffleMergeEnabled) - assert(!shuffleStage1.shuffleDep.shuffleMergeFinalized) + assert(shuffleStage1.shuffleDep.mergerLocs.nonEmpty) + assert(!shuffleStage1.shuffleDep.isShuffleMergeFinalizedMarked) assert(mapOutputTracker. getNumAvailableMergeResults(shuffleStage1.shuffleDep.shuffleId) == 0) @@ -4082,7 +4081,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti assert(shuffleIndeterminateStage.isIndeterminate) scheduler.handleShuffleMergeFinalized(shuffleIndeterminateStage, 2) assert(shuffleIndeterminateStage.shuffleDep.shuffleMergeEnabled) - assert(!shuffleIndeterminateStage.shuffleDep.shuffleMergeFinalized) + assert(!shuffleIndeterminateStage.shuffleDep.isShuffleMergeFinalizedMarked) } // With Adaptive shuffle merge finalization, once minimum shuffle pushes complete after stage @@ -4130,7 +4129,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti } val shuffleStage1 = scheduler.stageIdToStage(0).asInstanceOf[ShuffleMapStage] assert(shuffleStage1.shuffleDep.shuffleMergeEnabled) - assert(!shuffleStage1.shuffleDep.shuffleMergeFinalized) + assert(!shuffleStage1.shuffleDep.isShuffleMergeFinalizedMarked) val finalizeTask1 = shuffleStage1.shuffleDep.getFinalizeTask.get. asInstanceOf[DummyScheduledFuture] assert(finalizeTask1.delay == 10 && finalizeTask1.registerMergeResults) @@ -4147,7 +4146,206 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti assert(finalizeTask2.delay == 0 && finalizeTask2.registerMergeResults) } - /** + test("SPARK-34826: Adaptively fetch shuffle mergers") { + initPushBasedShuffleConfs(conf) + conf.set(config.SHUFFLE_MERGER_LOCATIONS_MIN_STATIC_THRESHOLD, 2) + DAGSchedulerSuite.clearMergerLocs() + DAGSchedulerSuite.addMergerLocs(Seq("host1")) + val parts = 2 + + val shuffleMapRdd = new MyRDD(sc, parts, Nil) + val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(parts)) + val reduceRdd = new MyRDD(sc, parts, List(shuffleDep), tracker = mapOutputTracker) + + // Submit a reduce job that depends which will create a map stage + submit(reduceRdd, (0 until parts).toArray) + + runEvent(makeCompletionEvent( + taskSets(0).tasks(0), Success, makeMapStatus("hostA", parts), + Seq.empty, Array.empty, createFakeTaskInfoWithId(0))) + + val shuffleStage1 = scheduler.stageIdToStage(0).asInstanceOf[ShuffleMapStage] + assert(!shuffleStage1.shuffleDep.shuffleMergeEnabled) + assert(mapOutputTracker.getShufflePushMergerLocations(0).isEmpty) + + DAGSchedulerSuite.addMergerLocs(Seq("host2", "host3")) + + // host2 executor added event to trigger registering of shuffle merger locations + // as shuffle mergers are tracked separately for test + runEvent(ExecutorAdded("exec2", "host2")) + + // Check if new shuffle merger locations are available for push or not + assert(mapOutputTracker.getShufflePushMergerLocations(0).size == 2) + assert(shuffleStage1.shuffleDep.getMergerLocs.size == 2) + + // Complete remaining tasks in ShuffleMapStage 0 + runEvent(makeCompletionEvent(taskSets(0).tasks(1), Success, + makeMapStatus("host1", parts), Seq.empty, Array.empty, createFakeTaskInfoWithId(1))) + + completeNextResultStageWithSuccess(1, 0) + assert(results === Map(0 -> 42, 1 -> 42)) + + results.clear() + assertDataStructuresEmpty() + } + + test("SPARK-34826: Adaptively fetch shuffle mergers with stage retry") { + initPushBasedShuffleConfs(conf) + conf.set(config.SHUFFLE_MERGER_LOCATIONS_MIN_STATIC_THRESHOLD, 2) + DAGSchedulerSuite.clearMergerLocs() + 
DAGSchedulerSuite.addMergerLocs(Seq("host1")) + val parts = 2 + + val shuffleMapRdd1 = new MyRDD(sc, parts, Nil) + val shuffleDep1 = new ShuffleDependency(shuffleMapRdd1, new HashPartitioner(parts)) + val shuffleMapRdd2 = new MyRDD(sc, parts, Nil) + val shuffleDep2 = new ShuffleDependency(shuffleMapRdd2, new HashPartitioner(parts)) + val reduceRdd = new MyRDD(sc, parts, List(shuffleDep1, shuffleDep2), + tracker = mapOutputTracker) + + // Submit a reduce job that depends which will create a map stage + submit(reduceRdd, (0 until parts).toArray) + + val taskResults = taskSets(0).tasks.zipWithIndex.map { + case (_, idx) => + (Success, makeMapStatus("host" + idx, parts)) + }.toSeq + + val shuffleStage1 = scheduler.stageIdToStage(0).asInstanceOf[ShuffleMapStage] + DAGSchedulerSuite.addMergerLocs(Seq("host2", "host3")) + // host2 executor added event to trigger registering of shuffle merger locations + // as shuffle mergers are tracked separately for test + runEvent(ExecutorAdded("exec2", "host2")) + // Check if new shuffle merger locations are available for push or not + assert(mapOutputTracker.getShufflePushMergerLocations(0).size == 2) + assert(shuffleStage1.shuffleDep.getMergerLocs.size == 2) + val mergerLocsBeforeRetry = shuffleStage1.shuffleDep.getMergerLocs + + // Clear merger locations to check if new mergers are not getting set for the + // retry of determinate stage + DAGSchedulerSuite.clearMergerLocs() + + // Remove MapStatus on one of the host before the stage ends to trigger + // a scenario where stage 0 needs to be resubmitted upon finishing all tasks. + // Merge finalization should be scheduled in this case. + for ((result, i) <- taskResults.zipWithIndex) { + if (i == taskSets(0).tasks.size - 1) { + mapOutputTracker.removeOutputsOnHost("host0") + } + runEvent(makeCompletionEvent(taskSets(0).tasks(i), result._1, result._2)) + } + assert(shuffleStage1.shuffleDep.isShuffleMergeFinalizedMarked) + + DAGSchedulerSuite.addMergerLocs(Seq("host4", "host5")) + // host4 executor added event shouldn't reset merger locations given merger locations + // are already set + runEvent(ExecutorAdded("exec4", "host4")) + + // Successfully completing the retry of stage 0. 
+ complete(taskSets(2), taskSets(2).tasks.zipWithIndex.map { + case (_, idx) => + (Success, makeMapStatus("host" + idx, parts)) + }.toSeq) + + assert(shuffleStage1.shuffleDep.shuffleMergeId == 0) + assert(shuffleStage1.shuffleDep.getMergerLocs.size == 2) + assert(shuffleStage1.shuffleDep.isShuffleMergeFinalizedMarked) + val newMergerLocs = + scheduler.stageIdToStage(0).asInstanceOf[ShuffleMapStage].shuffleDep.getMergerLocs + assert(mergerLocsBeforeRetry.sortBy(_.host) === newMergerLocs.sortBy(_.host)) + val shuffleStage2 = scheduler.stageIdToStage(1).asInstanceOf[ShuffleMapStage] + complete(taskSets(1), taskSets(1).tasks.zipWithIndex.map { + case (_, idx) => + (Success, makeMapStatus("host" + idx, parts, 10)) + }.toSeq) + assert(shuffleStage2.shuffleDep.getMergerLocs.size == 2) + completeNextResultStageWithSuccess(2, 0) + assert(results === Map(0 -> 42, 1 -> 42)) + + results.clear() + assertDataStructuresEmpty() + } + + test("SPARK-34826: Adaptively fetch shuffle mergers with stage retry for indeterminate stage") { + initPushBasedShuffleConfs(conf) + conf.set(config.SHUFFLE_MERGER_LOCATIONS_MIN_STATIC_THRESHOLD, 2) + DAGSchedulerSuite.clearMergerLocs() + DAGSchedulerSuite.addMergerLocs(Seq("host1")) + val parts = 2 + + val shuffleMapRdd1 = new MyRDD(sc, parts, Nil, indeterminate = true) + val shuffleDep1 = new ShuffleDependency(shuffleMapRdd1, new HashPartitioner(parts)) + val shuffleMapRdd2 = new MyRDD(sc, parts, Nil, indeterminate = true) + val shuffleDep2 = new ShuffleDependency(shuffleMapRdd2, new HashPartitioner(parts)) + val reduceRdd = new MyRDD(sc, parts, List(shuffleDep1, shuffleDep2), + tracker = mapOutputTracker) + + // Submit a reduce job that depends which will create a map stage + submit(reduceRdd, (0 until parts).toArray) + + val taskResults = taskSets(0).tasks.zipWithIndex.map { + case (_, idx) => + (Success, makeMapStatus("host" + idx, parts)) + }.toSeq + + val shuffleStage1 = scheduler.stageIdToStage(0).asInstanceOf[ShuffleMapStage] + DAGSchedulerSuite.addMergerLocs(Seq("host2", "host3")) + // host2 executor added event to trigger registering of shuffle merger locations + // as shuffle mergers are tracked separately for test + runEvent(ExecutorAdded("exec2", "host2")) + // Check if new shuffle merger locations are available for push or not + assert(mapOutputTracker.getShufflePushMergerLocations(0).size == 2) + assert(shuffleStage1.shuffleDep.getMergerLocs.size == 2) + val mergerLocsBeforeRetry = shuffleStage1.shuffleDep.getMergerLocs + + // Clear merger locations to check if new mergers are getting set for the + // retry of indeterminate stage + DAGSchedulerSuite.clearMergerLocs() + + // Remove MapStatus on one of the host before the stage ends to trigger + // a scenario where stage 0 needs to be resubmitted upon finishing all tasks. + // Merge finalization should be scheduled in this case. 
+ for ((result, i) <- taskResults.zipWithIndex) { + if (i == taskSets(0).tasks.size - 1) { + mapOutputTracker.removeOutputsOnHost("host0") + } + runEvent(makeCompletionEvent(taskSets(0).tasks(i), result._1, result._2)) + } + + // Indeterminate stage should recompute all partitions, hence + // isShuffleMergeFinalizedMarked should be false here + assert(!shuffleStage1.shuffleDep.isShuffleMergeFinalizedMarked) + + DAGSchedulerSuite.addMergerLocs(Seq("host4", "host5")) + // host4 executor added event should reset merger locations given merger locations + // are already reset + runEvent(ExecutorAdded("exec4", "host4")) + assert(shuffleStage1.shuffleDep.getMergerLocs.size == 2) + // Successfully completing the retry of stage 0. + complete(taskSets(2), taskSets(2).tasks.zipWithIndex.map { + case (_, idx) => + (Success, makeMapStatus("host" + idx, parts)) + }.toSeq) + + assert(shuffleStage1.shuffleDep.shuffleMergeId == 2) + assert(shuffleStage1.shuffleDep.isShuffleMergeFinalizedMarked) + val newMergerLocs = + scheduler.stageIdToStage(0).asInstanceOf[ShuffleMapStage].shuffleDep.getMergerLocs + assert(mergerLocsBeforeRetry.sortBy(_.host) !== newMergerLocs.sortBy(_.host)) + val shuffleStage2 = scheduler.stageIdToStage(1).asInstanceOf[ShuffleMapStage] + complete(taskSets(1), taskSets(1).tasks.zipWithIndex.map { + case (_, idx) => + (Success, makeMapStatus("host" + idx, parts, 10)) + }.toSeq) + assert(shuffleStage2.shuffleDep.getMergerLocs.size == 2) + completeNextResultStageWithSuccess(2, 0) + assert(results === Map(0 -> 42, 1 -> 42)) + + results.clear() + assertDataStructuresEmpty() + } + + /** * Assert that the supplied TaskSet has exactly the given hosts as its preferred locations. * Note that this checks only the host and not the executor ID. */ From 899d3bb44d7c72dc0179545189ac8170bde993a8 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Wed, 9 Feb 2022 07:11:30 +0900 Subject: [PATCH 178/513] [SPARK-34183][SS] DataSource V2: Required distribution and ordering in micro-batch execution ### What changes were proposed in this pull request? This PR adjusts existing logical plans for micro-batch writes to support required distribution and ordering. This change implements what was discussed in PR #31700. In particular, the consensus was to adapt existing streaming plans to support write requirements instead of introducing new logical plans for Structured Streaming. That's a separate item and must be addressed independently. ### Why are the changes needed? These changes are needed so that data sources can request a specific distribution and ordering not only for batch but also for micro-batch writes. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This PR extends existing tests to cover micro-batch cases. Closes #35374 from aokolnychyi/spark-34183-v2. 
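For reference, connectors opt into this behaviour through the existing `RequiresDistributionAndOrdering` interface. A minimal sketch of such a `Write` is shown below (the class name and the `key` column are hypothetical and only illustrate the shape of the API, not code from this patch):

```scala
import org.apache.spark.sql.connector.distributions.{Distribution, Distributions}
import org.apache.spark.sql.connector.expressions.{Expression, Expressions, SortOrder}
import org.apache.spark.sql.connector.write.{RequiresDistributionAndOrdering, Write}

// Hypothetical Write that asks Spark to cluster incoming records by `key` before writing.
// With this change the request is honored for micro-batch writes as well as batch writes.
class KeyClusteredWrite extends Write with RequiresDistributionAndOrdering {
  override def requiredDistribution(): Distribution =
    Distributions.clustered(Array[Expression](Expressions.column("key")))

  // An empty array means no particular ordering is required within partitions.
  override def requiredOrdering(): Array[SortOrder] = Array.empty[SortOrder]

  // 0 lets Spark choose the number of partitions.
  override def requiredNumPartitions(): Int = 0

  // A real connector would also override toBatch()/toStreaming() to return its writers.
}
```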
Authored-by: Anton Okolnychyi Signed-off-by: Jungtaek Lim --- .../RequiresDistributionAndOrdering.java | 10 + .../sql/errors/QueryCompilationErrors.scala | 5 + .../sql/connector/catalog/InMemoryTable.scala | 5 +- .../execution/datasources/v2/V2Writes.scala | 46 ++- .../streaming/MicroBatchExecution.scala | 11 +- .../execution/streaming/StreamExecution.scala | 12 +- .../continuous/ContinuousExecution.scala | 33 +- .../sources/WriteToMicroBatchDataSource.scala | 20 +- .../WriteDistributionAndOrderingSuite.scala | 294 +++++++++++++++++- 9 files changed, 403 insertions(+), 33 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java index 2284086f99f6e..983e6b0fffb20 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java @@ -35,6 +35,11 @@ public interface RequiresDistributionAndOrdering extends Write { * Spark will distribute incoming records across partitions to satisfy the required distribution * before passing the records to the data source table on write. *

    + * Batch and micro-batch writes can request a particular data distribution. + * If a distribution is requested in the micro-batch context, incoming records in each micro batch + * will satisfy the required distribution (but not across micro batches). The continuous execution + * mode continuously processes streaming data and does not support distribution requirements. + *

    * Implementations may return {@link UnspecifiedDistribution} if they don't require any specific * distribution of data on write. * @@ -61,6 +66,11 @@ public interface RequiresDistributionAndOrdering extends Write { * Spark will order incoming records within partitions to satisfy the required ordering * before passing those records to the data source table on write. *

    + * Batch and micro-batch writes can request a particular data ordering. + * If an ordering is requested in the micro-batch context, incoming records in each micro batch + * will satisfy the required ordering (but not across micro batches). The continuous execution + * mode continuously processes streaming data and does not support ordering requirements. + *

    * Implementations may return an empty array if they don't require any specific ordering of data * on write. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 726c490350291..d3f33e719c88c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -2378,4 +2378,9 @@ object QueryCompilationErrors { def tableNotSupportTimeTravelError(tableName: Identifier): UnsupportedOperationException = { new UnsupportedOperationException(s"Table $tableName does not support time travel.") } + + def writeDistributionAndOrderingNotSupportedInContinuousExecution(): Throwable = { + new AnalysisException( + "Sinks cannot request distribution and ordering in continuous execution mode") + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala index 8e5e920d89abe..5d72b2060bfd8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala @@ -35,6 +35,7 @@ import org.apache.spark.sql.connector.metric.{CustomMetric, CustomTaskMetric} import org.apache.spark.sql.connector.read._ import org.apache.spark.sql.connector.write._ import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} +import org.apache.spark.sql.internal.connector.SupportsStreamingUpdateAsAppend import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -311,7 +312,9 @@ class InMemoryTable( InMemoryTable.maybeSimulateFailedTableWrite(new CaseInsensitiveStringMap(properties)) InMemoryTable.maybeSimulateFailedTableWrite(info.options) - new WriteBuilder with SupportsTruncate with SupportsOverwrite with SupportsDynamicOverwrite { + new WriteBuilder with SupportsTruncate with SupportsOverwrite + with SupportsDynamicOverwrite with SupportsStreamingUpdateAsAppend { + private var writer: BatchWrite = Append private var streamingWriter: StreamingWrite = StreamingAppend diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala index 8494bba078552..38f741532d786 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala @@ -22,11 +22,15 @@ import java.util.UUID import org.apache.spark.sql.catalyst.expressions.PredicateHelper import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.connector.catalog.Table -import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, WriteBuilder} +import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} +import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, 
Write, WriteBuilder} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.datasources.DataSourceStrategy +import org.apache.spark.sql.execution.streaming.sources.{MicroBatchWrite, WriteToMicroBatchDataSource} +import org.apache.spark.sql.internal.connector.SupportsStreamingUpdateAsAppend import org.apache.spark.sql.sources.{AlwaysTrue, Filter} +import org.apache.spark.sql.streaming.OutputMode /** * A rule that constructs logical writes. @@ -77,6 +81,36 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper { } val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, conf) o.copy(write = Some(write), query = newQuery) + + case WriteToMicroBatchDataSource( + relation, table, query, queryId, writeOptions, outputMode, Some(batchId)) => + + val writeBuilder = newWriteBuilder(table, query, writeOptions, queryId) + val write = buildWriteForMicroBatch(table, writeBuilder, outputMode) + val microBatchWrite = new MicroBatchWrite(batchId, write.toStreaming) + val customMetrics = write.supportedCustomMetrics.toSeq + val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, conf) + WriteToDataSourceV2(relation, microBatchWrite, newQuery, customMetrics) + } + + private def buildWriteForMicroBatch( + table: SupportsWrite, + writeBuilder: WriteBuilder, + outputMode: OutputMode): Write = { + + outputMode match { + case Append => + writeBuilder.build() + case Complete => + // TODO: we should do this check earlier when we have capability API. + require(writeBuilder.isInstanceOf[SupportsTruncate], + table.name + " does not support Complete mode.") + writeBuilder.asInstanceOf[SupportsTruncate].truncate().build() + case Update => + require(writeBuilder.isInstanceOf[SupportsStreamingUpdateAsAppend], + table.name + " does not support Update mode.") + writeBuilder.asInstanceOf[SupportsStreamingUpdateAsAppend].build() + } } private def isTruncate(filters: Array[Filter]): Boolean = { @@ -86,12 +120,10 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper { private def newWriteBuilder( table: Table, query: LogicalPlan, - writeOptions: Map[String, String]): WriteBuilder = { + writeOptions: Map[String, String], + queryId: String = UUID.randomUUID().toString): WriteBuilder = { - val info = LogicalWriteInfoImpl( - queryId = UUID.randomUUID().toString, - query.schema, - writeOptions.asOptions) + val info = LogicalWriteInfoImpl(queryId, query.schema, writeOptions.asOptions) table.asWritable.newWriteBuilder(info) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index e9e4be90a0449..8725b701225ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -156,11 +156,16 @@ class MicroBatchExecution( // TODO (SPARK-27484): we should add the writing node before the plan is analyzed. 
sink match { case s: SupportsWrite => - val (streamingWrite, customMetrics) = createStreamingWrite(s, extraOptions, _logicalPlan) val relationOpt = plan.catalogAndIdent.map { case (catalog, ident) => DataSourceV2Relation.create(s, Some(catalog), Some(ident)) } - WriteToMicroBatchDataSource(relationOpt, streamingWrite, _logicalPlan, customMetrics) + WriteToMicroBatchDataSource( + relationOpt, + table = s, + query = _logicalPlan, + queryId = id.toString, + extraOptions, + outputMode) case _ => _logicalPlan } @@ -607,7 +612,7 @@ class MicroBatchExecution( val triggerLogicalPlan = sink match { case _: Sink => newAttributePlan case _: SupportsWrite => - newAttributePlan.asInstanceOf[WriteToMicroBatchDataSource].createPlan(currentBatchId) + newAttributePlan.asInstanceOf[WriteToMicroBatchDataSource].withNewBatchId(currentBatchId) case _ => throw new IllegalArgumentException(s"unknown sink type for $sink") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index d1dfcdc514a10..bbc6fa05d514b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -37,10 +37,8 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} -import org.apache.spark.sql.connector.metric.CustomMetric import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2, ReadLimit, SparkDataStream} -import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsTruncate} -import org.apache.spark.sql.connector.write.streaming.StreamingWrite +import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsTruncate, Write} import org.apache.spark.sql.execution.command.StreamingExplainCommand import org.apache.spark.sql.execution.datasources.v2.StreamWriterCommitProgress import org.apache.spark.sql.internal.SQLConf @@ -579,16 +577,16 @@ abstract class StreamExecution( |batch = $batchDescription""".stripMargin } - protected def createStreamingWrite( + protected def createWrite( table: SupportsWrite, options: Map[String, String], - inputPlan: LogicalPlan): (StreamingWrite, Seq[CustomMetric]) = { + inputPlan: LogicalPlan): Write = { val info = LogicalWriteInfoImpl( queryId = id.toString, inputPlan.schema, new CaseInsensitiveStringMap(options.asJava)) val writeBuilder = table.newWriteBuilder(info) - val write = outputMode match { + outputMode match { case Append => writeBuilder.build() @@ -603,8 +601,6 @@ abstract class StreamExecution( table.name + " does not support Update mode.") writeBuilder.asInstanceOf[SupportsStreamingUpdateAsAppend].build() } - - (write.toStreaming, write.supportedCustomMetrics().toSeq) } protected def purge(threshold: Long): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 5101bdf46ed1f..a0b407469ab36 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -31,8 +31,10 @@ import 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.streaming.{StreamingRelationV2, WriteToStream} import org.apache.spark.sql.catalyst.trees.TreePattern.CURRENT_LIKE import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, TableCapability} +import org.apache.spark.sql.connector.distributions.UnspecifiedDistribution import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, PartitionOffset, ReadLimit} -import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.connector.write.{RequiresDistributionAndOrdering, Write} +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.datasources.v2.StreamingDataSourceV2Relation import org.apache.spark.sql.execution.streaming._ @@ -85,11 +87,36 @@ class ContinuousExecution( uniqueSources = sources.distinct.map(s => s -> ReadLimit.allAvailable()).toMap // TODO (SPARK-27484): we should add the writing node before the plan is analyzed. - val (streamingWrite, customMetrics) = createStreamingWrite( - plan.sink.asInstanceOf[SupportsWrite], extraOptions, _logicalPlan) + val write = createWrite(plan.sink.asInstanceOf[SupportsWrite], extraOptions, _logicalPlan) + + if (hasDistributionRequirements(write) || hasOrderingRequirements(write)) { + throw QueryCompilationErrors.writeDistributionAndOrderingNotSupportedInContinuousExecution() + } + + val streamingWrite = write.toStreaming + val customMetrics = write.supportedCustomMetrics.toSeq WriteToContinuousDataSource(streamingWrite, _logicalPlan, customMetrics) } + private def hasDistributionRequirements(write: Write): Boolean = write match { + case w: RequiresDistributionAndOrdering if w.requiredNumPartitions == 0 => + w.requiredDistribution match { + case _: UnspecifiedDistribution => + false + case _ => + true + } + case _ => + false + } + + private def hasOrderingRequirements(write: Write): Boolean = write match { + case w: RequiresDistributionAndOrdering if w.requiredOrdering.nonEmpty => + true + case _ => + false + } + private val triggerExecutor = trigger match { case ContinuousTrigger(t) => ProcessingTimeExecutor(ProcessingTimeTrigger(t), triggerClock) case _ => throw new IllegalStateException(s"Unsupported type of trigger: $trigger") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/WriteToMicroBatchDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/WriteToMicroBatchDataSource.scala index b8b85a7ded877..0a33093dcbcea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/WriteToMicroBatchDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/WriteToMicroBatchDataSource.scala @@ -19,27 +19,31 @@ package org.apache.spark.sql.execution.streaming.sources import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} -import org.apache.spark.sql.connector.metric.CustomMetric -import org.apache.spark.sql.connector.write.streaming.StreamingWrite -import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, WriteToDataSourceV2} +import org.apache.spark.sql.connector.catalog.SupportsWrite +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.streaming.OutputMode /** * The logical plan for writing data to a micro-batch stream. 
* * Note that this logical plan does not have a corresponding physical plan, as it will be converted - * to [[WriteToDataSourceV2]] with [[MicroBatchWrite]] before execution. + * to [[org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2 WriteToDataSourceV2]] + * with [[MicroBatchWrite]] before execution. */ case class WriteToMicroBatchDataSource( relation: Option[DataSourceV2Relation], - write: StreamingWrite, + table: SupportsWrite, query: LogicalPlan, - customMetrics: Seq[CustomMetric]) + queryId: String, + writeOptions: Map[String, String], + outputMode: OutputMode, + batchId: Option[Long] = None) extends UnaryNode { override def child: LogicalPlan = query override def output: Seq[Attribute] = Nil - def createPlan(batchId: Long): WriteToDataSourceV2 = { - WriteToDataSourceV2(relation, new MicroBatchWrite(batchId, write), query, customMetrics) + def withNewBatchId(batchId: Long): WriteToMicroBatchDataSource = { + copy(batchId = Some(batchId)) } override protected def withNewChildInternal(newChild: LogicalPlan): WriteToMicroBatchDataSource = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala index db4a9c153c0ff..5f8684a144778 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala @@ -21,7 +21,7 @@ import java.util.Collections import org.scalatest.BeforeAndAfter -import org.apache.spark.sql.{catalyst, AnalysisException, DataFrame, QueryTest} +import org.apache.spark.sql.{catalyst, AnalysisException, DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.plans.physical import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, RangePartitioning, UnknownPartitioning} @@ -33,7 +33,10 @@ import org.apache.spark.sql.execution.{QueryExecution, SortExec, SparkPlan} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec import org.apache.spark.sql.execution.exchange.ShuffleExchangeLike +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.execution.streaming.sources.ContinuousMemoryStream import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.streaming.{StreamingQueryException, Trigger} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{IntegerType, StringType, StructType} import org.apache.spark.sql.util.QueryExecutionListener @@ -42,6 +45,7 @@ class WriteDistributionAndOrderingSuite extends QueryTest with SharedSparkSession with BeforeAndAfter with AdaptiveSparkPlanHelper { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + import testImplicits._ before { spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) @@ -52,6 +56,7 @@ class WriteDistributionAndOrderingSuite spark.sessionState.conf.unsetConf("spark.sql.catalog.testcat") } + private val microBatchPrefix = "micro_batch_" private val namespace = Array("ns1") private val ident = Identifier.of(namespace, "test_table") private val tableNameAsString = "testcat." 
+ ident.toString @@ -74,6 +79,18 @@ class WriteDistributionAndOrderingSuite checkOrderedDistributionAndSortWithSameExprs("overwriteDynamic") } + test("ordered distribution and sort with same exprs: micro-batch append") { + checkOrderedDistributionAndSortWithSameExprs(microBatchPrefix + "append") + } + + test("ordered distribution and sort with same exprs: micro-batch update") { + checkOrderedDistributionAndSortWithSameExprs(microBatchPrefix + "update") + } + + test("ordered distribution and sort with same exprs: micro-batch complete") { + checkOrderedDistributionAndSortWithSameExprs(microBatchPrefix + "complete") + } + test("ordered distribution and sort with same exprs with numPartitions: append") { checkOrderedDistributionAndSortWithSameExprs("append", Some(10)) } @@ -86,6 +103,18 @@ class WriteDistributionAndOrderingSuite checkOrderedDistributionAndSortWithSameExprs("overwriteDynamic", Some(10)) } + test("ordered distribution and sort with same exprs with numPartitions: micro-batch append") { + checkOrderedDistributionAndSortWithSameExprs(microBatchPrefix + "append", Some(10)) + } + + test("ordered distribution and sort with same exprs with numPartitions: micro-batch update") { + checkOrderedDistributionAndSortWithSameExprs(microBatchPrefix + "update", Some(10)) + } + + test("ordered distribution and sort with same exprs with numPartitions: micro-batch complete") { + checkOrderedDistributionAndSortWithSameExprs(microBatchPrefix + "complete", Some(10)) + } + private def checkOrderedDistributionAndSortWithSameExprs(command: String): Unit = { checkOrderedDistributionAndSortWithSameExprs(command, None) } @@ -129,6 +158,18 @@ class WriteDistributionAndOrderingSuite checkClusteredDistributionAndSortWithSameExprs("overwriteDynamic") } + test("clustered distribution and sort with same exprs: micro-batch append") { + checkClusteredDistributionAndSortWithSameExprs(microBatchPrefix + "append") + } + + test("clustered distribution and sort with same exprs: micro-batch update") { + checkClusteredDistributionAndSortWithSameExprs(microBatchPrefix + "update") + } + + test("clustered distribution and sort with same exprs: micro-batch complete") { + checkClusteredDistributionAndSortWithSameExprs(microBatchPrefix + "complete") + } + test("clustered distribution and sort with same exprs with numPartitions: append") { checkClusteredDistributionAndSortWithSameExprs("append", Some(10)) } @@ -141,6 +182,18 @@ class WriteDistributionAndOrderingSuite checkClusteredDistributionAndSortWithSameExprs("overwriteDynamic", Some(10)) } + test("clustered distribution and sort with same exprs with numPartitions: micro-batch append") { + checkClusteredDistributionAndSortWithSameExprs(microBatchPrefix + "append", Some(10)) + } + + test("clustered distribution and sort with same exprs with numPartitions: micro-batch update") { + checkClusteredDistributionAndSortWithSameExprs(microBatchPrefix + "update", Some(10)) + } + + test("clustered distribution and sort with same exprs with numPartitions: micro-batch complete") { + checkClusteredDistributionAndSortWithSameExprs(microBatchPrefix + "complete", Some(10)) + } + private def checkClusteredDistributionAndSortWithSameExprs(command: String): Unit = { checkClusteredDistributionAndSortWithSameExprs(command, None) } @@ -193,6 +246,18 @@ class WriteDistributionAndOrderingSuite checkClusteredDistributionAndSortWithExtendedExprs("overwriteDynamic") } + test("clustered distribution and sort with extended exprs: micro-batch append") { + 
checkClusteredDistributionAndSortWithExtendedExprs(microBatchPrefix + "append") + } + + test("clustered distribution and sort with extended exprs: micro-batch update") { + checkClusteredDistributionAndSortWithExtendedExprs(microBatchPrefix + "update") + } + + test("clustered distribution and sort with extended exprs: micro-batch complete") { + checkClusteredDistributionAndSortWithExtendedExprs(microBatchPrefix + "complete") + } + test("clustered distribution and sort with extended exprs with numPartitions: append") { checkClusteredDistributionAndSortWithExtendedExprs("append", Some(10)) } @@ -206,6 +271,21 @@ class WriteDistributionAndOrderingSuite checkClusteredDistributionAndSortWithExtendedExprs("overwriteDynamic", Some(10)) } + test("clustered distribution and sort with extended exprs with numPartitions: " + + "micro-batch append") { + checkClusteredDistributionAndSortWithExtendedExprs(microBatchPrefix + "append", Some(10)) + } + + test("clustered distribution and sort with extended exprs with numPartitions: " + + "micro-batch update") { + checkClusteredDistributionAndSortWithExtendedExprs(microBatchPrefix + "update", Some(10)) + } + + test("clustered distribution and sort with extended exprs with numPartitions: " + + "micro-batch complete") { + checkClusteredDistributionAndSortWithExtendedExprs(microBatchPrefix + "complete", Some(10)) + } + private def checkClusteredDistributionAndSortWithExtendedExprs(command: String): Unit = { checkClusteredDistributionAndSortWithExtendedExprs(command, None) } @@ -258,6 +338,18 @@ class WriteDistributionAndOrderingSuite checkUnspecifiedDistributionAndLocalSort("overwriteDynamic") } + test("unspecified distribution and local sort: micro-batch append") { + checkUnspecifiedDistributionAndLocalSort(microBatchPrefix + "append") + } + + test("unspecified distribution and local sort: micro-batch update") { + checkUnspecifiedDistributionAndLocalSort(microBatchPrefix + "update") + } + + test("unspecified distribution and local sort: micro-batch complete") { + checkUnspecifiedDistributionAndLocalSort(microBatchPrefix + "complete") + } + test("unspecified distribution and local sort with numPartitions: append") { checkUnspecifiedDistributionAndLocalSort("append", Some(10)) } @@ -270,6 +362,18 @@ class WriteDistributionAndOrderingSuite checkUnspecifiedDistributionAndLocalSort("overwriteDynamic", Some(10)) } + test("unspecified distribution and local sort with numPartitions: micro-batch append") { + checkUnspecifiedDistributionAndLocalSort(microBatchPrefix + "append", Some(10)) + } + + test("unspecified distribution and local sort with numPartitions: micro-batch update") { + checkUnspecifiedDistributionAndLocalSort(microBatchPrefix + "update", Some(10)) + } + + test("unspecified distribution and local sort with numPartitions: micro-batch complete") { + checkUnspecifiedDistributionAndLocalSort(microBatchPrefix + "complete", Some(10)) + } + private def checkUnspecifiedDistributionAndLocalSort(command: String): Unit = { checkUnspecifiedDistributionAndLocalSort(command, None) } @@ -316,6 +420,18 @@ class WriteDistributionAndOrderingSuite checkUnspecifiedDistributionAndNoSort("overwriteDynamic") } + test("unspecified distribution and no sort: micro-batch append") { + checkUnspecifiedDistributionAndNoSort(microBatchPrefix + "append") + } + + test("unspecified distribution and no sort: micro-batch update") { + checkUnspecifiedDistributionAndNoSort(microBatchPrefix + "update") + } + + test("unspecified distribution and no sort: micro-batch complete") { + 
checkUnspecifiedDistributionAndNoSort(microBatchPrefix + "complete") + } + test("unspecified distribution and no sort with numPartitions: append") { checkUnspecifiedDistributionAndNoSort("append", Some(10)) } @@ -328,6 +444,18 @@ class WriteDistributionAndOrderingSuite checkUnspecifiedDistributionAndNoSort("overwriteDynamic", Some(10)) } + test("unspecified distribution and no sort with numPartitions: micro-batch append") { + checkUnspecifiedDistributionAndNoSort(microBatchPrefix + "append", Some(10)) + } + + test("unspecified distribution and no sort with numPartitions: micro-batch update") { + checkUnspecifiedDistributionAndNoSort(microBatchPrefix + "update", Some(10)) + } + + test("unspecified distribution and no sort with numPartitions: micro-batch complete") { + checkUnspecifiedDistributionAndNoSort(microBatchPrefix + "complete", Some(10)) + } + private def checkUnspecifiedDistributionAndNoSort(command: String): Unit = { checkUnspecifiedDistributionAndNoSort(command, None) } @@ -677,7 +805,95 @@ class WriteDistributionAndOrderingSuite writeCommand = command) } + test("continuous mode does not support write distribution and ordering") { + val ordering = Array[SortOrder]( + sort(FieldReference("data"), SortDirection.ASCENDING, NullOrdering.NULLS_FIRST) + ) + val distribution = Distributions.ordered(ordering) + + catalog.createTable(ident, schema, Array.empty, emptyProps, distribution, ordering, None) + + withTempDir { checkpointDir => + val inputData = ContinuousMemoryStream[(Long, String)] + val inputDF = inputData.toDF().toDF("id", "data") + + val writer = inputDF + .writeStream + .trigger(Trigger.Continuous(100)) + .option("checkpointLocation", checkpointDir.getAbsolutePath) + .outputMode("append") + + val analysisException = intercept[AnalysisException] { + val query = writer.toTable(tableNameAsString) + + inputData.addData((1, "a"), (2, "b")) + + query.processAllAvailable() + query.stop() + } + + assert(analysisException.message.contains("Sinks cannot request distribution and ordering")) + } + } + + test("continuous mode allows unspecified distribution and empty ordering") { + catalog.createTable(ident, schema, Array.empty, emptyProps) + + withTempDir { checkpointDir => + val inputData = ContinuousMemoryStream[(Long, String)] + val inputDF = inputData.toDF().toDF("id", "data") + + val writer = inputDF + .writeStream + .trigger(Trigger.Continuous(100)) + .option("checkpointLocation", checkpointDir.getAbsolutePath) + .outputMode("append") + + val query = writer.toTable(tableNameAsString) + + inputData.addData((1, "a"), (2, "b")) + + query.processAllAvailable() + query.stop() + + checkAnswer(spark.table(tableNameAsString), Row(1, "a") :: Row(2, "b") :: Nil) + } + } + private def checkWriteRequirements( + tableDistribution: Distribution, + tableOrdering: Array[SortOrder], + tableNumPartitions: Option[Int], + expectedWritePartitioning: physical.Partitioning, + expectedWriteOrdering: Seq[catalyst.expressions.SortOrder], + writeTransform: DataFrame => DataFrame = df => df, + writeCommand: String, + expectAnalysisException: Boolean = false): Unit = { + + if (writeCommand.startsWith(microBatchPrefix)) { + checkMicroBatchWriteRequirements( + tableDistribution, + tableOrdering, + tableNumPartitions, + expectedWritePartitioning, + expectedWriteOrdering, + writeTransform, + outputMode = writeCommand.stripPrefix(microBatchPrefix), + expectAnalysisException) + } else { + checkBatchWriteRequirements( + tableDistribution, + tableOrdering, + tableNumPartitions, + expectedWritePartitioning, + 
expectedWriteOrdering, + writeTransform, + writeCommand, + expectAnalysisException) + } + } + + private def checkBatchWriteRequirements( tableDistribution: Distribution, tableOrdering: Array[SortOrder], tableNumPartitions: Option[Int], @@ -712,15 +928,84 @@ class WriteDistributionAndOrderingSuite } } + private def checkMicroBatchWriteRequirements( + tableDistribution: Distribution, + tableOrdering: Array[SortOrder], + tableNumPartitions: Option[Int], + expectedWritePartitioning: physical.Partitioning, + expectedWriteOrdering: Seq[catalyst.expressions.SortOrder], + writeTransform: DataFrame => DataFrame = df => df, + outputMode: String = "append", + expectAnalysisException: Boolean = false): Unit = { + + catalog.createTable(ident, schema, Array.empty, emptyProps, tableDistribution, + tableOrdering, tableNumPartitions) + + withTempDir { checkpointDir => + val inputData = MemoryStream[(Long, String)] + val inputDF = inputData.toDF().toDF("id", "data") + + val queryDF = outputMode match { + case "append" | "update" => + inputDF + case "complete" => + // add an aggregate for complete mode + inputDF + .groupBy("id") + .agg(Map("data" -> "count")) + .select($"id", $"count(data)".cast("string").as("data")) + } + + val writer = writeTransform(queryDF) + .writeStream + .option("checkpointLocation", checkpointDir.getAbsolutePath) + .outputMode(outputMode) + + def executeCommand(): SparkPlan = execute { + val query = writer.toTable(tableNameAsString) + + inputData.addData((1, "a"), (2, "b")) + + query.processAllAvailable() + query.stop() + } + + if (expectAnalysisException) { + val streamingQueryException = intercept[StreamingQueryException] { + executeCommand() + } + val cause = streamingQueryException.cause + assert(cause.getMessage.contains("number of partitions can't be specified")) + + } else { + val executedPlan = executeCommand() + + checkPartitioningAndOrdering( + executedPlan, + expectedWritePartitioning, + expectedWriteOrdering, + // there is an extra shuffle for groupBy in complete mode + maxNumShuffles = if (outputMode != "complete") 1 else 2) + + val expectedRows = outputMode match { + case "append" | "update" => Row(1, "a") :: Row(2, "b") :: Nil + case "complete" => Row(1, "1") :: Row(2, "1") :: Nil + } + checkAnswer(spark.table(tableNameAsString), expectedRows) + } + } + } + private def checkPartitioningAndOrdering( plan: SparkPlan, partitioning: physical.Partitioning, - ordering: Seq[catalyst.expressions.SortOrder]): Unit = { + ordering: Seq[catalyst.expressions.SortOrder], + maxNumShuffles: Int = 1): Unit = { val sorts = collect(plan) { case s: SortExec => s } assert(sorts.size <= 1, "must be at most one sort") val shuffles = collect(plan) { case s: ShuffleExchangeLike => s } - assert(shuffles.size <= 1, "must be at most one shuffle") + assert(shuffles.size <= maxNumShuffles, $"must be at most $maxNumShuffles shuffles") val actualPartitioning = plan.outputPartitioning val expectedPartitioning = partitioning match { @@ -730,6 +1015,9 @@ class WriteDistributionAndOrderingSuite case p: physical.HashPartitioning => val resolvedExprs = p.expressions.map(resolveAttrs(_, plan)) p.copy(expressions = resolvedExprs) + case _: UnknownPartitioning => + // don't check partitioning if no particular one is expected + actualPartitioning case other => other } assert(actualPartitioning == expectedPartitioning, "partitioning must match") From 5b02a345da618c3f9adabd59c9a3e59663843973 Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Tue, 8 Feb 2022 15:31:44 -0800 Subject: [PATCH 179/513] 
[SPARK-38142][SQL][TESTS] Move `ArrowColumnVectorSuite` to `org.apache.spark.sql.vectorized` ### What changes were proposed in this pull request? This PR proposes to move `ArrowColumnVectorSuite` to `org.apache.spark.sql.vectorized` so that the package names match ### Why are the changes needed? Currently `ArrowColumnVector` is under `org.apache.spark.sql.vectorized`. However, `ArrowColumnVectorSuite` is under `org.apache.spark.sql.execution.vectorized`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #35448 from kazuyukitanimura/SPARK-38142. Authored-by: Kazuyuki Tanimura Signed-off-by: Dongjoon Hyun --- .../{execution => }/vectorized/ArrowColumnVectorSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) rename sql/core/src/test/scala/org/apache/spark/sql/{execution => }/vectorized/ArrowColumnVectorSuite.scala (99%) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/vectorized/ArrowColumnVectorSuite.scala similarity index 99% rename from sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/vectorized/ArrowColumnVectorSuite.scala index 60f1b32a41f05..dec10e061d737 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/vectorized/ArrowColumnVectorSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.sql.execution.vectorized +package org.apache.spark.sql.vectorized import org.apache.arrow.vector._ import org.apache.arrow.vector.complex._ @@ -23,7 +23,6 @@ import org.apache.arrow.vector.complex._ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types._ import org.apache.spark.sql.util.ArrowUtils -import org.apache.spark.sql.vectorized.ArrowColumnVector import org.apache.spark.unsafe.types.UTF8String class ArrowColumnVectorSuite extends SparkFunSuite { From 43cce92958fc1b5d7153cb75db8701493c89c7ff Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Wed, 9 Feb 2022 08:32:31 +0900 Subject: [PATCH 180/513] [SPARK-38124][SQL][SS] Introduce StatefulOpClusteredDistribution and apply to stream-stream join ### What changes were proposed in this pull request? This PR revives `HashClusteredDistribution` and renames to `StatefulOpClusteredDistribution` so that the rationalization of the distribution is clear from the name. Renaming is safe because this class no longer needs to be general one - in SPARK-35703 we moved out the usages of `HashClusteredDistribution` to `ClusteredDistribution`; stateful operators are exceptions. Only `HashPartitioning` with same expressions and number of partitions can satisfy `StatefulOpClusteredDistribution`. That said, we cannot modify `HashPartitioning` unless we clone `HashPartitioning` and assign the clone to `StatefulOpClusteredDistribution`. This PR documents the expectation of stateful operator on partitioning in the classdoc of `StatefulOpClusteredDistribution`. This PR also changes stream-stream join to use `StatefulOpClusteredDistribution` instead of `ClusteredDistribution`. This effectively reverts a part of SPARK-35703 which hasn't been shipped to any releases. This PR doesn't deal with other stateful operators since it has been long standing issue (probably Spark 2.2.0+) and we need a plan for dealing with existing state. 
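To illustrate how much stricter the new requirement is, the following REPL-style snippet (illustrative only, using the Catalyst internals touched by this patch) shows that a hash partitioning on a subset of the keys satisfies `ClusteredDistribution` but not `StatefulOpClusteredDistribution`:

```scala
import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.catalyst.plans.physical._
import org.apache.spark.sql.types.IntegerType

val a = AttributeReference("a", IntegerType)()
val b = AttributeReference("b", IntegerType)()

val subsetHash = HashPartitioning(Seq(a), 10)
val exactHash = HashPartitioning(Seq(a, b), 10)

subsetHash.satisfies(ClusteredDistribution(Seq(a, b)))                // true: relaxed requirement
subsetHash.satisfies(StatefulOpClusteredDistribution(Seq(a, b), 10))  // false: key list must match exactly
exactHash.satisfies(StatefulOpClusteredDistribution(Seq(a, b), 10))   // true: same keys, same partition count
```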
### Why are the changes needed? Spark does not guarantee stable physical partitioning for stateful operators across query lifetime, and due to the relaxed distribution requirement it is hard to expect what would be the current physical partitioning of the state. (We expect hash partitioning with grouping keys, but ClusteredDistribution does not "guarantee" the partitioning. It is much more relaxed.) This PR will enforce the physical partitioning of stream-stream join operators to be hash partition with grouping keys, which is our general expectation of state store partitioning. ### Does this PR introduce _any_ user-facing change? No, since SPARK-35703 hasn't been shipped to any release yet. ### How was this patch tested? Existing tests. Closes #35419 from HeartSaVioR/SPARK-38124. Authored-by: Jungtaek Lim Signed-off-by: Jungtaek Lim --- .../plans/physical/partitioning.scala | 40 +++++++++++++++++++ .../StreamingSymmetricHashJoinExec.scala | 4 +- .../sql/streaming/StreamingJoinSuite.scala | 2 +- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index 7a730c4b7318b..4418d3253a8b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -90,6 +90,37 @@ case class ClusteredDistribution( } } +/** + * Represents the requirement of distribution on the stateful operator in Structured Streaming. + * + * Each partition in stateful operator initializes state store(s), which are independent with state + * store(s) in other partitions. Since it is not possible to repartition the data in state store, + * Spark should make sure the physical partitioning of the stateful operator is unchanged across + * Spark versions. Violation of this requirement may bring silent correctness issue. + * + * Since this distribution relies on [[HashPartitioning]] on the physical partitioning of the + * stateful operator, only [[HashPartitioning]] (and HashPartitioning in + * [[PartitioningCollection]]) can satisfy this distribution. + */ +case class StatefulOpClusteredDistribution( + expressions: Seq[Expression], + _requiredNumPartitions: Int) extends Distribution { + require( + expressions != Nil, + "The expressions for hash of a StatefulOpClusteredDistribution should not be Nil. " + + "An AllTuples should be used to represent a distribution that only has " + + "a single partition.") + + override val requiredNumPartitions: Option[Int] = Some(_requiredNumPartitions) + + override def createPartitioning(numPartitions: Int): Partitioning = { + assert(_requiredNumPartitions == numPartitions, + s"This StatefulOpClusteredDistribution requires ${_requiredNumPartitions} " + + s"partitions, but the actual number of partitions is $numPartitions.") + HashPartitioning(expressions, numPartitions) + } +} + /** * Represents data where tuples have been ordered according to the `ordering` * [[Expression Expressions]]. Its requirement is defined as the following: @@ -200,6 +231,11 @@ case object SinglePartition extends Partitioning { * Represents a partitioning where rows are split up across partitions based on the hash * of `expressions`. All rows where `expressions` evaluate to the same values are guaranteed to be * in the same partition. 
+ * + * Since [[StatefulOpClusteredDistribution]] relies on this partitioning and Spark requires + * stateful operators to retain the same physical partitioning during the lifetime of the query + * (including restart), the result of evaluation on `partitionIdExpression` must be unchanged + * across Spark versions. Violation of this requirement may bring silent correctness issue. */ case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int) extends Expression with Partitioning with Unevaluable { @@ -211,6 +247,10 @@ case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int) override def satisfies0(required: Distribution): Boolean = { super.satisfies0(required) || { required match { + case h: StatefulOpClusteredDistribution => + expressions.length == h.expressions.length && expressions.zip(h.expressions).forall { + case (l, r) => l.semanticEquals(r) + } case ClusteredDistribution(requiredClustering, _) => expressions.forall(x => requiredClustering.exists(_.semanticEquals(x))) case _ => false diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala index 74b82451e029f..adb84a3b7d3fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala @@ -185,8 +185,8 @@ case class StreamingSymmetricHashJoinExec( val nullRight = new GenericInternalRow(right.output.map(_.withNullability(true)).length) override def requiredChildDistribution: Seq[Distribution] = - ClusteredDistribution(leftKeys, stateInfo.map(_.numPartitions)) :: - ClusteredDistribution(rightKeys, stateInfo.map(_.numPartitions)) :: Nil + StatefulOpClusteredDistribution(leftKeys, getStateInfo.numPartitions) :: + StatefulOpClusteredDistribution(rightKeys, getStateInfo.numPartitions) :: Nil override def output: Seq[Attribute] = joinType match { case _: InnerLike => left.output ++ right.output diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index 5ec47bb2aa527..e0926ef0a82ff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -571,7 +571,7 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { CheckNewAnswer((5, 10, 5, 15, 5, 25))) } - test("streaming join should require HashClusteredDistribution from children") { + test("streaming join should require StatefulOpClusteredDistribution from children") { val input1 = MemoryStream[Int] val input2 = MemoryStream[Int] From cc53a0e0734cf56711667b65cfbaf5684fc06923 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 8 Feb 2022 15:35:54 -0800 Subject: [PATCH 181/513] [SPARK-38144][CORE] Remove unused `spark.storage.safetyFraction` config ### What changes were proposed in this pull request? This PR aims to remove the unused `spark.storage.safetyFraction`. ### Why are the changes needed? Apache Spark 3.0.0 deleted `StaticMemoryManager` and its `spark.storage.safetyFraction` usage via SPARK-26539. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #35447 from dongjoon-hyun/SPARK-38144. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/internal/config/package.scala | 5 ----- 1 file changed, 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 9e6cf341c197b..dbec61a1fdb76 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -364,11 +364,6 @@ package object config { .doubleConf .createWithDefault(0.6) - private[spark] val STORAGE_SAFETY_FRACTION = ConfigBuilder("spark.storage.safetyFraction") - .version("1.1.0") - .doubleConf - .createWithDefault(0.9) - private[spark] val STORAGE_UNROLL_MEMORY_THRESHOLD = ConfigBuilder("spark.storage.unrollMemoryThreshold") .doc("Initial memory to request before unrolling any block") From 8d2e08ff166d33c99ba40a311fd1310a2b17b98b Mon Sep 17 00:00:00 2001 From: nyingping Date: Tue, 8 Feb 2022 15:39:02 -0800 Subject: [PATCH 182/513] [SPARK-38069][SQL][SS] Improve the calculation of time window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Remove the CaseWhen,Modified the calculation method of the obtained window new logic: `lastStart ` needs to be, which is less than `timestamp ` and is the maximum integer multiple of `windowsize` `lastStart `is equal to `timestamp `minus the time left in the maximum integer multiple window `val lastStart = timestamp - (timestamp - window.startTime + window.slideDuration) % window.slideDuration` After getting `lastStart`, `lastEnd `is obvious, and other possible Windows can be computed using `i` and `windowsize` ### Why are the changes needed? Structed Streaming computes window by intermediate result windowId, and windowId computes window by CaseWhen. We can use Flink's method of calculating window to write it, which is more easy to understand, simple and efficient ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? Existing test as this is just refactoring. 
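As a quick sanity check of the `lastStart` formula described above, here is a hand-worked example (illustration only, not part of the patch; the values mirror the sliding-window benchmark parameters of 17s window, 5s slide, 2s start offset):

```scala
// Sliding window: size 17s, slide 5s, start offset 2s; event at t = 23s.
val timestamp = 23L
val startTime = 2L
val slideDuration = 5L
val windowDuration = 17L

// lastStart: the latest window start <= timestamp (the new formula in this patch).
val lastStart = timestamp - (timestamp - startTime + slideDuration) % slideDuration  // 22
val maxNumOverlapping = math.ceil(windowDuration.toDouble / slideDuration).toInt     // 4

// Candidate windows: (22,39), (17,34), (12,29), (7,24) — each contains t = 23.
// (In the actual rule, a later filter still drops any candidate that does not
// contain the timestamp, e.g. when the event falls exactly on a window end.)
val windows = (0 until maxNumOverlapping).map { i =>
  val windowStart = lastStart - i * slideDuration
  (windowStart, windowStart + windowDuration)
}
```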
Also composed and ran a simple benchmark in this commit: https://github.com/HeartSaVioR/spark/commit/d532b6f6bcdd80cdaac520b21587ebb69ff2df8f Quoting queries used to benchmark the change: > tumble window ``` spark.range(numOfRow) .selectExpr("CAST(id AS timestamp) AS time") .select(window(col("time"), "12 seconds", "12 seconds", "2 seconds")) .count() ``` > sliding window ``` spark.range(numOfRow) .selectExpr("CAST(id AS timestamp) AS time") .select(window(col("time"), "17 seconds", "5 seconds", "2 seconds")) .count() ``` Results are following: > tumble window ``` [info] OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws [info] Intel(R) Xeon(R) Platinum 8259CL CPU 2.50GHz [info] tumbling windows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative [info] ------------------------------------------------------------------------------------------------------------------------ [info] old logic 22 31 13 457.0 2.2 1.0X [info] new logic 17 19 2 589.9 1.7 1.3X ``` > sliding window ``` [info] OpenJDK 64-Bit Server VM 1.8.0_292-8u292-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1045-aws [info] Intel(R) Xeon(R) Platinum 8259CL CPU 2.50GHz [info] sliding windows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative [info] ------------------------------------------------------------------------------------------------------------------------ [info] old logic 1347 1368 16 7.4 134.7 1.0X [info] new logic 867 886 16 11.5 86.7 1.6X ``` Closes #35362 from nyingping/main. Lead-authored-by: nyingping Co-authored-by: Nie yingping Signed-off-by: Liang-Chi Hsieh --- .../sql/catalyst/analysis/Analyzer.scala | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 0390131172bb6..ba7c39f9db571 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3843,8 +3843,8 @@ object TimeWindowing extends Rule[LogicalPlan] { * The windows are calculated as below: * maxNumOverlapping <- ceil(windowDuration / slideDuration) * for (i <- 0 until maxNumOverlapping) - * windowId <- ceil((timestamp - startTime) / slideDuration) - * windowStart <- windowId * slideDuration + (i - maxNumOverlapping) * slideDuration + startTime + * lastStart <- timestamp - (timestamp - startTime + slideDuration) % slideDuration + * windowStart <- lastStart - i * slideDuration * windowEnd <- windowStart + windowDuration * return windowStart, windowEnd * @@ -3884,14 +3884,11 @@ object TimeWindowing extends Rule[LogicalPlan] { case _ => Metadata.empty } - def getWindow(i: Int, overlappingWindows: Int, dataType: DataType): Expression = { - val division = (PreciseTimestampConversion( - window.timeColumn, dataType, LongType) - window.startTime) / window.slideDuration - val ceil = Ceil(division) - // if the division is equal to the ceiling, our record is the start of a window - val windowId = CaseWhen(Seq((ceil === division, ceil + 1)), Some(ceil)) - val windowStart = (windowId + i - overlappingWindows) * - window.slideDuration + window.startTime + def getWindow(i: Int, dataType: DataType): Expression = { + val timestamp = PreciseTimestampConversion(window.timeColumn, dataType, LongType) + val lastStart = timestamp - (timestamp - window.startTime + + 
window.slideDuration) % window.slideDuration + val windowStart = lastStart - i * window.slideDuration val windowEnd = windowStart + window.windowDuration CreateNamedStruct( @@ -3906,7 +3903,7 @@ object TimeWindowing extends Rule[LogicalPlan] { WINDOW_COL_NAME, window.dataType, metadata = metadata)() if (window.windowDuration == window.slideDuration) { - val windowStruct = Alias(getWindow(0, 1, window.timeColumn.dataType), WINDOW_COL_NAME)( + val windowStruct = Alias(getWindow(0, window.timeColumn.dataType), WINDOW_COL_NAME)( exprId = windowAttr.exprId, explicitMetadata = Some(metadata)) val replacedPlan = p transformExpressions { @@ -3924,7 +3921,7 @@ object TimeWindowing extends Rule[LogicalPlan] { math.ceil(window.windowDuration * 1.0 / window.slideDuration).toInt val windows = Seq.tabulate(overlappingWindows)(i => - getWindow(i, overlappingWindows, window.timeColumn.dataType)) + getWindow(i, window.timeColumn.dataType)) val projections = windows.map(_ +: child.output) From 2a9416abd5c8d83901c09a08433bf91e649dc43f Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 9 Feb 2022 01:32:57 +0100 Subject: [PATCH 183/513] [SPARK-37404][PYTHON][ML] Inline type hints for pyspark.ml.evaluation.py ### What changes were proposed in this pull request? This PR migrates type `pyspark.ml.evaluation` annotations from stub file to inline type hints. ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No, ### How was this patch tested? Existing tests. Closes #35403 from zero323/SPARK-37404. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/ml/_typing.pyi | 2 + python/pyspark/ml/evaluation.py | 349 ++++++++++-------- python/pyspark/ml/evaluation.pyi | 277 -------------- .../ml/tests/typing/test_evaluation.yml | 2 + 4 files changed, 207 insertions(+), 423 deletions(-) delete mode 100644 python/pyspark/ml/evaluation.pyi diff --git a/python/pyspark/ml/_typing.pyi b/python/pyspark/ml/_typing.pyi index 7862078bd2621..12d831f1e8c7e 100644 --- a/python/pyspark/ml/_typing.pyi +++ b/python/pyspark/ml/_typing.pyi @@ -71,6 +71,8 @@ MultilabelClassificationEvaluatorMetricType = Union[ Literal["microF1Measure"], ] ClusteringEvaluatorMetricType = Literal["silhouette"] +ClusteringEvaluatorDistanceMeasureType = Union[Literal["squaredEuclidean"], Literal["cosine"]] + RankingEvaluatorMetricType = Union[ Literal["meanAveragePrecision"], Literal["meanAveragePrecisionAtK"], diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index be63a8f8ce972..ff0e5b91e424d 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -18,6 +18,8 @@ import sys from abc import abstractmethod, ABCMeta +from typing import Any, Dict, Optional, TYPE_CHECKING + from pyspark import since, keyword_only from pyspark.ml.wrapper import JavaParams from pyspark.ml.param import Param, Params, TypeConverters @@ -31,6 +33,20 @@ ) from pyspark.ml.common import inherit_doc from pyspark.ml.util import JavaMLReadable, JavaMLWritable +from pyspark.sql.dataframe import DataFrame + +if TYPE_CHECKING: + from pyspark.ml._typing import ( + ParamMap, + BinaryClassificationEvaluatorMetricType, + ClusteringEvaluatorDistanceMeasureType, + ClusteringEvaluatorMetricType, + MulticlassClassificationEvaluatorMetricType, + MultilabelClassificationEvaluatorMetricType, + RankingEvaluatorMetricType, + RegressionEvaluatorMetricType, + ) + __all__ = [ "Evaluator", @@ -54,7 +70,7 @@ class Evaluator(Params, metaclass=ABCMeta): pass @abstractmethod - 
def _evaluate(self, dataset): + def _evaluate(self, dataset: DataFrame) -> float: """ Evaluates the output. @@ -70,7 +86,7 @@ def _evaluate(self, dataset): """ raise NotImplementedError() - def evaluate(self, dataset, params=None): + def evaluate(self, dataset: DataFrame, params: Optional["ParamMap"] = None) -> float: """ Evaluates the output with optional parameters. @@ -99,7 +115,7 @@ def evaluate(self, dataset, params=None): raise TypeError("Params must be a param map but got %s." % type(params)) @since("1.5.0") - def isLargerBetter(self): + def isLargerBetter(self) -> bool: """ Indicates whether the metric returned by :py:meth:`evaluate` should be maximized (True, default) or minimized (False). @@ -115,7 +131,7 @@ class JavaEvaluator(JavaParams, Evaluator, metaclass=ABCMeta): implementations. """ - def _evaluate(self, dataset): + def _evaluate(self, dataset: DataFrame) -> float: """ Evaluates the output. @@ -130,16 +146,23 @@ def _evaluate(self, dataset): evaluation metric """ self._transfer_params_to_java() + assert self._java_obj is not None return self._java_obj.evaluate(dataset._jdf) - def isLargerBetter(self): + def isLargerBetter(self) -> bool: self._transfer_params_to_java() + assert self._java_obj is not None return self._java_obj.isLargerBetter() @inherit_doc class BinaryClassificationEvaluator( - JavaEvaluator, HasLabelCol, HasRawPredictionCol, HasWeightCol, JavaMLReadable, JavaMLWritable + JavaEvaluator, + HasLabelCol, + HasRawPredictionCol, + HasWeightCol, + JavaMLReadable["BinaryClassificationEvaluator"], + JavaMLWritable, ): """ Evaluator for binary classification, which expects input columns rawPrediction, label @@ -182,14 +205,14 @@ class BinaryClassificationEvaluator( 1000 """ - metricName = Param( + metricName: Param["BinaryClassificationEvaluatorMetricType"] = Param( Params._dummy(), "metricName", "metric name in evaluation (areaUnderROC|areaUnderPR)", - typeConverter=TypeConverters.toString, + typeConverter=TypeConverters.toString, # type: ignore[arg-type] ) - numBins = Param( + numBins: Param[int] = Param( Params._dummy(), "numBins", "Number of bins to down-sample the curves " @@ -198,15 +221,17 @@ class BinaryClassificationEvaluator( typeConverter=TypeConverters.toInt, ) + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - rawPredictionCol="rawPrediction", - labelCol="label", - metricName="areaUnderROC", - weightCol=None, - numBins=1000, + rawPredictionCol: str = "rawPrediction", + labelCol: str = "label", + metricName: "BinaryClassificationEvaluatorMetricType" = "areaUnderROC", + weightCol: Optional[str] = None, + numBins: int = 1000, ): """ __init__(self, \\*, rawPredictionCol="rawPrediction", labelCol="label", \ @@ -221,47 +246,49 @@ def __init__( self._set(**kwargs) @since("1.4.0") - def setMetricName(self, value): + def setMetricName( + self, value: "BinaryClassificationEvaluatorMetricType" + ) -> "BinaryClassificationEvaluator": """ Sets the value of :py:attr:`metricName`. """ return self._set(metricName=value) @since("1.4.0") - def getMetricName(self): + def getMetricName(self) -> str: """ Gets the value of metricName or its default value. """ return self.getOrDefault(self.metricName) @since("3.0.0") - def setNumBins(self, value): + def setNumBins(self, value: int) -> "BinaryClassificationEvaluator": """ Sets the value of :py:attr:`numBins`. """ return self._set(numBins=value) @since("3.0.0") - def getNumBins(self): + def getNumBins(self) -> int: """ Gets the value of numBins or its default value. 
""" return self.getOrDefault(self.numBins) - def setLabelCol(self, value): + def setLabelCol(self, value: str) -> "BinaryClassificationEvaluator": """ Sets the value of :py:attr:`labelCol`. """ return self._set(labelCol=value) - def setRawPredictionCol(self, value): + def setRawPredictionCol(self, value: str) -> "BinaryClassificationEvaluator": """ Sets the value of :py:attr:`rawPredictionCol`. """ return self._set(rawPredictionCol=value) @since("3.0.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "BinaryClassificationEvaluator": """ Sets the value of :py:attr:`weightCol`. """ @@ -272,12 +299,12 @@ def setWeightCol(self, value): def setParams( self, *, - rawPredictionCol="rawPrediction", - labelCol="label", - metricName="areaUnderROC", - weightCol=None, - numBins=1000, - ): + rawPredictionCol: str = "rawPrediction", + labelCol: str = "label", + metricName: "BinaryClassificationEvaluatorMetricType" = "areaUnderROC", + weightCol: Optional[str] = None, + numBins: int = 1000, + ) -> "BinaryClassificationEvaluator": """ setParams(self, \\*, rawPredictionCol="rawPrediction", labelCol="label", \ metricName="areaUnderROC", weightCol=None, numBins=1000) @@ -289,7 +316,12 @@ def setParams( @inherit_doc class RegressionEvaluator( - JavaEvaluator, HasLabelCol, HasPredictionCol, HasWeightCol, JavaMLReadable, JavaMLWritable + JavaEvaluator, + HasLabelCol, + HasPredictionCol, + HasWeightCol, + JavaMLReadable["RegressionEvaluator"], + JavaMLWritable, ): """ Evaluator for Regression, which expects input columns prediction, label @@ -328,7 +360,7 @@ class RegressionEvaluator( False """ - metricName = Param( + metricName: Param["RegressionEvaluatorMetricType"] = Param( Params._dummy(), "metricName", """metric name in evaluation - one of: @@ -337,25 +369,27 @@ class RegressionEvaluator( r2 - r^2 metric mae - mean absolute error var - explained variance.""", - typeConverter=TypeConverters.toString, + typeConverter=TypeConverters.toString, # type: ignore[arg-type] ) - throughOrigin = Param( + throughOrigin: Param[bool] = Param( Params._dummy(), "throughOrigin", "whether the regression is through the origin.", typeConverter=TypeConverters.toBoolean, ) + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - predictionCol="prediction", - labelCol="label", - metricName="rmse", - weightCol=None, - throughOrigin=False, + predictionCol: str = "prediction", + labelCol: str = "label", + metricName: "RegressionEvaluatorMetricType" = "rmse", + weightCol: Optional[str] = None, + throughOrigin: bool = False, ): """ __init__(self, \\*, predictionCol="prediction", labelCol="label", \ @@ -370,47 +404,47 @@ def __init__( self._set(**kwargs) @since("1.4.0") - def setMetricName(self, value): + def setMetricName(self, value: "RegressionEvaluatorMetricType") -> "RegressionEvaluator": """ Sets the value of :py:attr:`metricName`. """ return self._set(metricName=value) @since("1.4.0") - def getMetricName(self): + def getMetricName(self) -> "RegressionEvaluatorMetricType": """ Gets the value of metricName or its default value. """ return self.getOrDefault(self.metricName) @since("3.0.0") - def setThroughOrigin(self, value): + def setThroughOrigin(self, value: bool) -> "RegressionEvaluator": """ Sets the value of :py:attr:`throughOrigin`. """ return self._set(throughOrigin=value) @since("3.0.0") - def getThroughOrigin(self): + def getThroughOrigin(self) -> bool: """ Gets the value of throughOrigin or its default value. 
""" return self.getOrDefault(self.throughOrigin) - def setLabelCol(self, value): + def setLabelCol(self, value: str) -> "RegressionEvaluator": """ Sets the value of :py:attr:`labelCol`. """ return self._set(labelCol=value) - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "RegressionEvaluator": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) @since("3.0.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "RegressionEvaluator": """ Sets the value of :py:attr:`weightCol`. """ @@ -421,12 +455,12 @@ def setWeightCol(self, value): def setParams( self, *, - predictionCol="prediction", - labelCol="label", - metricName="rmse", - weightCol=None, - throughOrigin=False, - ): + predictionCol: str = "prediction", + labelCol: str = "label", + metricName: "RegressionEvaluatorMetricType" = "rmse", + weightCol: Optional[str] = None, + throughOrigin: bool = False, + ) -> "RegressionEvaluator": """ setParams(self, \\*, predictionCol="prediction", labelCol="label", \ metricName="rmse", weightCol=None, throughOrigin=False) @@ -443,7 +477,7 @@ class MulticlassClassificationEvaluator( HasPredictionCol, HasWeightCol, HasProbabilityCol, - JavaMLReadable, + JavaMLReadable["MulticlassClassificationEvaluator"], JavaMLWritable, ): """ @@ -499,7 +533,7 @@ class MulticlassClassificationEvaluator( 0.9682... """ - metricName = Param( + metricName: Param["MulticlassClassificationEvaluatorMetricType"] = Param( Params._dummy(), "metricName", "metric name in evaluation " @@ -507,9 +541,9 @@ class MulticlassClassificationEvaluator( "weightedFalsePositiveRate|weightedFMeasure|truePositiveRateByLabel| " "falsePositiveRateByLabel|precisionByLabel|recallByLabel|fMeasureByLabel| " "logLoss|hammingLoss)", - typeConverter=TypeConverters.toString, + typeConverter=TypeConverters.toString, # type: ignore[arg-type] ) - metricLabel = Param( + metricLabel: Param[float] = Param( Params._dummy(), "metricLabel", "The class whose metric will be computed in truePositiveRateByLabel|" @@ -517,14 +551,14 @@ class MulticlassClassificationEvaluator( " Must be >= 0. The default value is 0.", typeConverter=TypeConverters.toFloat, ) - beta = Param( + beta: Param[float] = Param( Params._dummy(), "beta", "The beta value used in weightedFMeasure|fMeasureByLabel." " Must be > 0. The default value is 1.", typeConverter=TypeConverters.toFloat, ) - eps = Param( + eps: Param[float] = Param( Params._dummy(), "eps", "log-loss is undefined for p=0 or p=1, so probabilities are clipped to " @@ -533,18 +567,20 @@ class MulticlassClassificationEvaluator( typeConverter=TypeConverters.toFloat, ) + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - predictionCol="prediction", - labelCol="label", - metricName="f1", - weightCol=None, - metricLabel=0.0, - beta=1.0, - probabilityCol="probability", - eps=1e-15, + predictionCol: str = "prediction", + labelCol: str = "label", + metricName: "MulticlassClassificationEvaluatorMetricType" = "f1", + weightCol: Optional[str] = None, + metricLabel: float = 0.0, + beta: float = 1.0, + probabilityCol: str = "probability", + eps: float = 1e-15, ): """ __init__(self, \\*, predictionCol="prediction", labelCol="label", \ @@ -560,82 +596,84 @@ def __init__( self._set(**kwargs) @since("1.5.0") - def setMetricName(self, value): + def setMetricName( + self, value: "MulticlassClassificationEvaluatorMetricType" + ) -> "MulticlassClassificationEvaluator": """ Sets the value of :py:attr:`metricName`. 
""" return self._set(metricName=value) @since("1.5.0") - def getMetricName(self): + def getMetricName(self) -> "MulticlassClassificationEvaluatorMetricType": """ Gets the value of metricName or its default value. """ return self.getOrDefault(self.metricName) @since("3.0.0") - def setMetricLabel(self, value): + def setMetricLabel(self, value: float) -> "MulticlassClassificationEvaluator": """ Sets the value of :py:attr:`metricLabel`. """ return self._set(metricLabel=value) @since("3.0.0") - def getMetricLabel(self): + def getMetricLabel(self) -> float: """ Gets the value of metricLabel or its default value. """ return self.getOrDefault(self.metricLabel) @since("3.0.0") - def setBeta(self, value): + def setBeta(self, value: float) -> "MulticlassClassificationEvaluator": """ Sets the value of :py:attr:`beta`. """ return self._set(beta=value) @since("3.0.0") - def getBeta(self): + def getBeta(self) -> float: """ Gets the value of beta or its default value. """ return self.getOrDefault(self.beta) @since("3.0.0") - def setEps(self, value): + def setEps(self, value: float) -> "MulticlassClassificationEvaluator": """ Sets the value of :py:attr:`eps`. """ return self._set(eps=value) @since("3.0.0") - def getEps(self): + def getEps(self) -> float: """ Gets the value of eps or its default value. """ return self.getOrDefault(self.eps) - def setLabelCol(self, value): + def setLabelCol(self, value: str) -> "MulticlassClassificationEvaluator": """ Sets the value of :py:attr:`labelCol`. """ return self._set(labelCol=value) - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "MulticlassClassificationEvaluator": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) @since("3.0.0") - def setProbabilityCol(self, value): + def setProbabilityCol(self, value: str) -> "MulticlassClassificationEvaluator": """ Sets the value of :py:attr:`probabilityCol`. """ return self._set(probabilityCol=value) @since("3.0.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "MulticlassClassificationEvaluator": """ Sets the value of :py:attr:`weightCol`. 
""" @@ -646,15 +684,15 @@ def setWeightCol(self, value): def setParams( self, *, - predictionCol="prediction", - labelCol="label", - metricName="f1", - weightCol=None, - metricLabel=0.0, - beta=1.0, - probabilityCol="probability", - eps=1e-15, - ): + predictionCol: str = "prediction", + labelCol: str = "label", + metricName: "MulticlassClassificationEvaluatorMetricType" = "f1", + weightCol: Optional[str] = None, + metricLabel: float = 0.0, + beta: float = 1.0, + probabilityCol: str = "probability", + eps: float = 1e-15, + ) -> "MulticlassClassificationEvaluator": """ setParams(self, \\*, predictionCol="prediction", labelCol="label", \ metricName="f1", weightCol=None, metricLabel=0.0, beta=1.0, \ @@ -667,7 +705,11 @@ def setParams( @inherit_doc class MultilabelClassificationEvaluator( - JavaEvaluator, HasLabelCol, HasPredictionCol, JavaMLReadable, JavaMLWritable + JavaEvaluator, + HasLabelCol, + HasPredictionCol, + JavaMLReadable["MultilabelClassificationEvaluator"], + JavaMLWritable, ): """ Evaluator for Multilabel Classification, which expects two input @@ -700,16 +742,16 @@ class MultilabelClassificationEvaluator( 'prediction' """ - metricName = Param( + metricName: Param["MultilabelClassificationEvaluatorMetricType"] = Param( Params._dummy(), "metricName", "metric name in evaluation " "(subsetAccuracy|accuracy|hammingLoss|precision|recall|f1Measure|" "precisionByLabel|recallByLabel|f1MeasureByLabel|microPrecision|" "microRecall|microF1Measure)", - typeConverter=TypeConverters.toString, + typeConverter=TypeConverters.toString, # type: ignore[arg-type] ) - metricLabel = Param( + metricLabel: Param[float] = Param( Params._dummy(), "metricLabel", "The class whose metric will be computed in precisionByLabel|" @@ -718,15 +760,17 @@ class MultilabelClassificationEvaluator( typeConverter=TypeConverters.toFloat, ) + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - predictionCol="prediction", - labelCol="label", - metricName="f1Measure", - metricLabel=0.0, - ): + predictionCol: str = "prediction", + labelCol: str = "label", + metricName: "MultilabelClassificationEvaluatorMetricType" = "f1Measure", + metricLabel: float = 0.0, + ) -> None: """ __init__(self, \\*, predictionCol="prediction", labelCol="label", \ metricName="f1Measure", metricLabel=0.0) @@ -740,42 +784,44 @@ def __init__( self._set(**kwargs) @since("3.0.0") - def setMetricName(self, value): + def setMetricName( + self, value: "MultilabelClassificationEvaluatorMetricType" + ) -> "MultilabelClassificationEvaluator": """ Sets the value of :py:attr:`metricName`. """ return self._set(metricName=value) @since("3.0.0") - def getMetricName(self): + def getMetricName(self) -> "MultilabelClassificationEvaluatorMetricType": """ Gets the value of metricName or its default value. """ return self.getOrDefault(self.metricName) @since("3.0.0") - def setMetricLabel(self, value): + def setMetricLabel(self, value: float) -> "MultilabelClassificationEvaluator": """ Sets the value of :py:attr:`metricLabel`. """ return self._set(metricLabel=value) @since("3.0.0") - def getMetricLabel(self): + def getMetricLabel(self) -> float: """ Gets the value of metricLabel or its default value. """ return self.getOrDefault(self.metricLabel) @since("3.0.0") - def setLabelCol(self, value): + def setLabelCol(self, value: str) -> "MultilabelClassificationEvaluator": """ Sets the value of :py:attr:`labelCol`. 
""" return self._set(labelCol=value) @since("3.0.0") - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "MultilabelClassificationEvaluator": """ Sets the value of :py:attr:`predictionCol`. """ @@ -786,11 +832,11 @@ def setPredictionCol(self, value): def setParams( self, *, - predictionCol="prediction", - labelCol="label", - metricName="f1Measure", - metricLabel=0.0, - ): + predictionCol: str = "prediction", + labelCol: str = "label", + metricName: "MultilabelClassificationEvaluatorMetricType" = "f1Measure", + metricLabel: float = 0.0, + ) -> "MultilabelClassificationEvaluator": """ setParams(self, \\*, predictionCol="prediction", labelCol="label", \ metricName="f1Measure", metricLabel=0.0) @@ -802,7 +848,12 @@ def setParams( @inherit_doc class ClusteringEvaluator( - JavaEvaluator, HasPredictionCol, HasFeaturesCol, HasWeightCol, JavaMLReadable, JavaMLWritable + JavaEvaluator, + HasPredictionCol, + HasFeaturesCol, + HasWeightCol, + JavaMLReadable["ClusteringEvaluator"], + JavaMLWritable, ): """ Evaluator for Clustering results, which expects two input @@ -848,28 +899,30 @@ class ClusteringEvaluator( 'prediction' """ - metricName = Param( + metricName: Param["ClusteringEvaluatorMetricType"] = Param( Params._dummy(), "metricName", "metric name in evaluation (silhouette)", - typeConverter=TypeConverters.toString, + typeConverter=TypeConverters.toString, # type: ignore[arg-type] ) - distanceMeasure = Param( + distanceMeasure: Param["ClusteringEvaluatorDistanceMeasureType"] = Param( Params._dummy(), "distanceMeasure", "The distance measure. " + "Supported options: 'squaredEuclidean' and 'cosine'.", - typeConverter=TypeConverters.toString, + typeConverter=TypeConverters.toString, # type: ignore[arg-type] ) + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - predictionCol="prediction", - featuresCol="features", - metricName="silhouette", - distanceMeasure="squaredEuclidean", - weightCol=None, + predictionCol: str = "prediction", + featuresCol: str = "features", + metricName: "ClusteringEvaluatorMetricType" = "silhouette", + distanceMeasure: str = "squaredEuclidean", + weightCol: Optional[str] = None, ): """ __init__(self, \\*, predictionCol="prediction", featuresCol="features", \ @@ -888,12 +941,12 @@ def __init__( def setParams( self, *, - predictionCol="prediction", - featuresCol="features", - metricName="silhouette", - distanceMeasure="squaredEuclidean", - weightCol=None, - ): + predictionCol: str = "prediction", + featuresCol: str = "features", + metricName: "ClusteringEvaluatorMetricType" = "silhouette", + distanceMeasure: str = "squaredEuclidean", + weightCol: Optional[str] = None, + ) -> "ClusteringEvaluator": """ setParams(self, \\*, predictionCol="prediction", featuresCol="features", \ metricName="silhouette", distanceMeasure="squaredEuclidean", weightCol=None) @@ -903,47 +956,49 @@ def setParams( return self._set(**kwargs) @since("2.3.0") - def setMetricName(self, value): + def setMetricName(self, value: "ClusteringEvaluatorMetricType") -> "ClusteringEvaluator": """ Sets the value of :py:attr:`metricName`. """ return self._set(metricName=value) @since("2.3.0") - def getMetricName(self): + def getMetricName(self) -> "ClusteringEvaluatorMetricType": """ Gets the value of metricName or its default value. 
""" return self.getOrDefault(self.metricName) @since("2.4.0") - def setDistanceMeasure(self, value): + def setDistanceMeasure( + self, value: "ClusteringEvaluatorDistanceMeasureType" + ) -> "ClusteringEvaluator": """ Sets the value of :py:attr:`distanceMeasure`. """ return self._set(distanceMeasure=value) @since("2.4.0") - def getDistanceMeasure(self): + def getDistanceMeasure(self) -> "ClusteringEvaluatorDistanceMeasureType": """ Gets the value of `distanceMeasure` """ return self.getOrDefault(self.distanceMeasure) - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: "str") -> "ClusteringEvaluator": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "ClusteringEvaluator": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) @since("3.1.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "ClusteringEvaluator": """ Sets the value of :py:attr:`weightCol`. """ @@ -952,7 +1007,7 @@ def setWeightCol(self, value): @inherit_doc class RankingEvaluator( - JavaEvaluator, HasLabelCol, HasPredictionCol, JavaMLReadable, JavaMLWritable + JavaEvaluator, HasLabelCol, HasPredictionCol, JavaMLReadable["RankingEvaluator"], JavaMLWritable ): """ Evaluator for Ranking, which expects two input @@ -986,15 +1041,15 @@ class RankingEvaluator( 'prediction' """ - metricName = Param( + metricName: Param["RankingEvaluatorMetricType"] = Param( Params._dummy(), "metricName", "metric name in evaluation " "(meanAveragePrecision|meanAveragePrecisionAtK|" "precisionAtK|ndcgAtK|recallAtK)", - typeConverter=TypeConverters.toString, + typeConverter=TypeConverters.toString, # type: ignore[arg-type] ) - k = Param( + k: Param[int] = Param( Params._dummy(), "k", "The ranking position value used in meanAveragePrecisionAtK|precisionAtK|" @@ -1002,14 +1057,16 @@ class RankingEvaluator( typeConverter=TypeConverters.toInt, ) + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - predictionCol="prediction", - labelCol="label", - metricName="meanAveragePrecision", - k=10, + predictionCol: str = "prediction", + labelCol: str = "label", + metricName: "RankingEvaluatorMetricType" = "meanAveragePrecision", + k: int = 10, ): """ __init__(self, \\*, predictionCol="prediction", labelCol="label", \ @@ -1024,42 +1081,42 @@ def __init__( self._set(**kwargs) @since("3.0.0") - def setMetricName(self, value): + def setMetricName(self, value: "RankingEvaluatorMetricType") -> "RankingEvaluator": """ Sets the value of :py:attr:`metricName`. """ return self._set(metricName=value) @since("3.0.0") - def getMetricName(self): + def getMetricName(self) -> "RankingEvaluatorMetricType": """ Gets the value of metricName or its default value. """ return self.getOrDefault(self.metricName) @since("3.0.0") - def setK(self, value): + def setK(self, value: int) -> "RankingEvaluator": """ Sets the value of :py:attr:`k`. """ return self._set(k=value) @since("3.0.0") - def getK(self): + def getK(self) -> int: """ Gets the value of k or its default value. """ return self.getOrDefault(self.k) @since("3.0.0") - def setLabelCol(self, value): + def setLabelCol(self, value: str) -> "RankingEvaluator": """ Sets the value of :py:attr:`labelCol`. """ return self._set(labelCol=value) @since("3.0.0") - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "RankingEvaluator": """ Sets the value of :py:attr:`predictionCol`. 
""" @@ -1070,11 +1127,11 @@ def setPredictionCol(self, value): def setParams( self, *, - predictionCol="prediction", - labelCol="label", - metricName="meanAveragePrecision", - k=10, - ): + predictionCol: str = "prediction", + labelCol: str = "label", + metricName: "RankingEvaluatorMetricType" = "meanAveragePrecision", + k: int = 10, + ) -> "RankingEvaluator": """ setParams(self, \\*, predictionCol="prediction", labelCol="label", \ metricName="meanAveragePrecision", k=10) diff --git a/python/pyspark/ml/evaluation.pyi b/python/pyspark/ml/evaluation.pyi deleted file mode 100644 index d7883f4e1b1aa..0000000000000 --- a/python/pyspark/ml/evaluation.pyi +++ /dev/null @@ -1,277 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import abc -from typing import Optional -from pyspark.ml._typing import ( - ParamMap, - BinaryClassificationEvaluatorMetricType, - ClusteringEvaluatorMetricType, - MulticlassClassificationEvaluatorMetricType, - MultilabelClassificationEvaluatorMetricType, - RankingEvaluatorMetricType, - RegressionEvaluatorMetricType, -) - -from pyspark.ml.wrapper import JavaParams -from pyspark.ml.param import Param, Params -from pyspark.ml.param.shared import ( - HasFeaturesCol, - HasLabelCol, - HasPredictionCol, - HasProbabilityCol, - HasRawPredictionCol, - HasWeightCol, -) -from pyspark.ml.util import JavaMLReadable, JavaMLWritable -from pyspark.sql.dataframe import DataFrame - -class Evaluator(Params, metaclass=abc.ABCMeta): - def evaluate(self, dataset: DataFrame, params: Optional[ParamMap] = ...) -> float: ... - def isLargerBetter(self) -> bool: ... - -class JavaEvaluator(JavaParams, Evaluator, metaclass=abc.ABCMeta): - def isLargerBetter(self) -> bool: ... - -class BinaryClassificationEvaluator( - JavaEvaluator, - HasLabelCol, - HasRawPredictionCol, - HasWeightCol, - JavaMLReadable[BinaryClassificationEvaluator], - JavaMLWritable, -): - metricName: Param[BinaryClassificationEvaluatorMetricType] - numBins: Param[int] - def __init__( - self, - *, - rawPredictionCol: str = ..., - labelCol: str = ..., - metricName: BinaryClassificationEvaluatorMetricType = ..., - weightCol: Optional[str] = ..., - numBins: int = ..., - ) -> None: ... - def setMetricName( - self, value: BinaryClassificationEvaluatorMetricType - ) -> BinaryClassificationEvaluator: ... - def getMetricName(self) -> BinaryClassificationEvaluatorMetricType: ... - def setNumBins(self, value: int) -> BinaryClassificationEvaluator: ... - def getNumBins(self) -> int: ... - def setLabelCol(self, value: str) -> BinaryClassificationEvaluator: ... - def setRawPredictionCol(self, value: str) -> BinaryClassificationEvaluator: ... - def setWeightCol(self, value: str) -> BinaryClassificationEvaluator: ... 
- def setParams( - self, - *, - rawPredictionCol: str = ..., - labelCol: str = ..., - metricName: BinaryClassificationEvaluatorMetricType = ..., - weightCol: Optional[str] = ..., - numBins: int = ..., - ) -> BinaryClassificationEvaluator: ... - -class RegressionEvaluator( - JavaEvaluator, - HasLabelCol, - HasPredictionCol, - HasWeightCol, - JavaMLReadable[RegressionEvaluator], - JavaMLWritable, -): - metricName: Param[RegressionEvaluatorMetricType] - throughOrigin: Param[bool] - def __init__( - self, - *, - predictionCol: str = ..., - labelCol: str = ..., - metricName: RegressionEvaluatorMetricType = ..., - weightCol: Optional[str] = ..., - throughOrigin: bool = ..., - ) -> None: ... - def setMetricName(self, value: RegressionEvaluatorMetricType) -> RegressionEvaluator: ... - def getMetricName(self) -> RegressionEvaluatorMetricType: ... - def setThroughOrigin(self, value: bool) -> RegressionEvaluator: ... - def getThroughOrigin(self) -> bool: ... - def setLabelCol(self, value: str) -> RegressionEvaluator: ... - def setPredictionCol(self, value: str) -> RegressionEvaluator: ... - def setWeightCol(self, value: str) -> RegressionEvaluator: ... - def setParams( - self, - *, - predictionCol: str = ..., - labelCol: str = ..., - metricName: RegressionEvaluatorMetricType = ..., - weightCol: Optional[str] = ..., - throughOrigin: bool = ..., - ) -> RegressionEvaluator: ... - -class MulticlassClassificationEvaluator( - JavaEvaluator, - HasLabelCol, - HasPredictionCol, - HasWeightCol, - HasProbabilityCol, - JavaMLReadable[MulticlassClassificationEvaluator], - JavaMLWritable, -): - metricName: Param[MulticlassClassificationEvaluatorMetricType] - metricLabel: Param[float] - beta: Param[float] - eps: Param[float] - def __init__( - self, - *, - predictionCol: str = ..., - labelCol: str = ..., - metricName: MulticlassClassificationEvaluatorMetricType = ..., - weightCol: Optional[str] = ..., - metricLabel: float = ..., - beta: float = ..., - probabilityCol: str = ..., - eps: float = ..., - ) -> None: ... - def setMetricName( - self, value: MulticlassClassificationEvaluatorMetricType - ) -> MulticlassClassificationEvaluator: ... - def getMetricName(self) -> MulticlassClassificationEvaluatorMetricType: ... - def setMetricLabel(self, value: float) -> MulticlassClassificationEvaluator: ... - def getMetricLabel(self) -> float: ... - def setBeta(self, value: float) -> MulticlassClassificationEvaluator: ... - def getBeta(self) -> float: ... - def setEps(self, value: float) -> MulticlassClassificationEvaluator: ... - def getEps(self) -> float: ... - def setLabelCol(self, value: str) -> MulticlassClassificationEvaluator: ... - def setPredictionCol(self, value: str) -> MulticlassClassificationEvaluator: ... - def setProbabilityCol(self, value: str) -> MulticlassClassificationEvaluator: ... - def setWeightCol(self, value: str) -> MulticlassClassificationEvaluator: ... - def setParams( - self, - *, - predictionCol: str = ..., - labelCol: str = ..., - metricName: MulticlassClassificationEvaluatorMetricType = ..., - weightCol: Optional[str] = ..., - metricLabel: float = ..., - beta: float = ..., - probabilityCol: str = ..., - eps: float = ..., - ) -> MulticlassClassificationEvaluator: ... 
- -class MultilabelClassificationEvaluator( - JavaEvaluator, - HasLabelCol, - HasPredictionCol, - JavaMLReadable[MultilabelClassificationEvaluator], - JavaMLWritable, -): - metricName: Param[MultilabelClassificationEvaluatorMetricType] - metricLabel: Param[float] - def __init__( - self, - *, - predictionCol: str = ..., - labelCol: str = ..., - metricName: MultilabelClassificationEvaluatorMetricType = ..., - metricLabel: float = ..., - ) -> None: ... - def setMetricName( - self, value: MultilabelClassificationEvaluatorMetricType - ) -> MultilabelClassificationEvaluator: ... - def getMetricName(self) -> MultilabelClassificationEvaluatorMetricType: ... - def setMetricLabel(self, value: float) -> MultilabelClassificationEvaluator: ... - def getMetricLabel(self) -> float: ... - def setLabelCol(self, value: str) -> MultilabelClassificationEvaluator: ... - def setPredictionCol(self, value: str) -> MultilabelClassificationEvaluator: ... - def setParams( - self, - *, - predictionCol: str = ..., - labelCol: str = ..., - metricName: MultilabelClassificationEvaluatorMetricType = ..., - metricLabel: float = ..., - ) -> MultilabelClassificationEvaluator: ... - -class ClusteringEvaluator( - JavaEvaluator, - HasPredictionCol, - HasFeaturesCol, - HasWeightCol, - JavaMLReadable[ClusteringEvaluator], - JavaMLWritable, -): - metricName: Param[ClusteringEvaluatorMetricType] - distanceMeasure: Param[str] - def __init__( - self, - *, - predictionCol: str = ..., - featuresCol: str = ..., - metricName: ClusteringEvaluatorMetricType = ..., - distanceMeasure: str = ..., - weightCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - predictionCol: str = ..., - featuresCol: str = ..., - metricName: ClusteringEvaluatorMetricType = ..., - distanceMeasure: str = ..., - weightCol: Optional[str] = ..., - ) -> ClusteringEvaluator: ... - def setMetricName(self, value: ClusteringEvaluatorMetricType) -> ClusteringEvaluator: ... - def getMetricName(self) -> ClusteringEvaluatorMetricType: ... - def setDistanceMeasure(self, value: str) -> ClusteringEvaluator: ... - def getDistanceMeasure(self) -> str: ... - def setFeaturesCol(self, value: str) -> ClusteringEvaluator: ... - def setPredictionCol(self, value: str) -> ClusteringEvaluator: ... - def setWeightCol(self, value: str) -> ClusteringEvaluator: ... - -class RankingEvaluator( - JavaEvaluator, - HasLabelCol, - HasPredictionCol, - JavaMLReadable[RankingEvaluator], - JavaMLWritable, -): - metricName: Param[RankingEvaluatorMetricType] - k: Param[int] - def __init__( - self, - *, - predictionCol: str = ..., - labelCol: str = ..., - metricName: RankingEvaluatorMetricType = ..., - k: int = ..., - ) -> None: ... - def setMetricName(self, value: RankingEvaluatorMetricType) -> RankingEvaluator: ... - def getMetricName(self) -> RankingEvaluatorMetricType: ... - def setK(self, value: int) -> RankingEvaluator: ... - def getK(self) -> int: ... - def setLabelCol(self, value: str) -> RankingEvaluator: ... - def setPredictionCol(self, value: str) -> RankingEvaluator: ... - def setParams( - self, - *, - predictionCol: str = ..., - labelCol: str = ..., - metricName: RankingEvaluatorMetricType = ..., - k: int = ..., - ) -> RankingEvaluator: ... 
diff --git a/python/pyspark/ml/tests/typing/test_evaluation.yml b/python/pyspark/ml/tests/typing/test_evaluation.yml index e9e8f20570b45..a60166dfb96fd 100644 --- a/python/pyspark/ml/tests/typing/test_evaluation.yml +++ b/python/pyspark/ml/tests/typing/test_evaluation.yml @@ -24,3 +24,5 @@ BinaryClassificationEvaluator().setMetricName("foo") # E: Argument 1 to "setMetricName" of "BinaryClassificationEvaluator" has incompatible type "Literal['foo']"; expected "Union[Literal['areaUnderROC'], Literal['areaUnderPR']]" [arg-type] BinaryClassificationEvaluator(metricName="bar") # E: Argument "metricName" to "BinaryClassificationEvaluator" has incompatible type "Literal['bar']"; expected "Union[Literal['areaUnderROC'], Literal['areaUnderPR']]" [arg-type] + + reveal_type(BinaryClassificationEvaluator.load("foo")) # N: Revealed type is "pyspark.ml.evaluation.BinaryClassificationEvaluator*" From 0af4fc8ea837d9d7f7f023e2eb3b9807fb073db1 Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 9 Feb 2022 03:16:20 +0100 Subject: [PATCH 184/513] [SPARK-37414][PYTHON][ML] Inline type hints for pyspark.ml.tuning ### What changes were proposed in this pull request? This PR migrates type `pyspark.ml.tuning` annotations from stub file to inline type hints. ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35406 from zero323/SPARK-37414. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/ml/base.py | 3 +- python/pyspark/ml/param/__init__.py | 2 +- python/pyspark/ml/tuning.py | 476 +++++++++++++++++----------- python/pyspark/ml/tuning.pyi | 228 ------------- python/pyspark/ml/util.py | 7 +- 5 files changed, 302 insertions(+), 414 deletions(-) delete mode 100644 python/pyspark/ml/tuning.pyi diff --git a/python/pyspark/ml/base.py b/python/pyspark/ml/base.py index 9e8252d321a8e..20540ebbef65a 100644 --- a/python/pyspark/ml/base.py +++ b/python/pyspark/ml/base.py @@ -24,7 +24,6 @@ Any, Callable, Generic, - Iterable, Iterator, List, Optional, @@ -133,7 +132,7 @@ def _fit(self, dataset: DataFrame) -> M: def fitMultiple( self, dataset: DataFrame, paramMaps: Sequence["ParamMap"] - ) -> Iterable[Tuple[int, M]]: + ) -> Iterator[Tuple[int, M]]: """ Fits a model to the input dataset for each param map in `paramMaps`. diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index ee2c289cd0bce..6c223c61df870 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -569,7 +569,7 @@ def _copyValues(self, to: P, extra: Optional["ParamMap"] = None) -> P: to._set(**{param.name: paramMap[param]}) return to - def _resetUid(self: "P", newUid: Any) -> "P": + def _resetUid(self: P, newUid: Any) -> P: """ Changes the uid of this instance. This updates both the stored uid and the parent uid of params and param maps. 
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 47805c9c2bee9..9fae5fe9af715 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -20,12 +20,28 @@ import itertools from multiprocessing.pool import ThreadPool +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + Optional, + Sequence, + Tuple, + Type, + Union, + cast, + overload, + TYPE_CHECKING, +) + import numpy as np from pyspark import keyword_only, since, SparkContext, inheritable_thread_target from pyspark.ml import Estimator, Transformer, Model from pyspark.ml.common import inherit_doc, _py2java, _java2py -from pyspark.ml.evaluation import Evaluator +from pyspark.ml.evaluation import Evaluator, JavaEvaluator from pyspark.ml.param import Params, Param, TypeConverters from pyspark.ml.param.shared import HasCollectSubModels, HasParallelism, HasSeed from pyspark.ml.util import ( @@ -43,6 +59,13 @@ from pyspark.sql.functions import col, lit, rand, UserDefinedFunction from pyspark.sql.types import BooleanType +from pyspark.sql.dataframe import DataFrame + +if TYPE_CHECKING: + from pyspark.ml._typing import ParamMap + from py4j.java_gateway import JavaObject # type: ignore[import] + from py4j.java_collections import JavaArray # type: ignore[import] + __all__ = [ "ParamGridBuilder", "CrossValidator", @@ -52,7 +75,14 @@ ] -def _parallelFitTasks(est, train, eva, validation, epm, collectSubModel): +def _parallelFitTasks( + est: Estimator, + train: DataFrame, + eva: Evaluator, + validation: DataFrame, + epm: Sequence["ParamMap"], + collectSubModel: bool, +) -> List[Callable[[], Tuple[int, float, Transformer]]]: """ Creates a list of callables which can be called from different threads to fit and evaluate an estimator in parallel. Each callable returns an `(index, metric)` pair. @@ -79,7 +109,7 @@ def _parallelFitTasks(est, train, eva, validation, epm, collectSubModel): """ modelIter = est.fitMultiple(train, epm) - def singleTask(): + def singleTask() -> Tuple[int, float, Transformer]: index, model = next(modelIter) # TODO: duplicate evaluator to take extra params from input # Note: Supporting tuning params in evaluator need update method @@ -119,11 +149,11 @@ class ParamGridBuilder: True """ - def __init__(self): - self._param_grid = {} + def __init__(self) -> None: + self._param_grid: "ParamMap" = {} @since("1.4.0") - def addGrid(self, param, values): + def addGrid(self, param: Param[Any], values: List[Any]) -> "ParamGridBuilder": """ Sets the given parameters in this grid to fixed values. @@ -137,8 +167,16 @@ def addGrid(self, param, values): return self + @overload + def baseOn(self, __args: "ParamMap") -> "ParamGridBuilder": + ... + + @overload + def baseOn(self, *args: Tuple[Param, Any]) -> "ParamGridBuilder": + ... + @since("1.4.0") - def baseOn(self, *args): + def baseOn(self, *args: Union["ParamMap", Tuple[Param, Any]]) -> "ParamGridBuilder": """ Sets the given parameters in this grid to fixed values. Accepts either a parameter dictionary or a list of (parameter, value) pairs. @@ -152,7 +190,7 @@ def baseOn(self, *args): return self @since("1.4.0") - def build(self): + def build(self) -> List["ParamMap"]: """ Builds and returns all combinations of parameters specified by the param grid. 
@@ -160,7 +198,9 @@ def build(self): keys = self._param_grid.keys() grid_values = self._param_grid.values() - def to_key_value_pairs(keys, values): + def to_key_value_pairs( + keys: Iterable[Param], values: Iterable[Any] + ) -> Sequence[Tuple[Param, Any]]: return [(key, key.typeConverter(value)) for key, value in zip(keys, values)] return [dict(to_key_value_pairs(keys, prod)) for prod in itertools.product(*grid_values)] @@ -171,47 +211,53 @@ class _ValidatorParams(HasSeed): Common params for TrainValidationSplit and CrossValidator. """ - estimator = Param(Params._dummy(), "estimator", "estimator to be cross-validated") - estimatorParamMaps = Param(Params._dummy(), "estimatorParamMaps", "estimator param maps") - evaluator = Param( + estimator: Param[Estimator] = Param( + Params._dummy(), "estimator", "estimator to be cross-validated" + ) + estimatorParamMaps: Param[List["ParamMap"]] = Param( + Params._dummy(), "estimatorParamMaps", "estimator param maps" + ) + evaluator: Param[Evaluator] = Param( Params._dummy(), "evaluator", "evaluator used to select hyper-parameters that maximize the validator metric", ) @since("2.0.0") - def getEstimator(self): + def getEstimator(self) -> Estimator: """ Gets the value of estimator or its default value. """ return self.getOrDefault(self.estimator) @since("2.0.0") - def getEstimatorParamMaps(self): + def getEstimatorParamMaps(self) -> List["ParamMap"]: """ Gets the value of estimatorParamMaps or its default value. """ return self.getOrDefault(self.estimatorParamMaps) @since("2.0.0") - def getEvaluator(self): + def getEvaluator(self) -> Evaluator: """ Gets the value of evaluator or its default value. """ return self.getOrDefault(self.evaluator) @classmethod - def _from_java_impl(cls, java_stage): + def _from_java_impl( + cls, java_stage: "JavaObject" + ) -> Tuple[Estimator, List["ParamMap"], Evaluator]: """ Return Python estimator, estimatorParamMaps, and evaluator from a Java ValidatorParams. """ # Load information from java_stage to the instance. - estimator = JavaParams._from_java(java_stage.getEstimator()) - evaluator = JavaParams._from_java(java_stage.getEvaluator()) + estimator: Estimator = JavaParams._from_java(java_stage.getEstimator()) + evaluator: Evaluator = JavaParams._from_java(java_stage.getEvaluator()) if isinstance(estimator, JavaEstimator): epms = [ - estimator._transfer_param_map_from_java(epm) + cast("JavaEstimator", estimator)._transfer_param_map_from_java(epm) for epm in java_stage.getEstimatorParamMaps() ] elif MetaAlgorithmReadWrite.isMetaEstimator(estimator): @@ -224,19 +270,21 @@ def _from_java_impl(cls, java_stage): return estimator, epms, evaluator - def _to_java_impl(self): + def _to_java_impl(self) -> Tuple["JavaObject", "JavaObject", "JavaObject"]: """ Return Java estimator, estimatorParamMaps, and evaluator from this Python instance. 
""" gateway = SparkContext._gateway + assert gateway is not None and SparkContext._jvm is not None + cls = SparkContext._jvm.org.apache.spark.ml.param.ParamMap estimator = self.getEstimator() if isinstance(estimator, JavaEstimator): java_epms = gateway.new_array(cls, len(self.getEstimatorParamMaps())) for idx, epm in enumerate(self.getEstimatorParamMaps()): - java_epms[idx] = self.getEstimator()._transfer_param_map_to_java(epm) + java_epms[idx] = cast(JavaEstimator, estimator)._transfer_param_map_to_java(epm) elif MetaAlgorithmReadWrite.isMetaEstimator(estimator): # Meta estimator such as Pipeline, OneVsRest java_epms = _ValidatorSharedReadWrite.meta_estimator_transfer_param_maps_to_java( @@ -245,18 +293,24 @@ def _to_java_impl(self): else: raise ValueError("Unsupported estimator used in tuning: " + str(estimator)) - java_estimator = self.getEstimator()._to_java() - java_evaluator = self.getEvaluator()._to_java() + java_estimator = cast(JavaEstimator, self.getEstimator())._to_java() + java_evaluator = cast(JavaEvaluator, self.getEvaluator())._to_java() return java_estimator, java_epms, java_evaluator class _ValidatorSharedReadWrite: @staticmethod - def meta_estimator_transfer_param_maps_to_java(pyEstimator, pyParamMaps): + def meta_estimator_transfer_param_maps_to_java( + pyEstimator: Estimator, pyParamMaps: Sequence["ParamMap"] + ) -> "JavaArray": pyStages = MetaAlgorithmReadWrite.getAllNestedStages(pyEstimator) - stagePairs = list(map(lambda stage: (stage, stage._to_java()), pyStages)) + stagePairs = list(map(lambda stage: (stage, cast(JavaParams, stage)._to_java()), pyStages)) sc = SparkContext._active_spark_context + assert ( + sc is not None and SparkContext._jvm is not None and SparkContext._gateway is not None + ) + paramMapCls = SparkContext._jvm.org.apache.spark.ml.param.ParamMap javaParamMaps = SparkContext._gateway.new_array(paramMapCls, len(pyParamMaps)) @@ -271,7 +325,7 @@ def meta_estimator_transfer_param_maps_to_java(pyEstimator, pyParamMaps): if javaParam is None: raise ValueError("Resolve param in estimatorParamMaps failed: " + str(pyParam)) if isinstance(pyValue, Params) and hasattr(pyValue, "_to_java"): - javaValue = pyValue._to_java() + javaValue = cast(JavaParams, pyValue)._to_java() else: javaValue = _py2java(sc, pyValue) pair = javaParam.w(javaValue) @@ -280,10 +334,15 @@ def meta_estimator_transfer_param_maps_to_java(pyEstimator, pyParamMaps): return javaParamMaps @staticmethod - def meta_estimator_transfer_param_maps_from_java(pyEstimator, javaParamMaps): + def meta_estimator_transfer_param_maps_from_java( + pyEstimator: Estimator, javaParamMaps: "JavaArray" + ) -> List["ParamMap"]: pyStages = MetaAlgorithmReadWrite.getAllNestedStages(pyEstimator) - stagePairs = list(map(lambda stage: (stage, stage._to_java()), pyStages)) + stagePairs = list(map(lambda stage: (stage, cast(JavaParams, stage)._to_java()), pyStages)) sc = SparkContext._active_spark_context + + assert sc is not None and sc._jvm is not None + pyParamMaps = [] for javaParamMap in javaParamMaps: pyParamMap = dict() @@ -301,6 +360,7 @@ def meta_estimator_transfer_param_maps_from_java(pyEstimator, javaParamMaps): + javaParam.name() ) javaValue = javaPair.value() + pyValue: Any if sc._jvm.Class.forName( "org.apache.spark.ml.util.DefaultParamsWritable" ).isInstance(javaValue): @@ -312,20 +372,25 @@ def meta_estimator_transfer_param_maps_from_java(pyEstimator, javaParamMaps): return pyParamMaps @staticmethod - def is_java_convertible(instance): + def is_java_convertible(instance: _ValidatorParams) -> bool: 
allNestedStages = MetaAlgorithmReadWrite.getAllNestedStages(instance.getEstimator()) evaluator_convertible = isinstance(instance.getEvaluator(), JavaParams) estimator_convertible = all(map(lambda stage: hasattr(stage, "_to_java"), allNestedStages)) return estimator_convertible and evaluator_convertible @staticmethod - def saveImpl(path, instance, sc, extraMetadata=None): + def saveImpl( + path: str, + instance: _ValidatorParams, + sc: SparkContext, + extraMetadata: Optional[Dict[str, Any]] = None, + ) -> None: numParamsNotJson = 0 jsonEstimatorParamMaps = [] for paramMap in instance.getEstimatorParamMaps(): jsonParamMap = [] for p, v in paramMap.items(): - jsonParam = {"parent": p.parent, "name": p.name} + jsonParam: Dict[str, Any] = {"parent": p.parent, "name": p.name} if ( (isinstance(v, Estimator) and not MetaAlgorithmReadWrite.isMetaEstimator(v)) or isinstance(v, Transformer) @@ -334,7 +399,7 @@ def saveImpl(path, instance, sc, extraMetadata=None): relative_path = f"epm_{p.name}{numParamsNotJson}" param_path = os.path.join(path, relative_path) numParamsNotJson += 1 - v.save(param_path) + cast(MLWritable, v).save(param_path) jsonParam["value"] = relative_path jsonParam["isJson"] = False elif isinstance(v, MLWritable): @@ -355,16 +420,18 @@ def saveImpl(path, instance, sc, extraMetadata=None): DefaultParamsWriter.saveMetadata(instance, path, sc, extraMetadata, jsonParams) evaluatorPath = os.path.join(path, "evaluator") - instance.getEvaluator().save(evaluatorPath) + cast(MLWritable, instance.getEvaluator()).save(evaluatorPath) estimatorPath = os.path.join(path, "estimator") - instance.getEstimator().save(estimatorPath) + cast(MLWritable, instance.getEstimator()).save(estimatorPath) @staticmethod - def load(path, sc, metadata): + def load( + path: str, sc: SparkContext, metadata: Dict[str, Any] + ) -> Tuple[Dict[str, Any], Estimator, Evaluator, List["ParamMap"]]: evaluatorPath = os.path.join(path, "evaluator") - evaluator = DefaultParamsReader.loadParamsInstance(evaluatorPath, sc) + evaluator: Evaluator = DefaultParamsReader.loadParamsInstance(evaluatorPath, sc) estimatorPath = os.path.join(path, "estimator") - estimator = DefaultParamsReader.loadParamsInstance(estimatorPath, sc) + estimator: Estimator = DefaultParamsReader.loadParamsInstance(estimatorPath, sc) uidToParams = MetaAlgorithmReadWrite.getUidMap(estimator) uidToParams[evaluator.uid] = evaluator @@ -389,12 +456,12 @@ def load(path, sc, metadata): return metadata, estimator, evaluator, estimatorParamMaps @staticmethod - def validateParams(instance): + def validateParams(instance: _ValidatorParams) -> None: estiamtor = instance.getEstimator() evaluator = instance.getEvaluator() uidMap = MetaAlgorithmReadWrite.getUidMap(estiamtor) - for elem in [evaluator] + list(uidMap.values()): + for elem in [evaluator] + list(uidMap.values()): # type: ignore[arg-type] if not isinstance(elem, MLWritable): raise ValueError( f"Validator write will fail because it contains {elem.uid} " @@ -412,7 +479,7 @@ def validateParams(instance): raise ValueError(paramErr + repr(param)) @staticmethod - def getValidatorModelWriterPersistSubModelsParam(writer): + def getValidatorModelWriterPersistSubModelsParam(writer: MLWriter) -> bool: if "persistsubmodels" in writer.optionMap: persistSubModelsParam = writer.optionMap["persistsubmodels"].lower() if persistSubModelsParam == "true": @@ -425,10 +492,10 @@ def getValidatorModelWriterPersistSubModelsParam(writer): f"the possible values are True, 'True' or False, 'False'" ) else: - return writer.instance.subModels 
is not None + return writer.instance.subModels is not None # type: ignore[attr-defined] -_save_with_persist_submodels_no_submodels_found_err = ( +_save_with_persist_submodels_no_submodels_found_err: str = ( "When persisting tuning models, you can only set persistSubModels to true if the tuning " "was done with collectSubModels set to true. To save the sub-models, try rerunning fitting " "with collectSubModels set to true." @@ -436,15 +503,15 @@ def getValidatorModelWriterPersistSubModelsParam(writer): @inherit_doc -class CrossValidatorReader(MLReader): - def __init__(self, cls): +class CrossValidatorReader(MLReader["CrossValidator"]): + def __init__(self, cls: Type["CrossValidator"]): super(CrossValidatorReader, self).__init__() self.cls = cls - def load(self, path): + def load(self, path: str) -> "CrossValidator": metadata = DefaultParamsReader.loadMetadata(path, self.sc) if not DefaultParamsReader.isPythonParamsInstance(metadata): - return JavaMLReader(self.cls).load(path) + return JavaMLReader(self.cls).load(path) # type: ignore[arg-type] else: metadata, estimator, evaluator, estimatorParamMaps = _ValidatorSharedReadWrite.load( path, self.sc, metadata @@ -459,32 +526,32 @@ def load(self, path): @inherit_doc class CrossValidatorWriter(MLWriter): - def __init__(self, instance): + def __init__(self, instance: "CrossValidator"): super(CrossValidatorWriter, self).__init__() self.instance = instance - def saveImpl(self, path): + def saveImpl(self, path: str) -> None: _ValidatorSharedReadWrite.validateParams(self.instance) _ValidatorSharedReadWrite.saveImpl(path, self.instance, self.sc) @inherit_doc -class CrossValidatorModelReader(MLReader): - def __init__(self, cls): +class CrossValidatorModelReader(MLReader["CrossValidatorModel"]): + def __init__(self, cls: Type["CrossValidatorModel"]): super(CrossValidatorModelReader, self).__init__() self.cls = cls - def load(self, path): + def load(self, path: str) -> "CrossValidatorModel": metadata = DefaultParamsReader.loadMetadata(path, self.sc) if not DefaultParamsReader.isPythonParamsInstance(metadata): - return JavaMLReader(self.cls).load(path) + return JavaMLReader(self.cls).load(path) # type: ignore[arg-type] else: metadata, estimator, evaluator, estimatorParamMaps = _ValidatorSharedReadWrite.load( path, self.sc, metadata ) numFolds = metadata["paramMap"]["numFolds"] bestModelPath = os.path.join(path, "bestModel") - bestModel = DefaultParamsReader.loadParamsInstance(bestModelPath, self.sc) + bestModel: Model = DefaultParamsReader.loadParamsInstance(bestModelPath, self.sc) avgMetrics = metadata["avgMetrics"] if "stdMetrics" in metadata: stdMetrics = metadata["stdMetrics"] @@ -506,7 +573,10 @@ def load(self, path): subModels = None cvModel = CrossValidatorModel( - bestModel, avgMetrics=avgMetrics, subModels=subModels, stdMetrics=stdMetrics + bestModel, + avgMetrics=avgMetrics, + subModels=cast(List[List[Model]], subModels), + stdMetrics=stdMetrics, ) cvModel = cvModel._resetUid(metadata["uid"]) cvModel.set(cvModel.estimator, estimator) @@ -520,11 +590,11 @@ def load(self, path): @inherit_doc class CrossValidatorModelWriter(MLWriter): - def __init__(self, instance): + def __init__(self, instance: "CrossValidatorModel"): super(CrossValidatorModelWriter, self).__init__() self.instance = instance - def saveImpl(self, path): + def saveImpl(self, path: str) -> None: _ValidatorSharedReadWrite.validateParams(self.instance) instance = self.instance persistSubModels = _ValidatorSharedReadWrite.getValidatorModelWriterPersistSubModelsParam( @@ -536,7 +606,7 @@ 
def saveImpl(self, path): _ValidatorSharedReadWrite.saveImpl(path, instance, self.sc, extraMetadata=extraMetadata) bestModelPath = os.path.join(path, "bestModel") - instance.bestModel.save(bestModelPath) + cast(MLWritable, instance.bestModel).save(bestModelPath) if persistSubModels: if instance.subModels is None: raise ValueError(_save_with_persist_submodels_no_submodels_found_err) @@ -545,7 +615,7 @@ def saveImpl(self, path): splitPath = os.path.join(subModelsPath, f"fold{splitIndex}") for paramIndex in range(len(instance.getEstimatorParamMaps())): modelPath = os.path.join(splitPath, f"{paramIndex}") - instance.subModels[splitIndex][paramIndex].save(modelPath) + cast(MLWritable, instance.subModels[splitIndex][paramIndex]).save(modelPath) class _CrossValidatorParams(_ValidatorParams): @@ -555,14 +625,14 @@ class _CrossValidatorParams(_ValidatorParams): .. versionadded:: 3.0.0 """ - numFolds = Param( + numFolds: Param[int] = Param( Params._dummy(), "numFolds", "number of folds for cross validation", typeConverter=TypeConverters.toInt, ) - foldCol = Param( + foldCol: Param[str] = Param( Params._dummy(), "foldCol", "Param for the column name of user " @@ -573,19 +643,19 @@ class _CrossValidatorParams(_ValidatorParams): typeConverter=TypeConverters.toString, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_CrossValidatorParams, self).__init__(*args) self._setDefault(numFolds=3, foldCol="") @since("1.4.0") - def getNumFolds(self): + def getNumFolds(self) -> int: """ Gets the value of numFolds or its default value. """ return self.getOrDefault(self.numFolds) @since("3.1.0") - def getFoldCol(self): + def getFoldCol(self) -> str: """ Gets the value of foldCol or its default value. """ @@ -593,7 +663,12 @@ def getFoldCol(self): class CrossValidator( - Estimator, _CrossValidatorParams, HasParallelism, HasCollectSubModels, MLReadable, MLWritable + Estimator["CrossValidatorModel"], + _CrossValidatorParams, + HasParallelism, + HasCollectSubModels, + MLReadable["CrossValidator"], + MLWritable, ): """ @@ -641,19 +716,21 @@ class CrossValidator( 0.8333... 
""" + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - estimator=None, - estimatorParamMaps=None, - evaluator=None, - numFolds=3, - seed=None, - parallelism=1, - collectSubModels=False, - foldCol="", - ): + estimator: Optional[Estimator] = None, + estimatorParamMaps: Optional[List["ParamMap"]] = None, + evaluator: Optional[Evaluator] = None, + numFolds: int = 3, + seed: Optional[int] = None, + parallelism: int = 1, + collectSubModels: bool = False, + foldCol: str = "", + ) -> None: """ __init__(self, \\*, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3,\ seed=None, parallelism=1, collectSubModels=False, foldCol="") @@ -668,15 +745,15 @@ def __init__( def setParams( self, *, - estimator=None, - estimatorParamMaps=None, - evaluator=None, - numFolds=3, - seed=None, - parallelism=1, - collectSubModels=False, - foldCol="", - ): + estimator: Optional[Estimator] = None, + estimatorParamMaps: Optional[List["ParamMap"]] = None, + evaluator: Optional[Evaluator] = None, + numFolds: int = 3, + seed: Optional[int] = None, + parallelism: int = 1, + collectSubModels: bool = False, + foldCol: str = "", + ) -> "CrossValidator": """ setParams(self, \\*, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3,\ seed=None, parallelism=1, collectSubModels=False, foldCol=""): @@ -686,65 +763,65 @@ def setParams( return self._set(**kwargs) @since("2.0.0") - def setEstimator(self, value): + def setEstimator(self, value: Estimator) -> "CrossValidator": """ Sets the value of :py:attr:`estimator`. """ return self._set(estimator=value) @since("2.0.0") - def setEstimatorParamMaps(self, value): + def setEstimatorParamMaps(self, value: List["ParamMap"]) -> "CrossValidator": """ Sets the value of :py:attr:`estimatorParamMaps`. """ return self._set(estimatorParamMaps=value) @since("2.0.0") - def setEvaluator(self, value): + def setEvaluator(self, value: Evaluator) -> "CrossValidator": """ Sets the value of :py:attr:`evaluator`. """ return self._set(evaluator=value) @since("1.4.0") - def setNumFolds(self, value): + def setNumFolds(self, value: int) -> "CrossValidator": """ Sets the value of :py:attr:`numFolds`. """ return self._set(numFolds=value) @since("3.1.0") - def setFoldCol(self, value): + def setFoldCol(self, value: str) -> "CrossValidator": """ Sets the value of :py:attr:`foldCol`. """ return self._set(foldCol=value) - def setSeed(self, value): + def setSeed(self, value: int) -> "CrossValidator": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) - def setParallelism(self, value): + def setParallelism(self, value: int) -> "CrossValidator": """ Sets the value of :py:attr:`parallelism`. """ return self._set(parallelism=value) - def setCollectSubModels(self, value): + def setCollectSubModels(self, value: bool) -> "CrossValidator": """ Sets the value of :py:attr:`collectSubModels`. 
""" return self._set(collectSubModels=value) @staticmethod - def _gen_avg_and_std_metrics(metrics_all): + def _gen_avg_and_std_metrics(metrics_all: List[List[float]]) -> Tuple[List[float], List[float]]: avg_metrics = np.mean(metrics_all, axis=0) std_metrics = np.std(metrics_all, axis=0) return list(avg_metrics), list(std_metrics) - def _fit(self, dataset): + def _fit(self, dataset: DataFrame) -> "CrossValidatorModel": est = self.getOrDefault(self.estimator) epm = self.getOrDefault(self.estimatorParamMaps) numModels = len(epm) @@ -770,6 +847,7 @@ def _fit(self, dataset): for j, metric, subModel in pool.imap_unordered(lambda f: f(), tasks): metrics_all[i][j] = metric if collectSubModelsParam: + assert subModels is not None subModels[i][j] = subModel validation.unpersist() @@ -782,9 +860,11 @@ def _fit(self, dataset): else: bestIndex = np.argmin(metrics) bestModel = est.fit(dataset, epm[bestIndex]) - return self._copyValues(CrossValidatorModel(bestModel, metrics, subModels, std_metrics)) + return self._copyValues( + CrossValidatorModel(bestModel, metrics, cast(List[List[Model]], subModels), std_metrics) + ) - def _kFold(self, dataset): + def _kFold(self, dataset: DataFrame) -> List[Tuple[DataFrame, DataFrame]]: nFolds = self.getOrDefault(self.numFolds) foldCol = self.getOrDefault(self.foldCol) @@ -804,7 +884,7 @@ def _kFold(self, dataset): datasets.append((train, validation)) else: # Use user-specified fold numbers. - def checker(foldNum): + def checker(foldNum: int) -> bool: if foldNum < 0 or foldNum >= nFolds: raise ValueError( "Fold number must be in range [0, %s), but got %s." % (nFolds, foldNum) @@ -825,7 +905,7 @@ def checker(foldNum): return datasets - def copy(self, extra=None): + def copy(self, extra: Optional["ParamMap"] = None) -> "CrossValidator": """ Creates a copy of this instance with a randomly generated uid and some extra params. This copies creates a deep copy of @@ -855,20 +935,20 @@ def copy(self, extra=None): return newCV @since("2.3.0") - def write(self): + def write(self) -> MLWriter: """Returns an MLWriter instance for this ML instance.""" if _ValidatorSharedReadWrite.is_java_convertible(self): - return JavaMLWriter(self) + return JavaMLWriter(self) # type: ignore[arg-type] return CrossValidatorWriter(self) @classmethod @since("2.3.0") - def read(cls): + def read(cls) -> CrossValidatorReader: """Returns an MLReader instance for this class.""" return CrossValidatorReader(cls) @classmethod - def _from_java(cls, java_stage): + def _from_java(cls, java_stage: "JavaObject") -> "CrossValidator": """ Given a Java CrossValidator, create and return a Python wrapper of it. Used for ML persistence. @@ -894,7 +974,7 @@ def _from_java(cls, java_stage): py_stage._resetUid(java_stage.uid()) return py_stage - def _to_java(self): + def _to_java(self) -> "JavaObject": """ Transfer this instance to a Java CrossValidator. Used for ML persistence. @@ -919,7 +999,9 @@ def _to_java(self): return _java_obj -class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): +class CrossValidatorModel( + Model, _CrossValidatorParams, MLReadable["CrossValidatorModel"], MLWritable +): """ CrossValidatorModel contains the model with the highest average cross-validation metric across folds and uses this model to transform input data. CrossValidatorModel @@ -934,7 +1016,13 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): CrossValidator.estimatorParamMaps. 
""" - def __init__(self, bestModel, avgMetrics=None, subModels=None, stdMetrics=None): + def __init__( + self, + bestModel: Model, + avgMetrics: Optional[List[float]] = None, + subModels: Optional[List[List[Model]]] = None, + stdMetrics: Optional[List[float]] = None, + ): super(CrossValidatorModel, self).__init__() #: best model from cross validation self.bestModel = bestModel @@ -947,10 +1035,10 @@ def __init__(self, bestModel, avgMetrics=None, subModels=None, stdMetrics=None): #: CrossValidator.estimatorParamMaps, in the corresponding order. self.stdMetrics = stdMetrics or [] - def _transform(self, dataset): + def _transform(self, dataset: DataFrame) -> DataFrame: return self.bestModel.transform(dataset) - def copy(self, extra=None): + def copy(self, extra: Optional["ParamMap"] = None) -> "CrossValidatorModel": """ Creates a copy of this instance with a randomly generated uid and some extra params. This copies the underlying bestModel, @@ -974,6 +1062,7 @@ def copy(self, extra=None): extra = dict() bestModel = self.bestModel.copy(extra) avgMetrics = list(self.avgMetrics) + assert self.subModels is not None subModels = [ [sub_model.copy() for sub_model in fold_sub_models] for fold_sub_models in self.subModels @@ -984,26 +1073,28 @@ def copy(self, extra=None): ) @since("2.3.0") - def write(self): + def write(self) -> MLWriter: """Returns an MLWriter instance for this ML instance.""" if _ValidatorSharedReadWrite.is_java_convertible(self): - return JavaMLWriter(self) + return JavaMLWriter(self) # type: ignore[arg-type] return CrossValidatorModelWriter(self) @classmethod @since("2.3.0") - def read(cls): + def read(cls) -> CrossValidatorModelReader: """Returns an MLReader instance for this class.""" return CrossValidatorModelReader(cls) @classmethod - def _from_java(cls, java_stage): + def _from_java(cls, java_stage: "JavaObject") -> "CrossValidatorModel": """ Given a Java CrossValidatorModel, create and return a Python wrapper of it. Used for ML persistence. """ sc = SparkContext._active_spark_context - bestModel = JavaParams._from_java(java_stage.bestModel()) + assert sc is not None + + bestModel: Model = JavaParams._from_java(java_stage.bestModel()) avgMetrics = _java2py(sc, java_stage.avgMetrics()) estimator, epms, evaluator = super(CrossValidatorModel, cls)._from_java_impl(java_stage) @@ -1028,7 +1119,7 @@ def _from_java(cls, java_stage): py_stage._resetUid(java_stage.uid()) return py_stage - def _to_java(self): + def _to_java(self) -> "JavaObject": """ Transfer this instance to a Java CrossValidatorModel. Used for ML persistence. 
@@ -1039,10 +1130,12 @@ def _to_java(self): """ sc = SparkContext._active_spark_context + assert sc is not None + _java_obj = JavaParams._new_java_obj( "org.apache.spark.ml.tuning.CrossValidatorModel", self.uid, - self.bestModel._to_java(), + cast(JavaParams, self.bestModel)._to_java(), _py2java(sc, self.avgMetrics), ) estimator, epms, evaluator = super(CrossValidatorModel, self)._to_java_impl() @@ -1062,7 +1155,7 @@ def _to_java(self): if self.subModels is not None: java_sub_models = [ - [sub_model._to_java() for sub_model in fold_sub_models] + [cast(JavaParams, sub_model)._to_java() for sub_model in fold_sub_models] for fold_sub_models in self.subModels ] _java_obj.setSubModels(java_sub_models) @@ -1070,15 +1163,15 @@ def _to_java(self): @inherit_doc -class TrainValidationSplitReader(MLReader): - def __init__(self, cls): +class TrainValidationSplitReader(MLReader["TrainValidationSplit"]): + def __init__(self, cls: Type["TrainValidationSplit"]): super(TrainValidationSplitReader, self).__init__() self.cls = cls - def load(self, path): + def load(self, path: str) -> "TrainValidationSplit": metadata = DefaultParamsReader.loadMetadata(path, self.sc) if not DefaultParamsReader.isPythonParamsInstance(metadata): - return JavaMLReader(self.cls).load(path) + return JavaMLReader(self.cls).load(path) # type: ignore[arg-type] else: metadata, estimator, evaluator, estimatorParamMaps = _ValidatorSharedReadWrite.load( path, self.sc, metadata @@ -1093,31 +1186,31 @@ def load(self, path): @inherit_doc class TrainValidationSplitWriter(MLWriter): - def __init__(self, instance): + def __init__(self, instance: "TrainValidationSplit"): super(TrainValidationSplitWriter, self).__init__() self.instance = instance - def saveImpl(self, path): + def saveImpl(self, path: str) -> None: _ValidatorSharedReadWrite.validateParams(self.instance) _ValidatorSharedReadWrite.saveImpl(path, self.instance, self.sc) @inherit_doc -class TrainValidationSplitModelReader(MLReader): - def __init__(self, cls): +class TrainValidationSplitModelReader(MLReader["TrainValidationSplitModel"]): + def __init__(self, cls: Type["TrainValidationSplitModel"]): super(TrainValidationSplitModelReader, self).__init__() self.cls = cls - def load(self, path): + def load(self, path: str) -> "TrainValidationSplitModel": metadata = DefaultParamsReader.loadMetadata(path, self.sc) if not DefaultParamsReader.isPythonParamsInstance(metadata): - return JavaMLReader(self.cls).load(path) + return JavaMLReader(self.cls).load(path) # type: ignore[arg-type] else: metadata, estimator, evaluator, estimatorParamMaps = _ValidatorSharedReadWrite.load( path, self.sc, metadata ) bestModelPath = os.path.join(path, "bestModel") - bestModel = DefaultParamsReader.loadParamsInstance(bestModelPath, self.sc) + bestModel: Model = DefaultParamsReader.loadParamsInstance(bestModelPath, self.sc) validationMetrics = metadata["validationMetrics"] persistSubModels = ("persistSubModels" in metadata) and metadata["persistSubModels"] @@ -1132,7 +1225,9 @@ def load(self, path): subModels = None tvsModel = TrainValidationSplitModel( - bestModel, validationMetrics=validationMetrics, subModels=subModels + bestModel, + validationMetrics=validationMetrics, + subModels=cast(Optional[List[Model]], subModels), ) tvsModel = tvsModel._resetUid(metadata["uid"]) tvsModel.set(tvsModel.estimator, estimator) @@ -1146,11 +1241,11 @@ def load(self, path): @inherit_doc class TrainValidationSplitModelWriter(MLWriter): - def __init__(self, instance): + def __init__(self, instance: "TrainValidationSplitModel"): 
super(TrainValidationSplitModelWriter, self).__init__() self.instance = instance - def saveImpl(self, path): + def saveImpl(self, path: str) -> None: _ValidatorSharedReadWrite.validateParams(self.instance) instance = self.instance persistSubModels = _ValidatorSharedReadWrite.getValidatorModelWriterPersistSubModelsParam( @@ -1163,14 +1258,14 @@ def saveImpl(self, path): } _ValidatorSharedReadWrite.saveImpl(path, instance, self.sc, extraMetadata=extraMetadata) bestModelPath = os.path.join(path, "bestModel") - instance.bestModel.save(bestModelPath) + cast(MLWritable, instance.bestModel).save(bestModelPath) if persistSubModels: if instance.subModels is None: raise ValueError(_save_with_persist_submodels_no_submodels_found_err) subModelsPath = os.path.join(path, "subModels") for paramIndex in range(len(instance.getEstimatorParamMaps())): modelPath = os.path.join(subModelsPath, f"{paramIndex}") - instance.subModels[paramIndex].save(modelPath) + cast(MLWritable, instance.subModels[paramIndex]).save(modelPath) class _TrainValidationSplitParams(_ValidatorParams): @@ -1180,7 +1275,7 @@ class _TrainValidationSplitParams(_ValidatorParams): .. versionadded:: 3.0.0 """ - trainRatio = Param( + trainRatio: Param[float] = Param( Params._dummy(), "trainRatio", "Param for ratio between train and\ @@ -1188,12 +1283,12 @@ class _TrainValidationSplitParams(_ValidatorParams): typeConverter=TypeConverters.toFloat, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_TrainValidationSplitParams, self).__init__(*args) self._setDefault(trainRatio=0.75) @since("2.0.0") - def getTrainRatio(self): + def getTrainRatio(self) -> float: """ Gets the value of trainRatio or its default value. """ @@ -1201,11 +1296,11 @@ def getTrainRatio(self): class TrainValidationSplit( - Estimator, + Estimator["TrainValidationSplitModel"], _TrainValidationSplitParams, HasParallelism, HasCollectSubModels, - MLReadable, + MLReadable["TrainValidationSplit"], MLWritable, ): """ @@ -1252,18 +1347,20 @@ class TrainValidationSplit( 0.833... """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - estimator=None, - estimatorParamMaps=None, - evaluator=None, - trainRatio=0.75, - parallelism=1, - collectSubModels=False, - seed=None, - ): + estimator: Optional[Estimator] = None, + estimatorParamMaps: Optional[List["ParamMap"]] = None, + evaluator: Optional[Evaluator] = None, + trainRatio: float = 0.75, + parallelism: int = 1, + collectSubModels: bool = False, + seed: Optional[int] = None, + ) -> None: """ __init__(self, \\*, estimator=None, estimatorParamMaps=None, evaluator=None, \ trainRatio=0.75, parallelism=1, collectSubModels=False, seed=None) @@ -1278,14 +1375,14 @@ def __init__( def setParams( self, *, - estimator=None, - estimatorParamMaps=None, - evaluator=None, - trainRatio=0.75, - parallelism=1, - collectSubModels=False, - seed=None, - ): + estimator: Optional[Estimator] = None, + estimatorParamMaps: Optional[List["ParamMap"]] = None, + evaluator: Optional[Evaluator] = None, + trainRatio: float = 0.75, + parallelism: int = 1, + collectSubModels: bool = False, + seed: Optional[int] = None, + ) -> "TrainValidationSplit": """ setParams(self, \\*, estimator=None, estimatorParamMaps=None, evaluator=None, \ trainRatio=0.75, parallelism=1, collectSubModels=False, seed=None): @@ -1295,52 +1392,52 @@ def setParams( return self._set(**kwargs) @since("2.0.0") - def setEstimator(self, value): + def setEstimator(self, value: Estimator) -> "TrainValidationSplit": """ Sets the value of :py:attr:`estimator`. 
""" return self._set(estimator=value) @since("2.0.0") - def setEstimatorParamMaps(self, value): + def setEstimatorParamMaps(self, value: List["ParamMap"]) -> "TrainValidationSplit": """ Sets the value of :py:attr:`estimatorParamMaps`. """ return self._set(estimatorParamMaps=value) @since("2.0.0") - def setEvaluator(self, value): + def setEvaluator(self, value: Evaluator) -> "TrainValidationSplit": """ Sets the value of :py:attr:`evaluator`. """ return self._set(evaluator=value) @since("2.0.0") - def setTrainRatio(self, value): + def setTrainRatio(self, value: float) -> "TrainValidationSplit": """ Sets the value of :py:attr:`trainRatio`. """ return self._set(trainRatio=value) - def setSeed(self, value): + def setSeed(self, value: int) -> "TrainValidationSplit": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) - def setParallelism(self, value): + def setParallelism(self, value: int) -> "TrainValidationSplit": """ Sets the value of :py:attr:`parallelism`. """ return self._set(parallelism=value) - def setCollectSubModels(self, value): + def setCollectSubModels(self, value: bool) -> "TrainValidationSplit": """ Sets the value of :py:attr:`collectSubModels`. """ return self._set(collectSubModels=value) - def _fit(self, dataset): + def _fit(self, dataset: DataFrame) -> "TrainValidationSplitModel": est = self.getOrDefault(self.estimator) epm = self.getOrDefault(self.estimatorParamMaps) numModels = len(epm) @@ -1367,19 +1464,26 @@ def _fit(self, dataset): for j, metric, subModel in pool.imap_unordered(lambda f: f(), tasks): metrics[j] = metric if collectSubModelsParam: + assert subModels is not None subModels[j] = subModel train.unpersist() validation.unpersist() if eva.isLargerBetter(): - bestIndex = np.argmax(metrics) + bestIndex = np.argmax(cast(List[float], metrics)) else: - bestIndex = np.argmin(metrics) + bestIndex = np.argmin(cast(List[float], metrics)) bestModel = est.fit(dataset, epm[bestIndex]) - return self._copyValues(TrainValidationSplitModel(bestModel, metrics, subModels)) + return self._copyValues( + TrainValidationSplitModel( + bestModel, + cast(List[float], metrics), + subModels, # type: ignore[arg-type] + ) + ) - def copy(self, extra=None): + def copy(self, extra: Optional["ParamMap"] = None) -> "TrainValidationSplit": """ Creates a copy of this instance with a randomly generated uid and some extra params. This copies creates a deep copy of @@ -1408,20 +1512,20 @@ def copy(self, extra=None): return newTVS @since("2.3.0") - def write(self): + def write(self) -> MLWriter: """Returns an MLWriter instance for this ML instance.""" if _ValidatorSharedReadWrite.is_java_convertible(self): - return JavaMLWriter(self) + return JavaMLWriter(self) # type: ignore[arg-type] return TrainValidationSplitWriter(self) @classmethod @since("2.3.0") - def read(cls): + def read(cls) -> TrainValidationSplitReader: """Returns an MLReader instance for this class.""" return TrainValidationSplitReader(cls) @classmethod - def _from_java(cls, java_stage): + def _from_java(cls, java_stage: "JavaObject") -> "TrainValidationSplit": """ Given a Java TrainValidationSplit, create and return a Python wrapper of it. Used for ML persistence. @@ -1445,7 +1549,7 @@ def _from_java(cls, java_stage): py_stage._resetUid(java_stage.uid()) return py_stage - def _to_java(self): + def _to_java(self) -> "JavaObject": """ Transfer this instance to a Java TrainValidationSplit. Used for ML persistence. 
@@ -1470,14 +1574,21 @@ def _to_java(self): return _java_obj -class TrainValidationSplitModel(Model, _TrainValidationSplitParams, MLReadable, MLWritable): +class TrainValidationSplitModel( + Model, _TrainValidationSplitParams, MLReadable["TrainValidationSplitModel"], MLWritable +): """ Model from train validation split. .. versionadded:: 2.0.0 """ - def __init__(self, bestModel, validationMetrics=None, subModels=None): + def __init__( + self, + bestModel: Model, + validationMetrics: Optional[List[float]] = None, + subModels: Optional[List[Model]] = None, + ): super(TrainValidationSplitModel, self).__init__() #: best model from train validation split self.bestModel = bestModel @@ -1486,10 +1597,10 @@ def __init__(self, bestModel, validationMetrics=None, subModels=None): #: sub models from train validation split self.subModels = subModels - def _transform(self, dataset): + def _transform(self, dataset: DataFrame) -> DataFrame: return self.bestModel.transform(dataset) - def copy(self, extra=None): + def copy(self, extra: Optional["ParamMap"] = None) -> "TrainValidationSplitModel": """ Creates a copy of this instance with a randomly generated uid and some extra params. This copies the underlying bestModel, @@ -1514,26 +1625,27 @@ def copy(self, extra=None): extra = dict() bestModel = self.bestModel.copy(extra) validationMetrics = list(self.validationMetrics) + assert self.subModels is not None subModels = [model.copy() for model in self.subModels] return self._copyValues( TrainValidationSplitModel(bestModel, validationMetrics, subModels), extra=extra ) @since("2.3.0") - def write(self): + def write(self) -> MLWriter: """Returns an MLWriter instance for this ML instance.""" if _ValidatorSharedReadWrite.is_java_convertible(self): - return JavaMLWriter(self) + return JavaMLWriter(self) # type: ignore[arg-type] return TrainValidationSplitModelWriter(self) @classmethod @since("2.3.0") - def read(cls): + def read(cls) -> TrainValidationSplitModelReader: """Returns an MLReader instance for this class.""" return TrainValidationSplitModelReader(cls) @classmethod - def _from_java(cls, java_stage): + def _from_java(cls, java_stage: "JavaObject") -> "TrainValidationSplitModel": """ Given a Java TrainValidationSplitModel, create and return a Python wrapper of it. Used for ML persistence. @@ -1541,7 +1653,9 @@ def _from_java(cls, java_stage): # Load information from java_stage to the instance. sc = SparkContext._active_spark_context - bestModel = JavaParams._from_java(java_stage.bestModel()) + assert sc is not None + + bestModel: Model = JavaParams._from_java(java_stage.bestModel()) validationMetrics = _java2py(sc, java_stage.validationMetrics()) estimator, epms, evaluator = super(TrainValidationSplitModel, cls)._from_java_impl( java_stage @@ -1566,7 +1680,7 @@ def _from_java(cls, java_stage): py_stage._resetUid(java_stage.uid()) return py_stage - def _to_java(self): + def _to_java(self) -> "JavaObject": """ Transfer this instance to a Java TrainValidationSplitModel. Used for ML persistence. 
@@ -1577,10 +1691,12 @@ def _to_java(self): """ sc = SparkContext._active_spark_context + assert sc is not None + _java_obj = JavaParams._new_java_obj( "org.apache.spark.ml.tuning.TrainValidationSplitModel", self.uid, - self.bestModel._to_java(), + cast(JavaParams, self.bestModel)._to_java(), _py2java(sc, self.validationMetrics), ) estimator, epms, evaluator = super(TrainValidationSplitModel, self)._to_java_impl() @@ -1598,7 +1714,9 @@ def _to_java(self): _java_obj.set(pair) if self.subModels is not None: - java_sub_models = [sub_model._to_java() for sub_model in self.subModels] + java_sub_models = [ + cast(JavaParams, sub_model)._to_java() for sub_model in self.subModels + ] _java_obj.setSubModels(java_sub_models) return _java_obj diff --git a/python/pyspark/ml/tuning.pyi b/python/pyspark/ml/tuning.pyi deleted file mode 100644 index 25380591cba46..0000000000000 --- a/python/pyspark/ml/tuning.pyi +++ /dev/null @@ -1,228 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import overload -from typing import Any, List, Optional, Tuple, Type -from pyspark.ml._typing import ParamMap - -from pyspark.ml import Estimator, Model -from pyspark.ml.evaluation import Evaluator -from pyspark.ml.param import Param -from pyspark.ml.param.shared import HasCollectSubModels, HasParallelism, HasSeed -from pyspark.ml.util import MLReader, MLReadable, MLWriter, MLWritable -from pyspark.sql import DataFrame - -class ParamGridBuilder: - def __init__(self) -> None: ... - def addGrid(self, param: Param, values: List[Any]) -> ParamGridBuilder: ... - @overload - def baseOn(self, __args: ParamMap) -> ParamGridBuilder: ... - @overload - def baseOn(self, *args: Tuple[Param, Any]) -> ParamGridBuilder: ... - def build(self) -> List[ParamMap]: ... - -class _ValidatorParams(HasSeed): - estimator: Param[Estimator] - estimatorParamMaps: Param[List[ParamMap]] - evaluator: Param[Evaluator] - def getEstimator(self) -> Estimator: ... - def getEstimatorParamMaps(self) -> List[ParamMap]: ... - def getEvaluator(self) -> Evaluator: ... - -class _CrossValidatorParams(_ValidatorParams): - numFolds: Param[int] - foldCol: Param[str] - def __init__(self, *args: Any): ... - def getNumFolds(self) -> int: ... - def getFoldCol(self) -> str: ... - -class CrossValidator( - Estimator[CrossValidatorModel], - _CrossValidatorParams, - HasParallelism, - HasCollectSubModels, - MLReadable[CrossValidator], - MLWritable, -): - def __init__( - self, - *, - estimator: Optional[Estimator] = ..., - estimatorParamMaps: Optional[List[ParamMap]] = ..., - evaluator: Optional[Evaluator] = ..., - numFolds: int = ..., - seed: Optional[int] = ..., - parallelism: int = ..., - collectSubModels: bool = ..., - foldCol: str = ..., - ) -> None: ... 
- def setParams( - self, - *, - estimator: Optional[Estimator] = ..., - estimatorParamMaps: Optional[List[ParamMap]] = ..., - evaluator: Optional[Evaluator] = ..., - numFolds: int = ..., - seed: Optional[int] = ..., - parallelism: int = ..., - collectSubModels: bool = ..., - foldCol: str = ..., - ) -> CrossValidator: ... - def _fit(self, dataset: DataFrame) -> CrossValidatorModel: ... - def setEstimator(self, value: Estimator) -> CrossValidator: ... - def setEstimatorParamMaps(self, value: List[ParamMap]) -> CrossValidator: ... - def setEvaluator(self, value: Evaluator) -> CrossValidator: ... - def setNumFolds(self, value: int) -> CrossValidator: ... - def setFoldCol(self, value: str) -> CrossValidator: ... - def setSeed(self, value: int) -> CrossValidator: ... - def setParallelism(self, value: int) -> CrossValidator: ... - def setCollectSubModels(self, value: bool) -> CrossValidator: ... - def copy(self, extra: Optional[ParamMap] = ...) -> CrossValidator: ... - def write(self) -> MLWriter: ... - @classmethod - def read(cls: Type[CrossValidator]) -> MLReader: ... - -class CrossValidatorModel( - Model, _CrossValidatorParams, MLReadable[CrossValidatorModel], MLWritable -): - bestModel: Model - avgMetrics: List[float] - subModels: List[List[Model]] - def __init__( - self, - bestModel: Model, - avgMetrics: Optional[List[float]] = ..., - subModels: Optional[List[List[Model]]] = ..., - ) -> None: ... - def _transform(self, dataset: DataFrame) -> DataFrame: ... - def copy(self, extra: Optional[ParamMap] = ...) -> CrossValidatorModel: ... - def write(self) -> MLWriter: ... - @classmethod - def read(cls: Type[CrossValidatorModel]) -> MLReader: ... - -class _TrainValidationSplitParams(_ValidatorParams): - trainRatio: Param[float] - def __init__(self, *args: Any): ... - def getTrainRatio(self) -> float: ... - -class TrainValidationSplit( - Estimator[TrainValidationSplitModel], - _TrainValidationSplitParams, - HasParallelism, - HasCollectSubModels, - MLReadable[TrainValidationSplit], - MLWritable, -): - def __init__( - self, - *, - estimator: Optional[Estimator] = ..., - estimatorParamMaps: Optional[List[ParamMap]] = ..., - evaluator: Optional[Evaluator] = ..., - trainRatio: float = ..., - parallelism: int = ..., - collectSubModels: bool = ..., - seed: Optional[int] = ..., - ) -> None: ... - def setParams( - self, - *, - estimator: Optional[Estimator] = ..., - estimatorParamMaps: Optional[List[ParamMap]] = ..., - evaluator: Optional[Evaluator] = ..., - trainRatio: float = ..., - parallelism: int = ..., - collectSubModels: bool = ..., - seed: Optional[int] = ..., - ) -> TrainValidationSplit: ... - def _fit(self, dataset: DataFrame) -> TrainValidationSplitModel: ... - def setEstimator(self, value: Estimator) -> TrainValidationSplit: ... - def setEstimatorParamMaps(self, value: List[ParamMap]) -> TrainValidationSplit: ... - def setEvaluator(self, value: Evaluator) -> TrainValidationSplit: ... - def setTrainRatio(self, value: float) -> TrainValidationSplit: ... - def setSeed(self, value: int) -> TrainValidationSplit: ... - def setParallelism(self, value: int) -> TrainValidationSplit: ... - def setCollectSubModels(self, value: bool) -> TrainValidationSplit: ... - def copy(self, extra: Optional[ParamMap] = ...) -> TrainValidationSplit: ... - def write(self) -> MLWriter: ... - @classmethod - def read(cls: Type[TrainValidationSplit]) -> MLReader: ... 
- -class TrainValidationSplitModel( - Model, - _TrainValidationSplitParams, - MLReadable[TrainValidationSplitModel], - MLWritable, -): - bestModel: Model - validationMetrics: List[float] - subModels: List[Model] - def __init__( - self, - bestModel: Model, - validationMetrics: Optional[List[float]] = ..., - subModels: Optional[List[Model]] = ..., - ) -> None: ... - def _transform(self, dataset: DataFrame) -> DataFrame: ... - def setEstimator(self, value: Estimator) -> TrainValidationSplitModel: ... - def setEstimatorParamMaps(self, value: List[ParamMap]) -> TrainValidationSplitModel: ... - def setEvaluator(self, value: Evaluator) -> TrainValidationSplitModel: ... - def copy(self, extra: Optional[ParamMap] = ...) -> TrainValidationSplitModel: ... - def write(self) -> MLWriter: ... - @classmethod - def read(cls: Type[TrainValidationSplitModel]) -> MLReader: ... - -class CrossValidatorWriter(MLWriter): - instance: CrossValidator - def __init__(self, instance: CrossValidator) -> None: ... - def saveImpl(self, path: str) -> None: ... - -class CrossValidatorReader(MLReader[CrossValidator]): - cls: Type[CrossValidator] - def __init__(self, cls: Type[CrossValidator]) -> None: ... - def load(self, path: str) -> CrossValidator: ... - -class CrossValidatorModelWriter(MLWriter): - instance: CrossValidatorModel - def __init__(self, instance: CrossValidatorModel) -> None: ... - def saveImpl(self, path: str) -> None: ... - -class CrossValidatorModelReader(MLReader[CrossValidatorModel]): - cls: Type[CrossValidatorModel] - def __init__(self, cls: Type[CrossValidatorModel]) -> None: ... - def load(self, path: str) -> CrossValidatorModel: ... - -class TrainValidationSplitWriter(MLWriter): - instance: TrainValidationSplit - def __init__(self, instance: TrainValidationSplit) -> None: ... - def saveImpl(self, path: str) -> None: ... - -class TrainValidationSplitReader(MLReader[TrainValidationSplit]): - cls: Type[TrainValidationSplit] - def __init__(self, cls: Type[TrainValidationSplit]) -> None: ... - def load(self, path: str) -> TrainValidationSplit: ... - -class TrainValidationSplitModelWriter(MLWriter): - instance: TrainValidationSplitModel - def __init__(self, instance: TrainValidationSplitModel) -> None: ... - def saveImpl(self, path: str) -> None: ... - -class TrainValidationSplitModelReader(MLReader[TrainValidationSplitModel]): - cls: Type[TrainValidationSplitModel] - def __init__(self, cls: Type[TrainValidationSplitModel]) -> None: ... - def load(self, path: str) -> TrainValidationSplitModel: ... diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py index 019420bc3684e..d7c9bd24bf39f 100644 --- a/python/pyspark/ml/util.py +++ b/python/pyspark/ml/util.py @@ -31,7 +31,6 @@ if TYPE_CHECKING: from py4j.java_gateway import JavaGateway, JavaObject from pyspark.ml._typing import PipelineStage - from pyspark.ml.base import Params from pyspark.ml.wrapper import JavaWrapper @@ -683,14 +682,14 @@ def isMetaEstimator(pyInstance: Any) -> bool: ) @staticmethod - def getAllNestedStages(pyInstance: Any) -> List["PipelineStage"]: + def getAllNestedStages(pyInstance: Any) -> List["Params"]: from pyspark.ml import Pipeline, PipelineModel from pyspark.ml.tuning import _ValidatorParams from pyspark.ml.classification import OneVsRest, OneVsRestModel # TODO: We need to handle `RFormulaModel.pipelineModel` here after Pyspark RFormulaModel # support pipelineModel property. 
- pySubStages: List["PipelineStage"] + pySubStages: Sequence["Params"] if isinstance(pyInstance, Pipeline): pySubStages = pyInstance.getStages() @@ -714,7 +713,7 @@ def getAllNestedStages(pyInstance: Any) -> List["PipelineStage"]: return [pyInstance] + nestedStages @staticmethod - def getUidMap(instance: Any) -> Dict[str, "PipelineStage"]: + def getUidMap(instance: Any) -> Dict[str, "Params"]: nestedStages = MetaAlgorithmReadWrite.getAllNestedStages(instance) uidMap = {stage.uid: stage for stage in nestedStages} if len(nestedStages) != len(uidMap): From de5e45ab5d5963305be6471ab97890408a88c660 Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Tue, 8 Feb 2022 20:20:36 -0800 Subject: [PATCH 185/513] [SPARK-38086][SQL] Make ArrowColumnVector Extendable ### What changes were proposed in this pull request? This PR proposes to make ArrowColumnVector extendable by relaxing access modifier restrictions ### Why are the changes needed? Some Spark extension libraries need to extend ArrowColumnVector.java. It is impossible extend ArrowColumnVector class for now because the class is final and the accessors are all private. Proposing to relax private/final restrictions to make ArrowColumnVector extendable. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. Closes #35393 from kazuyukitanimura/SPARK-38086. Authored-by: Kazuyuki Tanimura Signed-off-by: Dongjoon Hyun --- .../sql/vectorized/ArrowColumnVector.java | 59 +++++++++++-------- .../vectorized/ArrowColumnVectorSuite.scala | 31 ++++++++++ 2 files changed, 65 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java index 89daee1cbbfc7..fe60605525ae4 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java @@ -21,6 +21,7 @@ import org.apache.arrow.vector.complex.*; import org.apache.arrow.vector.holders.NullableVarCharHolder; +import org.apache.spark.annotation.DeveloperApi; import org.apache.spark.sql.util.ArrowUtils; import org.apache.spark.sql.types.*; import org.apache.spark.unsafe.types.UTF8String; @@ -28,10 +29,11 @@ /** * A column vector backed by Apache Arrow. 
*/ -public final class ArrowColumnVector extends ColumnVector { +@DeveloperApi +public class ArrowColumnVector extends ColumnVector { - private final ArrowVectorAccessor accessor; - private ArrowColumnVector[] childColumns; + ArrowVectorAccessor accessor; + ArrowColumnVector[] childColumns; public ValueVector getValueVector() { return accessor.vector; } @@ -130,9 +132,16 @@ public ColumnarMap getMap(int rowId) { @Override public ArrowColumnVector getChild(int ordinal) { return childColumns[ordinal]; } + ArrowColumnVector(DataType type) { + super(type); + } + public ArrowColumnVector(ValueVector vector) { - super(ArrowUtils.fromArrowField(vector.getField())); + this(ArrowUtils.fromArrowField(vector.getField())); + initAccessor(vector); + } + void initAccessor(ValueVector vector) { if (vector instanceof BitVector) { accessor = new BooleanAccessor((BitVector) vector); } else if (vector instanceof TinyIntVector) { @@ -184,9 +193,9 @@ public ArrowColumnVector(ValueVector vector) { } } - private abstract static class ArrowVectorAccessor { + abstract static class ArrowVectorAccessor { - private final ValueVector vector; + final ValueVector vector; ArrowVectorAccessor(ValueVector vector) { this.vector = vector; @@ -254,7 +263,7 @@ ColumnarMap getMap(int rowId) { } } - private static class BooleanAccessor extends ArrowVectorAccessor { + static class BooleanAccessor extends ArrowVectorAccessor { private final BitVector accessor; @@ -269,7 +278,7 @@ final boolean getBoolean(int rowId) { } } - private static class ByteAccessor extends ArrowVectorAccessor { + static class ByteAccessor extends ArrowVectorAccessor { private final TinyIntVector accessor; @@ -284,7 +293,7 @@ final byte getByte(int rowId) { } } - private static class ShortAccessor extends ArrowVectorAccessor { + static class ShortAccessor extends ArrowVectorAccessor { private final SmallIntVector accessor; @@ -299,7 +308,7 @@ final short getShort(int rowId) { } } - private static class IntAccessor extends ArrowVectorAccessor { + static class IntAccessor extends ArrowVectorAccessor { private final IntVector accessor; @@ -314,7 +323,7 @@ final int getInt(int rowId) { } } - private static class LongAccessor extends ArrowVectorAccessor { + static class LongAccessor extends ArrowVectorAccessor { private final BigIntVector accessor; @@ -329,7 +338,7 @@ final long getLong(int rowId) { } } - private static class FloatAccessor extends ArrowVectorAccessor { + static class FloatAccessor extends ArrowVectorAccessor { private final Float4Vector accessor; @@ -344,7 +353,7 @@ final float getFloat(int rowId) { } } - private static class DoubleAccessor extends ArrowVectorAccessor { + static class DoubleAccessor extends ArrowVectorAccessor { private final Float8Vector accessor; @@ -359,7 +368,7 @@ final double getDouble(int rowId) { } } - private static class DecimalAccessor extends ArrowVectorAccessor { + static class DecimalAccessor extends ArrowVectorAccessor { private final DecimalVector accessor; @@ -375,7 +384,7 @@ final Decimal getDecimal(int rowId, int precision, int scale) { } } - private static class StringAccessor extends ArrowVectorAccessor { + static class StringAccessor extends ArrowVectorAccessor { private final VarCharVector accessor; private final NullableVarCharHolder stringResult = new NullableVarCharHolder(); @@ -398,7 +407,7 @@ final UTF8String getUTF8String(int rowId) { } } - private static class BinaryAccessor extends ArrowVectorAccessor { + static class BinaryAccessor extends ArrowVectorAccessor { private final VarBinaryVector 
accessor; @@ -413,7 +422,7 @@ final byte[] getBinary(int rowId) { } } - private static class DateAccessor extends ArrowVectorAccessor { + static class DateAccessor extends ArrowVectorAccessor { private final DateDayVector accessor; @@ -428,7 +437,7 @@ final int getInt(int rowId) { } } - private static class TimestampAccessor extends ArrowVectorAccessor { + static class TimestampAccessor extends ArrowVectorAccessor { private final TimeStampMicroTZVector accessor; @@ -443,7 +452,7 @@ final long getLong(int rowId) { } } - private static class TimestampNTZAccessor extends ArrowVectorAccessor { + static class TimestampNTZAccessor extends ArrowVectorAccessor { private final TimeStampMicroVector accessor; @@ -458,7 +467,7 @@ final long getLong(int rowId) { } } - private static class ArrayAccessor extends ArrowVectorAccessor { + static class ArrayAccessor extends ArrowVectorAccessor { private final ListVector accessor; private final ArrowColumnVector arrayData; @@ -495,14 +504,14 @@ final ColumnarArray getArray(int rowId) { * bug in the code. * */ - private static class StructAccessor extends ArrowVectorAccessor { + static class StructAccessor extends ArrowVectorAccessor { StructAccessor(StructVector vector) { super(vector); } } - private static class MapAccessor extends ArrowVectorAccessor { + static class MapAccessor extends ArrowVectorAccessor { private final MapVector accessor; private final ArrowColumnVector keys; private final ArrowColumnVector values; @@ -524,14 +533,14 @@ final ColumnarMap getMap(int rowId) { } } - private static class NullAccessor extends ArrowVectorAccessor { + static class NullAccessor extends ArrowVectorAccessor { NullAccessor(NullVector vector) { super(vector); } } - private static class IntervalYearAccessor extends ArrowVectorAccessor { + static class IntervalYearAccessor extends ArrowVectorAccessor { private final IntervalYearVector accessor; @@ -546,7 +555,7 @@ int getInt(int rowId) { } } - private static class DurationAccessor extends ArrowVectorAccessor { + static class DurationAccessor extends ArrowVectorAccessor { private final DurationVector accessor; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/vectorized/ArrowColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/vectorized/ArrowColumnVectorSuite.scala index dec10e061d737..25beda99cd654 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/vectorized/ArrowColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/vectorized/ArrowColumnVectorSuite.scala @@ -430,4 +430,35 @@ class ArrowColumnVectorSuite extends SparkFunSuite { columnVector.close() allocator.close() } + + test ("SPARK-38086: subclassing") { + class ChildArrowColumnVector(vector: ValueVector, n: Int) + extends ArrowColumnVector(vector: ValueVector) { + + override def getValueVector: ValueVector = accessor.vector + override def getInt(rowId: Int): Int = accessor.getInt(rowId) + n + } + + val allocator = ArrowUtils.rootAllocator.newChildAllocator("int", 0, Long.MaxValue) + val vector = ArrowUtils.toArrowField("int", IntegerType, nullable = true, null) + .createVector(allocator).asInstanceOf[IntVector] + vector.allocateNew() + + (0 until 10).foreach { i => + vector.setSafe(i, i) + } + + val columnVector = new ChildArrowColumnVector(vector, 1) + assert(columnVector.dataType === IntegerType) + assert(!columnVector.hasNull) + + val intVector = columnVector.getValueVector.asInstanceOf[IntVector] + (0 until 10).foreach { i => + assert(columnVector.getInt(i) === i + 1) + assert(intVector.get(i) === i) + } 
+ + columnVector.close() + allocator.close() + } } From fdda0ebc752e6c8d9d71a6a83454c9c072c6d743 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 8 Feb 2022 20:56:11 -0800 Subject: [PATCH 186/513] [SPARK-38147][BUILD][MLLIB] Upgrade `shapeless` to 2.3.7 ### What changes were proposed in this pull request? This PR aims to upgrade `shapeless` to 2.3.7. ### Why are the changes needed? This will bring the latest bug fixes. - https://github.com/milessabin/shapeless/releases/tag/v2.3.7 (Released on May 16, 2021) - https://github.com/milessabin/shapeless/releases/tag/v2.3.6 (This is recommended to skip.) - https://github.com/milessabin/shapeless/releases/tag/v2.3.5 (This is recommended to skip.) - https://github.com/milessabin/shapeless/releases/tag/v2.3.4 ### Does this PR introduce _any_ user-facing change? No. `v2.3.7` is backward binary-compatible with `v2.3.3`. - https://github.com/milessabin/shapeless/releases/tag/v2.3.7 ### How was this patch tested? Pass the CIs. Closes #35450 from dongjoon-hyun/SPARK-38147. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 3 +-- dev/deps/spark-deps-hadoop-3-hive-2.3 | 3 +-- pom.xml | 5 +++++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 8284237904765..63e3f8716cfa0 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -193,7 +193,6 @@ log4j-core/2.17.1//log4j-core-2.17.1.jar log4j-slf4j-impl/2.17.1//log4j-slf4j-impl-2.17.1.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar -macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar metrics-core/4.2.7//metrics-core-4.2.7.jar metrics-graphite/4.2.7//metrics-graphite-4.2.7.jar @@ -243,7 +242,7 @@ scala-library/2.12.15//scala-library-2.12.15.jar scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar scala-reflect/2.12.15//scala-reflect-2.12.15.jar scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar -shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar +shapeless_2.12/2.3.7//shapeless_2.12-2.3.7.jar shims/0.9.23//shims-0.9.23.jar slf4j-api/1.7.32//slf4j-api-1.7.32.jar snakeyaml/1.28//snakeyaml-1.28.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index f1692777dadb1..88cd56029f3db 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -179,7 +179,6 @@ log4j-core/2.17.1//log4j-core-2.17.1.jar log4j-slf4j-impl/2.17.1//log4j-slf4j-impl-2.17.1.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar -macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar metrics-core/4.2.7//metrics-core-4.2.7.jar metrics-graphite/4.2.7//metrics-graphite-4.2.7.jar @@ -229,7 +228,7 @@ scala-library/2.12.15//scala-library-2.12.15.jar scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar scala-reflect/2.12.15//scala-reflect-2.12.15.jar scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar -shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar +shapeless_2.12/2.3.7//shapeless_2.12-2.3.7.jar shims/0.9.23//shims-0.9.23.jar slf4j-api/1.7.32//slf4j-api-1.7.32.jar snakeyaml/1.28//snakeyaml-1.28.jar diff --git a/pom.xml b/pom.xml index f8f13fc77f65a..496c370c3b0a6 100644 --- a/pom.xml +++ b/pom.xml @@ -1026,6 +1026,11 @@ + 
+ com.chuusai + shapeless_${scala.binary.version} + 2.3.7 + org.json4s json4s-jackson_${scala.binary.version} From 8a559b317f60e2630717a01424a7663e451fb3ce Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 8 Feb 2022 20:57:12 -0800 Subject: [PATCH 187/513] [SPARK-38149][BUILD] Upgrade joda-time to 2.10.13 ### What changes were proposed in this pull request? This PR proposes to upgrade `joda-time` to `2.10.13`. ### Why are the changes needed? `joda-time 2.10.13` was released, which supports the latest TZ database of 2021e. https://github.com/JodaOrg/joda-time/compare/v2.10.12...v2.10.13 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CIs. Closes #35452 from sarutak/upgrade-joda-2.10.13. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 63e3f8716cfa0..1925ae0a92b5d 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -149,7 +149,7 @@ jetty-util/6.1.26//jetty-util-6.1.26.jar jetty-util/9.4.44.v20210927//jetty-util-9.4.44.v20210927.jar jetty/6.1.26//jetty-6.1.26.jar jline/2.14.6//jline-2.14.6.jar -joda-time/2.10.12//joda-time-2.10.12.jar +joda-time/2.10.13//joda-time-2.10.13.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar json/1.8//json-1.8.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 88cd56029f3db..dad4817239ee3 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -136,7 +136,7 @@ jettison/1.1//jettison-1.1.jar jetty-util-ajax/9.4.44.v20210927//jetty-util-ajax-9.4.44.v20210927.jar jetty-util/9.4.44.v20210927//jetty-util-9.4.44.v20210927.jar jline/2.14.6//jline-2.14.6.jar -joda-time/2.10.12//joda-time-2.10.12.jar +joda-time/2.10.13//joda-time-2.10.13.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar json/1.8//json-1.8.jar diff --git a/pom.xml b/pom.xml index 496c370c3b0a6..4e1e5cc70b287 100644 --- a/pom.xml +++ b/pom.xml @@ -184,7 +184,7 @@ 14.0.1 3.0.16 2.34 - 2.10.12 + 2.10.13 3.5.2 3.0.0 0.12.0 From 69c2c34e80a21927c3ca3664c477748ff47a3804 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 8 Feb 2022 23:01:24 -0800 Subject: [PATCH 188/513] [SPARK-38153][BUILD] Remove option newlines.topLevelStatements in scalafmt.conf ### What changes were proposed in this pull request? Remove option newlines.topLevelStatements in scalafmt.conf ### Why are the changes needed? The configuration ``` newlines.topLevelStatements = [before,after] ``` is to add a blank line before the first member or after the last member of the class. This is neither encouraged nor discouraged as per https://github.com/databricks/scala-style-guide#blanklines **Without the conf, scalafmt will still add blank lines between consecutive members (or initializers) of a class.** As I tried running the script `./dev/scalafmt`, I saw unnessary blank lines ![image](https://user-images.githubusercontent.com/1097932/153122925-3238f15c-312b-4973-8e2d-92978bf5c6ad.png) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test Closes #35455 from gengliangwang/removeToplevelLine. 
Authored-by: Gengliang Wang Signed-off-by: Dongjoon Hyun --- dev/.scalafmt.conf | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/.scalafmt.conf b/dev/.scalafmt.conf index 9598540752ebd..d2196e601aa2d 100644 --- a/dev/.scalafmt.conf +++ b/dev/.scalafmt.conf @@ -25,4 +25,3 @@ optIn = { danglingParentheses = false docstrings = JavaDoc maxColumn = 98 -newlines.topLevelStatements = [before,after] From dee294b453b550471028fdbd9e17952963504a3a Mon Sep 17 00:00:00 2001 From: kuwii Date: Wed, 9 Feb 2022 16:59:38 +0900 Subject: [PATCH 189/513] [SPARK-38056][WEB UI] Fix issue of Structured streaming not working in history server when using LevelDB ### What changes were proposed in this pull request? Change type of `org.apache.spark.sql.streaming.ui.StreamingQueryData.runId` from `UUID` to `String`. ### Why are the changes needed? In [SPARK-31953](https://github.com/apache/spark/commit/4f9667035886a67e6c9a4e8fad2efa390e87ca68), structured streaming support is added in history server. However this does not work when history server is using LevelDB instead of in-memory KV store. - Level DB does not support `UUID` as key. - If `spark.history.store.path` is set in history server to use Level DB, when writing info to the store during replaying events, error will occur. - `StreamingQueryStatusListener` will throw exceptions when writing info, saying `java.lang.IllegalArgumentException: Type java.util.UUID not allowed as key.`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests in `StreamingQueryStatusListenerSuite` to test whether `StreamingQueryData` can be successfully written to in-memory store, LevelDB and RocksDB. Closes #35356 from kuwii/hs-streaming-fix. Authored-by: kuwii Signed-off-by: Jungtaek Lim --- .../ui/StreamingQueryStatisticsPage.scala | 4 +- .../ui/StreamingQueryStatusListener.scala | 6 +- .../ui/StreamingQueryPageSuite.scala | 2 +- .../StreamingQueryStatusListenerSuite.scala | 64 ++++++++++++++++--- 4 files changed, 62 insertions(+), 14 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala index 97691d9d7e827..e13ac4e487c95 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.streaming.ui import java.{util => ju} import java.lang.{Long => JLong} -import java.util.{Locale, UUID} +import java.util.Locale import javax.servlet.http.HttpServletRequest import scala.collection.JavaConverters._ @@ -59,7 +59,7 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) require(parameterId != null && parameterId.nonEmpty, "Missing id parameter") val query = parent.store.allQueryUIData.find { uiData => - uiData.summary.runId.equals(UUID.fromString(parameterId)) + uiData.summary.runId.equals(parameterId) }.getOrElse(throw new IllegalArgumentException(s"Failed to find streaming query $parameterId")) val resources = generateLoadResources(request) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala index fdd3754344108..b59ec0477d5d4 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala @@ -75,7 +75,7 @@ private[sql] class StreamingQueryStatusListener( store.write(new StreamingQueryData( event.name, event.id, - event.runId, + event.runId.toString, isActive = true, None, startTimestamp @@ -100,7 +100,7 @@ private[sql] class StreamingQueryStatusListener( override def onQueryTerminated( event: StreamingQueryListener.QueryTerminatedEvent): Unit = { - val querySummary = store.read(classOf[StreamingQueryData], event.runId) + val querySummary = store.read(classOf[StreamingQueryData], event.runId.toString) val curTime = System.currentTimeMillis() store.write(new StreamingQueryData( querySummary.name, @@ -118,7 +118,7 @@ private[sql] class StreamingQueryStatusListener( private[sql] class StreamingQueryData( val name: String, val id: UUID, - @KVIndexParam val runId: UUID, + @KVIndexParam val runId: String, @KVIndexParam("active") val isActive: Boolean, val exception: Option[String], @KVIndexParam("startTimestamp") val startTimestamp: Long, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala index 246fa1f7c9184..78ade6a1eef36 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala @@ -103,7 +103,7 @@ class StreamingQueryPageSuite extends SharedSparkSession with BeforeAndAfter { when(summary.isActive).thenReturn(true) when(summary.name).thenReturn("query") when(summary.id).thenReturn(id) - when(summary.runId).thenReturn(id) + when(summary.runId).thenReturn(id.toString) when(summary.startTimestamp).thenReturn(1L) when(summary.exception).thenReturn(None) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala index 91c55d5598a6b..eee1a7c5ff3cd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala @@ -28,8 +28,9 @@ import org.apache.spark.sql.execution.ui.StreamingQueryStatusStore import org.apache.spark.sql.internal.StaticSQLConf import org.apache.spark.sql.streaming.{StreamingQueryListener, StreamingQueryProgress, StreamTest} import org.apache.spark.sql.streaming -import org.apache.spark.status.ElementTrackingStore -import org.apache.spark.util.kvstore.InMemoryStore +import org.apache.spark.status.{ElementTrackingStore, KVUtils} +import org.apache.spark.util.Utils +import org.apache.spark.util.kvstore.{InMemoryStore, KVStore, RocksDB} class StreamingQueryStatusListenerSuite extends StreamTest { @@ -48,7 +49,7 @@ class StreamingQueryStatusListenerSuite extends StreamTest { // result checking assert(queryStore.allQueryUIData.count(_.summary.isActive) == 1) assert(queryStore.allQueryUIData.filter(_.summary.isActive).exists(uiData => - uiData.summary.runId == runId && uiData.summary.name.equals("test"))) + uiData.summary.runId == runId.toString && uiData.summary.name.equals("test"))) // handle query progress event val progress = mock(classOf[StreamingQueryProgress], RETURNS_SMART_NULLS) @@ -64,7 +65,7 @@ class 
StreamingQueryStatusListenerSuite extends StreamTest { // result checking val activeQuery = - queryStore.allQueryUIData.filter(_.summary.isActive).find(_.summary.runId == runId) + queryStore.allQueryUIData.filter(_.summary.isActive).find(_.summary.runId == runId.toString) assert(activeQuery.isDefined) assert(activeQuery.get.summary.isActive) assert(activeQuery.get.recentProgress.length == 1) @@ -81,7 +82,8 @@ class StreamingQueryStatusListenerSuite extends StreamTest { listener.onQueryTerminated(terminateEvent) assert(!queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.isActive) - assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.runId == runId) + assert( + queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.runId == runId.toString) assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.id == id) } @@ -110,10 +112,12 @@ class StreamingQueryStatusListenerSuite extends StreamTest { // result checking assert(queryStore.allQueryUIData.count(_.summary.isActive) == 1) assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).length == 1) - assert(queryStore.allQueryUIData.filter(_.summary.isActive).exists(_.summary.runId == runId1)) + assert(queryStore.allQueryUIData.filter(_.summary.isActive).exists( + _.summary.runId == runId1.toString)) assert(queryStore.allQueryUIData.filter(_.summary.isActive).exists(uiData => - uiData.summary.runId == runId1 && uiData.summary.id == id)) - assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.runId == runId0) + uiData.summary.runId == runId1.toString && uiData.summary.id == id)) + assert( + queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.runId == runId0.toString) assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.id == id) } @@ -210,4 +214,48 @@ class StreamingQueryStatusListenerSuite extends StreamTest { addQueryProgress() checkQueryProcessData(5) } + + test("SPARK-38056: test writing StreamingQueryData to an in-memory store") { + testStreamingQueryData(new InMemoryStore()) + } + + test("SPARK-38056: test writing StreamingQueryData to a LevelDB store") { + assume(!Utils.isMacOnAppleSilicon) + val testDir = Utils.createTempDir() + val kvStore = KVUtils.open(testDir, getClass.getName) + try { + testStreamingQueryData(kvStore) + } finally { + kvStore.close() + Utils.deleteRecursively(testDir) + } + } + + test("SPARK-38056: test writing StreamingQueryData to a RocksDB store") { + assume(!Utils.isMacOnAppleSilicon) + val testDir = Utils.createTempDir() + val kvStore = new RocksDB(testDir) + try { + testStreamingQueryData(kvStore) + } finally { + kvStore.close() + Utils.deleteRecursively(testDir) + } + } + + private def testStreamingQueryData(kvStore: KVStore): Unit = { + val id = UUID.randomUUID() + val testData = new StreamingQueryData( + "some-query", + id, + id.toString, + isActive = false, + None, + 1L, + None + ) + val store = new ElementTrackingStore(kvStore, sparkConf) + store.write(testData) + store.close(closeParent = false) + } } From 2ba8a4e263933e7500cbc7c38badb6cb059803c9 Mon Sep 17 00:00:00 2001 From: khalidmammadov Date: Wed, 9 Feb 2022 16:54:31 +0800 Subject: [PATCH 190/513] [SPARK-38120][SQL] Fix HiveExternalCatalog.listPartitions when partition column name is upper case and dot in partition value ### What changes were proposed in this pull request? HiveExternalCatalog.listPartitions method call is failing when a partition column name is upper case and partition value contains dot. 
It's related to this change https://github.com/apache/spark/commit/f18b905f6cace7686ef169fda7de474079d0af23 The test case in that PR does not produce the issue as partition column name is lower case. This change will lowercase the partition column name during comparison to produce expected result, it's is inline with the actual spec transformation i.e. making it lower case for Hive and using the same function Below how to reproduce the issue: ``` Using Scala version 2.12.15 (OpenJDK 64-Bit Server VM, Java 1.8.0_312) Type in expressions to have them evaluated. Type :help for more information. scala> import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.TableIdentifier scala> spark.sql("CREATE TABLE customer(id INT, name STRING) PARTITIONED BY (partCol1 STRING, partCol2 STRING)") 22/02/06 21:10:45 WARN ResolveSessionCatalog: A Hive serde table will be created as there is no table provider specified. You can set spark.sql.legacy.createHiveTableByDefault to false so that native data source table will be created instead. res0: org.apache.spark.sql.DataFrame = [] scala> spark.sql("INSERT INTO customer PARTITION (partCol1 = 'CA', partCol2 = 'i.j') VALUES (100, 'John')") res1: org.apache.spark.sql.DataFrame = [] scala> spark.sessionState.catalog.listPartitions(TableIdentifier("customer"), Some(Map("partCol2" -> "i.j"))).foreach(println) java.util.NoSuchElementException: key not found: partcol2 at scala.collection.immutable.Map$Map2.apply(Map.scala:227) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$isPartialPartitionSpec$1(ExternalCatalogUtils.scala:205) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$isPartialPartitionSpec$1$adapted(ExternalCatalogUtils.scala:202) at scala.collection.immutable.Map$Map1.forall(Map.scala:196) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.isPartialPartitionSpec(ExternalCatalogUtils.scala:202) at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$listPartitions$6(HiveExternalCatalog.scala:1312) at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$listPartitions$6$adapted(HiveExternalCatalog.scala:1312) at scala.collection.TraversableLike.$anonfun$filterImpl$1(TraversableLike.scala:304) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) at scala.collection.TraversableLike.filterImpl(TraversableLike.scala:303) at scala.collection.TraversableLike.filterImpl$(TraversableLike.scala:297) at scala.collection.AbstractTraversable.filterImpl(Traversable.scala:108) at scala.collection.TraversableLike.filter(TraversableLike.scala:395) at scala.collection.TraversableLike.filter$(TraversableLike.scala:395) at scala.collection.AbstractTraversable.filter(Traversable.scala:108) at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$listPartitions$1(HiveExternalCatalog.scala:1312) at org.apache.spark.sql.hive.HiveExternalCatalog.withClientWrappingException(HiveExternalCatalog.scala:114) at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:103) at org.apache.spark.sql.hive.HiveExternalCatalog.listPartitions(HiveExternalCatalog.scala:1296) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.listPartitions(ExternalCatalogWithListener.scala:254) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.listPartitions(SessionCatalog.scala:1251) ... 
47 elided *******AFTER FIX********* scala> import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.TableIdentifier scala> spark.sql("CREATE TABLE customer(id INT, name STRING) PARTITIONED BY (partCol1 STRING, partCol2 STRING)") 22/02/06 22:08:11 WARN ResolveSessionCatalog: A Hive serde table will be created as there is no table provider specified. You can set spark.sql.legacy.createHiveTableByDefault to false so that native data source table will be created instead. res1: org.apache.spark.sql.DataFrame = [] scala> spark.sql("INSERT INTO customer PARTITION (partCol1 = 'CA', partCol2 = 'i.j') VALUES (100, 'John')") res2: org.apache.spark.sql.DataFrame = [] scala> spark.sessionState.catalog.listPartitions(TableIdentifier("customer"), Some(Map("partCol2" -> "i.j"))).foreach(println) CatalogPartition( Partition Values: [partCol1=CA, partCol2=i.j] Location: file:/home/khalid/dev/oss/test/spark-warehouse/customer/partcol1=CA/partcol2=i.j Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Storage Properties: [serialization.format=1] Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1644185314, totalSize=9, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0} Created Time: Sun Feb 06 22:08:34 GMT 2022 Last Access: UNKNOWN Partition Statistics: 9 bytes) ``` ### Why are the changes needed? It fixes the bug ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? `build/sbt -v -d "test:testOnly *CatalogSuite"` Closes #35409 from khalidmammadov/fix_list_partitions_bug2. Authored-by: khalidmammadov Signed-off-by: Wenchen Fan --- .../catalog/ExternalCatalogSuite.scala | 23 +++++++++++++++++++ .../spark/sql/hive/HiveExternalCatalog.scala | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala index d310538e302de..f791f778ecdc6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala @@ -481,6 +481,29 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac assert(catalog.listPartitions("db2", "tbl1", Some(part2.spec)).map(_.spec) == Seq(part2.spec)) } + test("SPARK-38120: list partitions with special chars and mixed case column name") { + val catalog = newBasicCatalog() + val table = CatalogTable( + identifier = TableIdentifier("tbl", Some("db1")), + tableType = CatalogTableType.EXTERNAL, + storage = storageFormat.copy(locationUri = Some(Utils.createTempDir().toURI)), + schema = new StructType() + .add("col1", "int") + .add("col2", "string") + .add("partCol1", "int") + .add("partCol2", "string"), + provider = Some(defaultProvider), + partitionColumnNames = Seq("partCol1", "partCol2")) + catalog.createTable(table, ignoreIfExists = false) + + val part1 = CatalogTablePartition(Map("partCol1" -> "1", "partCol2" -> "i+j"), storageFormat) + val part2 = CatalogTablePartition(Map("partCol1" -> "1", "partCol2" -> "i.j"), storageFormat) + catalog.createPartitions("db1", "tbl", Seq(part1, part2), ignoreIfExists = false) + + assert(catalog.listPartitions("db1", "tbl", Some(part1.spec)).map(_.spec) == Seq(part1.spec)) + 
assert(catalog.listPartitions("db1", "tbl", Some(part2.spec)).map(_.spec) == Seq(part2.spec)) + } + test("list partitions by filter") { val tz = TimeZone.getDefault.getID val catalog = newBasicCatalog() diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 5fccce2678f86..1770fbb5bc6d9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -1273,7 +1273,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat // treats dot as matching any single character and may return more partitions than we // expected. Here we do an extra filter to drop unexpected partitions. case Some(spec) if spec.exists(_._2.contains(".")) => - res.filter(p => isPartialPartitionSpec(spec, p.spec)) + res.filter(p => isPartialPartitionSpec(spec, toMetaStorePartitionSpec(p.spec))) case _ => res } } From da03b554aa9b7c5fae9d3dc65eed89a8cf003c4c Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 9 Feb 2022 17:05:50 +0800 Subject: [PATCH 191/513] [SPARK-38150][SQL] Update comment of RelationConversions ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/17113 change the behaviors but remain comment not correct, this pr update this ### Why are the changes needed? Make comment correct ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Not need Closes #35453 from AngersZhuuuu/SPARK-38150. Authored-by: Angerszhuuuu Signed-off-by: Wenchen Fan --- .../main/scala/org/apache/spark/sql/hive/HiveStrategies.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 37970fbe532d4..6a3de557e6e09 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -184,6 +184,8 @@ object HiveAnalysis extends Rule[LogicalPlan] { * Relation conversion from metastore relations to data source relations for better performance * * - When writing to non-partitioned Hive-serde Parquet/Orc tables + * - When writing to partitioned Hive-serde Parquet/Orc tables when + * `spark.sql.hive.convertInsertingPartitionedTable` is true * - When scanning Hive-serde Parquet/ORC tables * * This rule must be run before all other DDL post-hoc resolution rules, i.e. From 3c9c858d740b88792ac235d3813e44300d9f9591 Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Wed, 9 Feb 2022 17:38:52 +0800 Subject: [PATCH 192/513] [SPARK-37952][DOCS] Add missing statements to ALTER TABLE document ### What changes were proposed in this pull request? Add some missing statements to the ALTER TABLE document (which are mainly supported with v2 table). ### Why are the changes needed? To let users know those statements and how to use them. ### Does this PR introduce _any_ user-facing change? Yes, docs changed. ### How was this patch tested? 
`SKIP_API=1 bundle exec jekyll build` ![Screenshot from 2022-01-18 17-10-11](https://user-images.githubusercontent.com/87687356/149903242-357b2377-edd9-40c0-9ac4-feec8b25cbb3.png) ![Screenshot from 2022-01-18 17-10-39](https://user-images.githubusercontent.com/87687356/149903304-89ff0dc4-4abc-4f4e-b659-65d8eac32cc7.png) ![Screenshot from 2022-01-18 17-11-09](https://user-images.githubusercontent.com/87687356/149903324-f5998189-0647-4937-89f9-1c8bc69048d4.png) Closes #35239 from yutoacts/SPARK-37952. Authored-by: Yuto Akutsu Signed-off-by: Wenchen Fan --- docs/sql-ref-syntax-ddl-alter-table.md | 250 +++++++++++++++++++++---- 1 file changed, 215 insertions(+), 35 deletions(-) diff --git a/docs/sql-ref-syntax-ddl-alter-table.md b/docs/sql-ref-syntax-ddl-alter-table.md index 2d42eb478001c..566e73da21513 100644 --- a/docs/sql-ref-syntax-ddl-alter-table.md +++ b/docs/sql-ref-syntax-ddl-alter-table.md @@ -75,6 +75,52 @@ ALTER TABLE table_identifier ADD COLUMNS ( col_spec [ , ... ] ) Specifies the columns to be added. +### DROP COLUMNS + +`ALTER TABLE DROP COLUMNS` statement drops mentioned columns from an existing table. +Note that this statement is only supported with v2 tables. + +#### Syntax + +```sql +ALTER TABLE table_identifier DROP { COLUMN | COLUMNS } [ ( ] col_name [ , ... ] [ ) ] +``` + +#### Parameters + +* **table_identifier** + + Specifies a table name, which may be optionally qualified with a database name. + + **Syntax:** `[ database_name. ] table_name` + +* **col_name** + + Specifies the name of the column. + +### RENAME COLUMN + +`ALTER TABLE RENAME COLUMN` statement changes the column name of an existing table. +Note that this statement is only supported with v2 tables. + +#### Syntax + +```sql +ALTER TABLE table_identifier RENAME COLUMN col_name TO col_name +``` + +#### Parameters + +* **table_identifier** + + Specifies a table name, which may be optionally qualified with a database name. + + **Syntax:** `[ database_name. ] table_name` + +* **col_name** + + Specifies the name of the column. + ### ALTER OR CHANGE COLUMN `ALTER TABLE ALTER COLUMN` or `ALTER TABLE CHANGE COLUMN` statement changes column's definition. @@ -82,7 +128,7 @@ ALTER TABLE table_identifier ADD COLUMNS ( col_spec [ , ... ] ) #### Syntax ```sql -ALTER TABLE table_identifier { ALTER | CHANGE } [ COLUMN ] col_spec alterColumnAction +ALTER TABLE table_identifier { ALTER | CHANGE } [ COLUMN ] col_name alterColumnAction ``` #### Parameters @@ -93,14 +139,46 @@ ALTER TABLE table_identifier { ALTER | CHANGE } [ COLUMN ] col_spec alterColumnA **Syntax:** `[ database_name. ] table_name` -* **COLUMNS ( col_spec )** +* **col_name** - Specifies the column to be altered or be changed. + Specifies the name of the column. * **alterColumnAction** Change column's definition. +### REPLACE COLUMNS + +`ALTER TABLE REPLACE COLUMNS` statement removes all existing columns and adds the new set of columns. +Note that this statement is only supported with v2 tables. + +#### Syntax + +```sql +ALTER TABLE table_identifier [ partition_spec ] REPLACE COLUMNS + [ ( ] qualified_col_type_with_position_list [ ) ] +``` + +#### Parameters + +* **table_identifier** + + Specifies a table name, which may be optionally qualified with a database name. + + **Syntax:** `[ database_name. ] table_name` + +* **partition_spec** + + Partition to be replaced. Note that one can use a typed literal (e.g., date'2019-01-02') in the partition spec. + + **Syntax:** `PARTITION ( partition_col_name = partition_col_val [ , ... 
] )` + +* **qualified_col_type_with_position_list** + + The list of the column(s) to be added + + **Syntax:** `col_name col_type [ col_comment ] [ col_position ] [ , ... ]` + ### ADD AND DROP PARTITION #### ADD PARTITION @@ -225,6 +303,25 @@ ALTER TABLE table_identifier [ partition_spec ] SET LOCATION 'new_location' Specifies the SERDE properties to be set. +### RECOVER PARTITIONS + +`ALTER TABLE RECOVER PARTITIONS` statement recovers all the partitions in the directory of a table and updates the Hive metastore. +Another way to recover partitions is to use `MSCK REPAIR TABLE`. + +#### Syntax + +```sql +ALTER TABLE table_identifier RECOVER PARTITIONS +``` + +#### Parameters + +* **table_identifier** + + Specifies a table name, which may be optionally qualified with a database name. + + **Syntax:** `[ database_name. ] table_name` + ### Examples ```sql @@ -309,6 +406,118 @@ DESC StudentInfo; | age| int| NULL| +-----------------------+---------+-------+ +-- Drop columns of a table +DESC StudentInfo; ++-----------------------+---------+-------+ +| col_name|data_type|comment| ++-----------------------+---------+-------+ +| name| string| NULL| +| rollno| int| NULL| +| LastName| string| NULL| +| DOB|timestamp| NULL| +| age| int| NULL| +|# Partition Information| | | +| # col_name|data_type|comment| +| age| int| NULL| ++-----------------------+---------+-------+ + +ALTER TABLE StudentInfo DROP columns (LastName, DOB); + +-- After dropping columns of the table +DESC StudentInfo; ++-----------------------+---------+-------+ +| col_name|data_type|comment| ++-----------------------+---------+-------+ +| name| string| NULL| +| rollno| int| NULL| +| age| int| NULL| +|# Partition Information| | | +| # col_name|data_type|comment| +| age| int| NULL| ++-----------------------+---------+-------+ + +-- Rename a column of a table +DESC StudentInfo; ++-----------------------+---------+-------+ +| col_name|data_type|comment| ++-----------------------+---------+-------+ +| name| string| NULL| +| rollno| int| NULL| +| age| int| NULL| +|# Partition Information| | | +| # col_name|data_type|comment| +| age| int| NULL| ++-----------------------+---------+-------+ + +ALTER TABLE StudentInfo RENAME COLUMN name TO FirstName; + +-- After renaming a column of the table +DESC StudentInfo; ++-----------------------+---------+-------+ +| col_name|data_type|comment| ++-----------------------+---------+-------+ +| FirstName| string| NULL| +| rollno| int| NULL| +| age| int| NULL| +|# Partition Information| | | +| # col_name|data_type|comment| +| age| int| NULL| ++-----------------------+---------+-------+ + +-- ALTER OR CHANGE COLUMNS +DESC StudentInfo; ++-----------------------+---------+-------+ +| col_name|data_type|comment| ++-----------------------+---------+-------+ +| FirstName| string| NULL| +| rollno| int| NULL| +| age| int| NULL| +|# Partition Information| | | +| # col_name|data_type|comment| +| age| int| NULL| ++-----------------------+---------+-------+ + +ALTER TABLE StudentInfo ALTER COLUMN FirstName COMMENT "new comment"; + +-- After ALTER or CHANGE COLUMNS +DESC StudentInfo; ++-----------------------+---------+-----------+ +| col_name|data_type| comment| ++-----------------------+---------+-----------+ +| FirstName| string|new comment| +| rollno| int| NULL| +| age| int| NULL| +|# Partition Information| | | +| # col_name|data_type| comment| +| age| int| NULL| ++-----------------------+---------+-----------+ + +-- REPLACE COLUMNS +DESC StudentInfo; ++-----------------------+---------+-----------+ +| 
col_name|data_type| comment| ++-----------------------+---------+-----------+ +| FirstName| string|new comment| +| rollno| int| NULL| +| age| int| NULL| +|# Partition Information| | | +| # col_name|data_type| comment| +| age| int| NULL| ++-----------------------+---------+-----------+ + +ALTER TABLE StudentInfo REPLACE COLUMNS (name string, ID int COMMENT 'new comment'); + +-- After replacing COLUMNS +DESC StudentInfo; ++-----=---------+---------+-----------+ +| col_name|data_type| comment| ++---------------+---------+-----------+ +| name| string| NULL| +| ID| int|new comment| +| # Partitioning| | | +|Not partitioned| | | ++---------------+---------+-----------+ + -- Add a new partition to a table SHOW PARTITIONS StudentInfo; +---------+ @@ -379,38 +588,6 @@ SHOW PARTITIONS StudentInfo; | age=20| +---------+ --- ALTER OR CHANGE COLUMNS -DESC StudentInfo; -+-----------------------+---------+-------+ -| col_name|data_type|comment| -+-----------------------+---------+-------+ -| name| string| NULL| -| rollno| int| NULL| -| LastName| string| NULL| -| DOB|timestamp| NULL| -| age| int| NULL| -|# Partition Information| | | -| # col_name|data_type|comment| -| age| int| NULL| -+-----------------------+---------+-------+ - -ALTER TABLE StudentInfo ALTER COLUMN name COMMENT "new comment"; - ---After ALTER or CHANGE COLUMNS -DESC StudentInfo; -+-----------------------+---------+-----------+ -| col_name|data_type| comment| -+-----------------------+---------+-----------+ -| name| string|new comment| -| rollno| int| NULL| -| LastName| string| NULL| -| DOB|timestamp| NULL| -| age| int| NULL| -|# Partition Information| | | -| # col_name|data_type| comment| -| age| int| NULL| -+-----------------------+---------+-----------+ - -- Change the fileformat ALTER TABLE loc_orc SET fileformat orc; @@ -435,6 +612,9 @@ ALTER TABLE dbx.tab1 SET TBLPROPERTIES ('comment' = 'This is a new comment.'); -- DROP TABLE PROPERTIES ALTER TABLE dbx.tab1 UNSET TBLPROPERTIES ('winner'); + +-- RECOVER PARTITIONS +ALTER TABLE dbx.tab1 RECOVER PARTITIONS; ``` ### Related Statements From 15885f2f8aea7905a3ecdf08906fa72355186030 Mon Sep 17 00:00:00 2001 From: mcdull-zhang Date: Wed, 9 Feb 2022 21:54:33 +0800 Subject: [PATCH 193/513] [SPARK-37652][SQL] Add test for optimize skewed join through union MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/34974, solved most scenarios of data skew in union. add test for it. ### Why are the changes needed? Added tests for the following scenarios: scenes 1 ``` Union SMJ ShuffleQueryStage ShuffleQueryStage SMJ ShuffleQueryStage ShuffleQueryStage ``` scenes 2 ``` Union SMJ ShuffleQueryStage ShuffleQueryStage HashAggregate ``` scenes 3: not yet supported, SMJ-3 will introduce a new shuffle, so SMJ-1 cannot be optimized ``` Union SMJ-1 ShuffleQueryStage ShuffleQueryStage SMJ-2 SMJ-3 ShuffleQueryStage ShuffleQueryStage HashAggregate ``` ### Does this PR introduce any user-facing change? No ### How was this patch tested? Pass the added test Closes #34908 from mcdull-zhang/skewed_union. 
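To try scenario 1 by hand, a rough `spark-shell` sketch that mirrors the added test is shown below; the tiny threshold values are only there to force skew handling on this small demo data and are not recommended settings.

```
// Sketch only: assumes a spark-shell session where `spark` is a SparkSession.
spark.conf.set("spark.sql.adaptive.enabled", "true")
spark.conf.set("spark.sql.autoBroadcastJoinThreshold", "-1")
spark.conf.set("spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes", "100")
spark.conf.set("spark.sql.adaptive.advisoryPartitionSizeInBytes", "100")

// Ten partitions with heavily repeated keys make both joins skewed.
spark.range(0, 1000, 1, 10).selectExpr("id % 3 AS key1", "id AS value1")
  .createOrReplaceTempView("skewData1")
spark.range(0, 1000, 1, 10).selectExpr("id % 1 AS key2", "id AS value2")
  .createOrReplaceTempView("skewData2")

// Scenario 1: both children of the union are skewed sort-merge joins.
spark.sql(
  """SELECT key1 FROM skewData1 JOIN skewData2 ON key1 = key2
    |UNION ALL
    |SELECT key2 FROM skewData1 JOIN skewData2 ON key1 = key2""".stripMargin).collect()
```

After running it, the AQE-optimized plan should show the sort merge joins handled as skew joins, which is what the added test asserts.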
Authored-by: mcdull-zhang Signed-off-by: Wenchen Fan --- .../adaptive/AdaptiveQueryExecSuite.scala | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 1bd8ad90f83da..d1c7064ad7763 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -2441,6 +2441,49 @@ class AdaptiveQueryExecSuite } } } + + test("SPARK-37652: optimize skewed join through union") { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.SKEW_JOIN_SKEWED_PARTITION_THRESHOLD.key -> "100", + SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "100") { + withTempView("skewData1", "skewData2") { + spark + .range(0, 1000, 1, 10) + .selectExpr("id % 3 as key1", "id as value1") + .createOrReplaceTempView("skewData1") + spark + .range(0, 1000, 1, 10) + .selectExpr("id % 1 as key2", "id as value2") + .createOrReplaceTempView("skewData2") + + def checkSkewJoin(query: String, joinNums: Int, optimizeSkewJoinNums: Int): Unit = { + val (_, innerAdaptivePlan) = runAdaptiveAndVerifyResult(query) + val joins = findTopLevelSortMergeJoin(innerAdaptivePlan) + val optimizeSkewJoins = joins.filter(_.isSkewJoin) + assert(joins.size == joinNums && optimizeSkewJoins.size == optimizeSkewJoinNums) + } + + // skewJoin union skewJoin + checkSkewJoin( + "SELECT key1 FROM skewData1 JOIN skewData2 ON key1 = key2 " + + "UNION ALL SELECT key2 FROM skewData1 JOIN skewData2 ON key1 = key2", 2, 2) + + // skewJoin union aggregate + checkSkewJoin( + "SELECT key1 FROM skewData1 JOIN skewData2 ON key1 = key2 " + + "UNION ALL SELECT key2 FROM skewData2 GROUP BY key2", 1, 1) + + // skewJoin1 union (skewJoin2 join aggregate) + // skewJoin2 will lead to extra shuffles, but skew1 cannot be optimized + checkSkewJoin( + "SELECT key1 FROM skewData1 JOIN skewData2 ON key1 = key2 UNION ALL " + + "SELECT key1 from (SELECT key1 FROM skewData1 JOIN skewData2 ON key1 = key2) tmp1 " + + "JOIN (SELECT key2 FROM skewData2 GROUP BY key2) tmp2 ON key1 = key2", 3, 0) + } + } + } } /** From 23f580beae7c82c14b0c3a2e821a0382dde99223 Mon Sep 17 00:00:00 2001 From: Bo Zhang Date: Wed, 9 Feb 2022 22:11:27 +0800 Subject: [PATCH 194/513] [SPARK-37585][SQL] Update InputMetric in DataSourceRDD with TaskCompletionListener ### What changes were proposed in this pull request? Before this change, Spark only updates `InputMetrics.bytesRead` in `DataSourceRDD` once every 1000 records, or at the end of the iterator (when `MetricsIterator.hasNext` returns false). So when the output is limited in a query but there is still data in the datasource, `InputMetrics.bytesRead` will not be updated at the end of the iterator, leading to incorrect metric results. This is more pronounced when the total number of records is less than 1000 (which will lead to `InputMetrics.bytesRead == 0`). This change fixes this bug by adding a force metric update in `TaskCompletionListener`. ### Why are the changes needed? This is to fix the bug that `InputMetrics.bytesRead` is not updated when there is still data in the datasource but the output is limited. ### Does this PR introduce _any_ user-facing change? Users will see more accurate `InputMetrics.bytesRead` with this change. 
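For illustration, the sketch below shows the completion-listener pattern this fix relies on; it is not the actual `DataSourceRDD` code, and the wrapper class and `flush` parameter are hypothetical names used only here.

```
import org.apache.spark.TaskContext

// Hypothetical iterator wrapper: flushes metrics when the task completes,
// even if the consumer stopped early (for example because of a LIMIT).
class MetricsFlushingIterator[T](underlying: Iterator[T], flush: () => Unit)
    extends Iterator[T] {

  // Registered once per task; runs regardless of how far the iterator was consumed.
  Option(TaskContext.get()).foreach(_.addTaskCompletionListener[Unit](_ => flush()))

  override def hasNext: Boolean = underlying.hasNext
  override def next(): T = underlying.next()
}
```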
### How was this patch tested? Added a unit test. Closes #35432 from bozhang2820/spark-37585. Authored-by: Bo Zhang Signed-off-by: Wenchen Fan --- .../datasources/v2/DataSourceRDD.scala | 5 +- .../spark/sql/FileBasedDataSourceSuite.scala | 58 +++++++++++++++++++ .../DataSourceScanExecRedactionSuite.scala | 31 ---------- 3 files changed, 62 insertions(+), 32 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceRDD.scala index 217a1d5750d42..a1eb857c4ed41 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceRDD.scala @@ -70,6 +70,7 @@ class DataSourceRDD( // In case of early stopping before consuming the entire iterator, // we need to do one more metric update at the end of the task. CustomMetrics.updateMetrics(reader.currentMetricsValues, customMetrics) + iter.forceUpdateMetrics() reader.close() } // TODO: SPARK-25083 remove the type erasure hack in data source scan @@ -130,10 +131,12 @@ private abstract class MetricsIterator[I](iter: Iterator[I]) extends Iterator[I] if (iter.hasNext) { true } else { - metricsHandler.updateMetrics(0, force = true) + forceUpdateMetrics() false } } + + def forceUpdateMetrics(): Unit = metricsHandler.updateMetrics(0, force = true) } private class MetricsRowIterator( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 39b08bd560bb1..8024f24e2eb13 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -534,6 +534,64 @@ class FileBasedDataSourceSuite extends QueryTest } } + test("SPARK-30362: test input metrics for DSV2") { + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "") { + Seq("json", "orc", "parquet").foreach { format => + withTempPath { path => + val dir = path.getCanonicalPath + spark.range(0, 10).write.format(format).save(dir) + val df = spark.read.format(format).load(dir) + val bytesReads = new mutable.ArrayBuffer[Long]() + val recordsRead = new mutable.ArrayBuffer[Long]() + val bytesReadListener = new SparkListener() { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { + bytesReads += taskEnd.taskMetrics.inputMetrics.bytesRead + recordsRead += taskEnd.taskMetrics.inputMetrics.recordsRead + } + } + sparkContext.addSparkListener(bytesReadListener) + try { + df.collect() + sparkContext.listenerBus.waitUntilEmpty() + assert(bytesReads.sum > 0) + assert(recordsRead.sum == 10) + } finally { + sparkContext.removeSparkListener(bytesReadListener) + } + } + } + } + } + + test("SPARK-37585: test input metrics for DSV2 with output limits") { + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "") { + Seq("json", "orc", "parquet").foreach { format => + withTempPath { path => + val dir = path.getCanonicalPath + spark.range(0, 100).write.format(format).save(dir) + val df = spark.read.format(format).load(dir) + val bytesReads = new mutable.ArrayBuffer[Long]() + val recordsRead = new mutable.ArrayBuffer[Long]() + val bytesReadListener = new SparkListener() { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { + bytesReads += taskEnd.taskMetrics.inputMetrics.bytesRead + recordsRead += taskEnd.taskMetrics.inputMetrics.recordsRead + } 
+ } + sparkContext.addSparkListener(bytesReadListener) + try { + df.limit(10).collect() + sparkContext.listenerBus.waitUntilEmpty() + assert(bytesReads.sum > 0) + assert(recordsRead.sum > 0) + } finally { + sparkContext.removeSparkListener(bytesReadListener) + } + } + } + } + } + test("Do not use cache on overwrite") { Seq("", "orc").foreach { useV1SourceReaderList => withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> useV1SourceReaderList) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala index 612cd6f0d891b..e29b7f579fa91 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala @@ -18,13 +18,11 @@ package org.apache.spark.sql.execution import java.io.File -import scala.collection.mutable import scala.util.Random import org.apache.hadoop.fs.Path import org.apache.spark.SparkConf -import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} import org.apache.spark.sql.{DataFrame, QueryTest} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan @@ -215,33 +213,4 @@ class DataSourceV2ScanExecRedactionSuite extends DataSourceScanRedactionTest { } } } - - test("SPARK-30362: test input metrics for DSV2") { - withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "") { - Seq("json", "orc", "parquet").foreach { format => - withTempPath { path => - val dir = path.getCanonicalPath - spark.range(0, 10).write.format(format).save(dir) - val df = spark.read.format(format).load(dir) - val bytesReads = new mutable.ArrayBuffer[Long]() - val recordsRead = new mutable.ArrayBuffer[Long]() - val bytesReadListener = new SparkListener() { - override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { - bytesReads += taskEnd.taskMetrics.inputMetrics.bytesRead - recordsRead += taskEnd.taskMetrics.inputMetrics.recordsRead - } - } - sparkContext.addSparkListener(bytesReadListener) - try { - df.collect() - sparkContext.listenerBus.waitUntilEmpty() - assert(bytesReads.sum > 0) - assert(recordsRead.sum == 10) - } finally { - sparkContext.removeSparkListener(bytesReadListener) - } - } - } - } - } } From 305388dce4d5334e9e2f3dc0aa8baabf06a52e2c Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 9 Feb 2022 22:29:32 +0800 Subject: [PATCH 195/513] [SPARK-37969][SQL] HiveFileFormat should check field name ### What changes were proposed in this pull request? When write ORC, Spark side check passed, but when initial OutputWriter, it failed. ``` [info] Cause: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0) (10.12.188.15 executor driver): java.lang.IllegalArgumentException: Error: : expected at the position 19 of 'struct' but '(' is found. 
[info] at org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils$TypeInfoParser.expect(TypeInfoUtils.java:384) [info] at org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils$TypeInfoParser.expect(TypeInfoUtils.java:355) [info] at org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils$TypeInfoParser.parseType(TypeInfoUtils.java:507) [info] at org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils$TypeInfoParser.parseTypeInfos(TypeInfoUtils.java:329) [info] at org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString(TypeInfoUtils.java:814) [info] at org.apache.hadoop.hive.ql.io.orc.OrcSerde.initialize(OrcSerde.java:112) [info] at org.apache.spark.sql.hive.execution.HiveOutputWriter.(HiveFileFormat.scala:122) [info] at org.apache.spark.sql.hive.execution.HiveFileFormat$$anon$1.newInstance(HiveFileFormat.scala:105) [info] at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161) [info] at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.(FileFormatDataWriter.scala:146) [info] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:313) [info] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$20(FileFormatWriter.scala:252) [info] at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) [info] at org.apache.spark.scheduler.Task.run(Task.scala:136) [info] at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:507) [info] at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1475) [info] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:510) [info] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [info] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [info] at java.lang.Thread.run(Thread.java:748) [info] ``` Error for parquet ``` [info] Cause: java.lang.IllegalArgumentException: field ended by ';': expected ';' but got 'IF' at line 2: optional int32 (IF [info] at org.apache.parquet.schema.MessageTypeParser.check(MessageTypeParser.java:239) [info] at org.apache.parquet.schema.MessageTypeParser.addPrimitiveType(MessageTypeParser.java:208) [info] at org.apache.parquet.schema.MessageTypeParser.addType(MessageTypeParser.java:113) [info] at org.apache.parquet.schema.MessageTypeParser.addGroupTypeFields(MessageTypeParser.java:101) [info] at org.apache.parquet.schema.MessageTypeParser.parse(MessageTypeParser.java:94) [info] at org.apache.parquet.schema.MessageTypeParser.parseMessageType(MessageTypeParser.java:84) [info] at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport.getSchema(DataWritableWriteSupport.java:43) [info] at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport.init(DataWritableWriteSupport.java:48) [info] at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:476) [info] at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:430) [info] at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:425) [info] at org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper.(ParquetRecordWriterWrapper.java:70) [info] at org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat.getParquerRecordWriterWrapper(MapredParquetOutputFormat.java:137) [info] at 
org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat.getHiveRecordWriter(MapredParquetOutputFormat.java:126) [info] at org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getRecordWriter(HiveFileFormatUtils.java:286) [info] at org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getHiveRecordWriter(HiveFileFormatUtils.java:271) [info] at org.apache.spark.sql.hive.execution.HiveOutputWriter.(HiveFileFormat.scala:132) [info] at org.apache.spark.sql.hive.execution.HiveFileFormat$$anon$1.newInstance(HiveFileFormat.scala:105) [info] at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:161) [info] at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.(FileFormatDataWriter.scala:146) [info] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:313) [info] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$20(FileFormatWriter.scala:252) [info] at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) [info] at org.apache.spark.scheduler.Task.run(Task.scala:136) [info] at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:507) [info] at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1475) [info] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:510) [info] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [info] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [info] at java.lang.Thread.run(Thread.java:748) ``` We should make this type of error fail earlier. ### Why are the changes needed? Failing earlier avoids wasted computation. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? WIP Closes #35258 from AngersZhuuuu/SPARK-37969.
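As a hedged illustration of the workaround implied by the new error message, aliasing the offending expressions gives the output columns Hive-legal field names; the view name and output directory below are made up for this example.

```
// Sketch only: assumes a SparkSession with Hive support named `spark`;
// `/tmp/spark-37969-demo` is just an illustrative output directory.
spark.sql("CREATE OR REPLACE TEMPORARY VIEW v AS SELECT id FROM range(1)")

// Without aliases, generated column names like `(IF((1 = 1), 1, 0))` are rejected
// up front by the new check; with aliases the schema uses plain identifiers.
spark.sql(
  """INSERT OVERWRITE LOCAL DIRECTORY '/tmp/spark-37969-demo' STORED AS ORC
    |SELECT id, IF(1 = 1, 1, 0) AS flag, ABS(id) AS abs_id FROM v""".stripMargin)
```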
Authored-by: Angerszhuuuu Signed-off-by: Wenchen Fan --- .../sql/hive/execution/HiveFileFormat.scala | 16 ++++++++++++++ .../sql/hive/execution/HiveDDLSuite.scala | 22 ++++++++++--------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala index b6b3cac4130a0..7dc1fbb433cd5 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.io.{HiveFileFormatUtils, HiveOutputFormat} import org.apache.hadoop.hive.serde2.Serializer import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspectorUtils, StructObjectInspector} import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils import org.apache.hadoop.io.Writable import org.apache.hadoop.mapred.{JobConf, Reporter} import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} @@ -106,6 +107,21 @@ class HiveFileFormat(fileSinkConf: FileSinkDesc) } } } + + override def supportFieldName(name: String): Boolean = { + fileSinkConf.getTableInfo.getOutputFileFormatClassName match { + case "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat" => + !name.matches(".*[ ,;{}()\n\t=].*") + case "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat" => + try { + TypeInfoUtils.getTypeInfoFromTypeString(s"struct<$name:int>") + true + } catch { + case _: IllegalArgumentException => false + } + case _ => true + } + } } class HiveOutputWriter( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index e5bd1aa1194ce..c3c47c58b90be 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2926,16 +2926,18 @@ class HiveDDLSuite } } - test("SPARK-33844: Insert overwrite directory should check schema too") { + test("SPARK-33844, 37969: Insert overwrite directory should check schema too") { withView("v") { spark.range(1).createTempView("v") withTempPath { path => - val e = intercept[SparkException] { - spark.sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' " + - s"STORED AS PARQUET SELECT ID, if(1=1, 1, 0), abs(id), '^-' FROM v") - }.getCause.getCause.getMessage - assert(e.contains( - "field ended by ';': expected ';' but got 'IF' at line 2: optional int32 (IF")) + Seq("PARQUET", "ORC").foreach { format => + val e = intercept[SparkException] { + spark.sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' " + + s"STORED AS $format SELECT ID, if(1=1, 1, 0), abs(id), '^-' FROM v") + }.getCause.getMessage + assert(e.contains("Column name \"(IF((1 = 1), 1, 0))\" contains" + + " invalid character(s). Please use alias to rename it.")) + } } } } @@ -2953,9 +2955,9 @@ class HiveDDLSuite |NAMED_STRUCT('ID', ID, 'IF(ID=1,ID,0)', IF(ID=1,ID,0), 'B', ABS(ID)) AS col1 |FROM v """.stripMargin) - }.getCause.getCause.getMessage - assert(e.contains("expected at the position 19 of " + - "'struct' but '(' is found.")) + }.getCause.getMessage + assert(e.contains("Column name \"IF(ID=1,ID,0)\" contains invalid character(s). 
" + + "Please use alias to rename it.")) } } } From 4dde01fc5b0ed44fd6c5ad8da093650931e4dcd4 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 9 Feb 2022 13:20:46 -0800 Subject: [PATCH 196/513] [SPARK-38151][SQL][TESTS] Handle `Pacific/Kanton` in DateTimeUtilsSuite ### What changes were proposed in this pull request? This PR aims to fix the flaky UT failures due to https://bugs.openjdk.java.net/browse/JDK-8274407 (`Update Timezone Data to 2021c`) and its backport commits that renamed 'Pacific/Enderbury' to 'Pacific/Kanton' in the latest Java `17.0.2`, `11.0.14`, and `8u311`. ``` Rename Pacific/Enderbury to Pacific/Kanton. ``` ### Why are the changes needed? The flaky failures were observed twice in `GitHub Action` environment like the following. **MASTER** - https://github.com/dongjoon-hyun/spark/runs/5119322349?check_suite_focus=true ``` [info] - daysToMicros and microsToDays *** FAILED *** (620 milliseconds) [info] 9131 did not equal 9130 Round trip of 9130 did not work in tz Pacific/Kanton (DateTimeUtilsSuite.scala:783) ``` **BRANCH-3.2** - https://github.com/apache/spark/runs/5122380604?check_suite_focus=true ``` [info] - daysToMicros and microsToDays *** FAILED *** (643 milliseconds) [info] 9131 did not equal 9130 Round trip of 9130 did not work in tz Pacific/Kanton (DateTimeUtilsSuite.scala:771) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs Closes #35468 from dongjoon-hyun/SPARK-38151. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index af0c26e39a7c8..09a011d5ccee1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -766,12 +766,15 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { assert(daysToMicros(16800, UTC) === expected) // There are some days are skipped entirely in some timezone, skip them here. + // JDK-8274407 and its backport commits renamed 'Pacific/Enderbury' to 'Pacific/Kanton' + // in Java 8u311, 11.0.14, and 17.0.2 val skipped_days = Map[String, Set[Int]]( "Kwajalein" -> Set(8632, 8633, 8634), "Pacific/Apia" -> Set(15338), "Pacific/Enderbury" -> Set(9130, 9131), "Pacific/Fakaofo" -> Set(15338), "Pacific/Kiritimati" -> Set(9130, 9131), + "Pacific/Kanton" -> Set(9130, 9131), "Pacific/Kwajalein" -> Set(8632, 8633, 8634), MIT.getId -> Set(15338)) for (zid <- ALL_TIMEZONES) { From 22f611b4f279b5ba4d204043788d2c410841b791 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 10 Feb 2022 09:36:07 +0900 Subject: [PATCH 197/513] [SPARK-38164][SQL] New SQL functions: try_subtract and try_multiply ### What changes were proposed in this pull request? Add new SQL function: try_subtract and try_multiply. The two new functions are identical to the add operator `-` and `*`, except that they return `NULL` result instead of throwing an exception on integral value overflow. ### Why are the changes needed? Similar to try_add and try_divide: 1. Users can manage to finish queries without interruptions in ANSI mode. 2. Users can get NULLs instead of unreasonable results if overflow occurs when ANSI mode is off. 
For example, the behavior of the following SQL operations is unreasonable: ``` 2147483647 * 2 => -2 ``` With the new safe version SQL functions: ``` try_multiply(2147483647, 2) => null ``` ### Does this PR introduce _any_ user-facing change? Yes, new SQL function: try_subtract and try_multiply ### How was this patch tested? Unit tests Closes #35461 from gengliangwang/moreTryFunc. Authored-by: Gengliang Wang Signed-off-by: Hyukjin Kwon --- docs/sql-ref-ansi-compliance.md | 2 + .../catalyst/analysis/FunctionRegistry.scala | 2 + .../sql/catalyst/expressions/TryEval.scala | 64 +++++++ .../catalyst/expressions/TryEvalSuite.scala | 26 +++ .../sql-functions/sql-expression-schema.md | 4 +- .../sql-tests/inputs/try_arithmetic.sql | 28 +++ .../results/ansi/try_arithmetic.sql.out | 162 +++++++++++++++++- .../sql-tests/results/try_arithmetic.sql.out | 162 +++++++++++++++++- 8 files changed, 447 insertions(+), 3 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 1b4a778edf85d..8e03cafeb3043 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -306,6 +306,8 @@ The behavior of some SQL operators can be different under ANSI mode (`spark.sql. When ANSI mode is on, it throws exceptions for invalid operations. You can use the following SQL functions to suppress such exceptions. - `try_cast`: identical to `CAST`, except that it returns `NULL` result instead of throwing an exception on runtime error. - `try_add`: identical to the add operator `+`, except that it returns `NULL` result instead of throwing an exception on integral value overflow. + - `try_subtract`: identical to the add operator `-`, except that it returns `NULL` result instead of throwing an exception on integral value overflow. + - `try_multiply`: identical to the add operator `*`, except that it returns `NULL` result instead of throwing an exception on integral value overflow. - `try_divide`: identical to the division operator `/`, except that it returns `NULL` result instead of throwing an exception on dividing 0. - `try_element_at`: identical to the function `element_at`, except that it returns `NULL` result instead of throwing an exception on array's index out of bound or map's key not found. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index e98759bd5021f..04acaa871eb85 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -412,6 +412,8 @@ object FunctionRegistry { // "try_*" function which always return Null instead of runtime error. 
expression[TryAdd]("try_add"), expression[TryDivide]("try_divide"), + expression[TrySubtract]("try_subtract"), + expression[TryMultiply]("try_multiply"), expression[TryElementAt]("try_element_at"), // aggregate functions diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala index bc2604a3447ed..4663d4826286a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala @@ -124,3 +124,67 @@ case class TryDivide(left: Expression, right: Expression, child: Expression) override protected def withNewChildInternal(newChild: Expression): Expression = this.copy(child = newChild) } + +@ExpressionDescription( + usage = "expr1 _FUNC_ expr2 - Returns `expr1`-`expr2` and the result is null on overflow. " + + "The acceptable input types are the same with the `-` operator.", + examples = """ + Examples: + > SELECT _FUNC_(2, 1); + 1 + > SELECT _FUNC_(-2147483648, 1); + NULL + > SELECT _FUNC_(date'2021-01-02', 1); + 2021-01-01 + > SELECT _FUNC_(date'2021-01-01', interval 1 year); + 2020-01-01 + > SELECT _FUNC_(timestamp'2021-01-02 00:00:00', interval 1 day); + 2021-01-01 00:00:00 + > SELECT _FUNC_(interval 2 year, interval 1 year); + 1-0 + """, + since = "3.3.0", + group = "math_funcs") +case class TrySubtract(left: Expression, right: Expression, child: Expression) + extends RuntimeReplaceable { + def this(left: Expression, right: Expression) = + this(left, right, TryEval(Subtract(left, right, failOnError = true))) + + override def flatArguments: Iterator[Any] = Iterator(left, right) + + override def exprsReplaced: Seq[Expression] = Seq(left, right) + + override def prettyName: String = "try_subtract" + + override protected def withNewChildInternal(newChild: Expression): Expression = + this.copy(child = newChild) +} + +@ExpressionDescription( + usage = "expr1 _FUNC_ expr2 - Returns `expr1`*`expr2` and the result is null on overflow. 
" + + "The acceptable input types are the same with the `*` operator.", + examples = """ + Examples: + > SELECT _FUNC_(2, 3); + 6 + > SELECT _FUNC_(-2147483648, 10); + NULL + > SELECT _FUNC_(interval 2 year, 3); + 6-0 + """, + since = "3.3.0", + group = "math_funcs") +case class TryMultiply(left: Expression, right: Expression, child: Expression) + extends RuntimeReplaceable { + def this(left: Expression, right: Expression) = + this(left, right, TryEval(Multiply(left, right, failOnError = true))) + + override def flatArguments: Iterator[Any] = Iterator(left, right) + + override def exprsReplaced: Seq[Expression] = Seq(left, right) + + override def prettyName: String = "try_multiply" + + override protected def withNewChildInternal(newChild: Expression): Expression = + this.copy(child = newChild) +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryEvalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryEvalSuite.scala index 928077523d7e3..1eccd46d960f7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryEvalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryEvalSuite.scala @@ -45,4 +45,30 @@ class TryEvalSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(input, expected) } } + + test("try_subtract") { + Seq( + (1, 1, 0), + (Int.MaxValue, -1, null), + (Int.MinValue, 1, null) + ).foreach { case (a, b, expected) => + val left = Literal(a) + val right = Literal(b) + val input = TryEval(Subtract(left, right, failOnError = true)) + checkEvaluation(input, expected) + } + } + + test("try_multiply") { + Seq( + (2, 3, 6), + (Int.MaxValue, -10, null), + (Int.MinValue, 10, null) + ).foreach { case (a, b, expected) => + val left = Literal(a) + val right = Literal(b) + val input = TryEval(Multiply(left, right, failOnError = true)) + checkEvaluation(input, expected) + } + } } diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index b742a05fdfb75..126960e4fdc94 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 376 + - Number of queries: 378 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -311,6 +311,8 @@ | org.apache.spark.sql.catalyst.expressions.TryAdd | try_add | SELECT try_add(1, 2) | struct | | org.apache.spark.sql.catalyst.expressions.TryDivide | try_divide | SELECT try_divide(3, 2) | struct | | org.apache.spark.sql.catalyst.expressions.TryElementAt | try_element_at | SELECT try_element_at(array(1, 2, 3), 2) | struct | +| org.apache.spark.sql.catalyst.expressions.TryMultiply | try_multiply | SELECT try_multiply(2, 3) | struct | +| org.apache.spark.sql.catalyst.expressions.TrySubtract | try_subtract | SELECT try_subtract(2, 1) | struct | | org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct | | org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct | | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/try_arithmetic.sql 
b/sql/core/src/test/resources/sql-tests/inputs/try_arithmetic.sql index 5962a5d55bb89..586680f550761 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/try_arithmetic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/try_arithmetic.sql @@ -40,3 +40,31 @@ SELECT try_divide(interval 2 year, 0); SELECT try_divide(interval 2 second, 0); SELECT try_divide(interval 2147483647 month, 0.5); SELECT try_divide(interval 106751991 day, 0.5); + +-- Numeric - Numeric +SELECT try_subtract(1, 1); +SELECT try_subtract(2147483647, -1); +SELECT try_subtract(-2147483648, 1); +SELECT try_subtract(9223372036854775807L, -1); +SELECT try_subtract(-9223372036854775808L, 1); + +-- Interval - Interval +SELECT try_subtract(interval 2 year, interval 3 year); +SELECT try_subtract(interval 3 second, interval 2 second); +SELECT try_subtract(interval 2147483647 month, interval -2 month); +SELECT try_subtract(interval 106751991 day, interval -3 day); + +-- Numeric * Numeric +SELECT try_multiply(2, 3); +SELECT try_multiply(2147483647, -2); +SELECT try_multiply(-2147483648, 2); +SELECT try_multiply(9223372036854775807L, 2); +SELECT try_multiply(-9223372036854775808L, -2); + +-- Interval * Numeric +SELECT try_multiply(interval 2 year, 2); +SELECT try_multiply(interval 2 second, 2); +SELECT try_multiply(interval 2 year, 0); +SELECT try_multiply(interval 2 second, 0); +SELECT try_multiply(interval 2147483647 month, 2); +SELECT try_multiply(interval 106751991 day, 2); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/try_arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/try_arithmetic.sql.out index 47faeb3ce9ea4..f3c483cfafea8 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/try_arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/try_arithmetic.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 29 +-- Number of queries: 49 -- !query @@ -233,3 +233,163 @@ SELECT try_divide(interval 106751991 day, 0.5) struct -- !query output NULL + + +-- !query +SELECT try_subtract(1, 1) +-- !query schema +struct +-- !query output +0 + + +-- !query +SELECT try_subtract(2147483647, -1) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_subtract(-2147483648, 1) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_subtract(9223372036854775807L, -1) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_subtract(-9223372036854775808L, 1) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_subtract(interval 2 year, interval 3 year) +-- !query schema +struct +-- !query output +-1-0 + + +-- !query +SELECT try_subtract(interval 3 second, interval 2 second) +-- !query schema +struct +-- !query output +0 00:00:01.000000000 + + +-- !query +SELECT try_subtract(interval 2147483647 month, interval -2 month) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_subtract(interval 106751991 day, interval -3 day) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_multiply(2, 3) +-- !query schema +struct +-- !query output +6 + + +-- !query +SELECT try_multiply(2147483647, -2) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_multiply(-2147483648, 2) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_multiply(9223372036854775807L, 2) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_multiply(-9223372036854775808L, 
-2) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_multiply(interval 2 year, 2) +-- !query schema +struct +-- !query output +4-0 + + +-- !query +SELECT try_multiply(interval 2 second, 2) +-- !query schema +struct +-- !query output +0 00:00:04.000000000 + + +-- !query +SELECT try_multiply(interval 2 year, 0) +-- !query schema +struct +-- !query output +0-0 + + +-- !query +SELECT try_multiply(interval 2 second, 0) +-- !query schema +struct +-- !query output +0 00:00:00.000000000 + + +-- !query +SELECT try_multiply(interval 2147483647 month, 2) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_multiply(interval 106751991 day, 2) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out index 47faeb3ce9ea4..f3c483cfafea8 100644 --- a/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 29 +-- Number of queries: 49 -- !query @@ -233,3 +233,163 @@ SELECT try_divide(interval 106751991 day, 0.5) struct -- !query output NULL + + +-- !query +SELECT try_subtract(1, 1) +-- !query schema +struct +-- !query output +0 + + +-- !query +SELECT try_subtract(2147483647, -1) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_subtract(-2147483648, 1) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_subtract(9223372036854775807L, -1) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_subtract(-9223372036854775808L, 1) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_subtract(interval 2 year, interval 3 year) +-- !query schema +struct +-- !query output +-1-0 + + +-- !query +SELECT try_subtract(interval 3 second, interval 2 second) +-- !query schema +struct +-- !query output +0 00:00:01.000000000 + + +-- !query +SELECT try_subtract(interval 2147483647 month, interval -2 month) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_subtract(interval 106751991 day, interval -3 day) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_multiply(2, 3) +-- !query schema +struct +-- !query output +6 + + +-- !query +SELECT try_multiply(2147483647, -2) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_multiply(-2147483648, 2) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_multiply(9223372036854775807L, 2) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_multiply(-9223372036854775808L, -2) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_multiply(interval 2 year, 2) +-- !query schema +struct +-- !query output +4-0 + + +-- !query +SELECT try_multiply(interval 2 second, 2) +-- !query schema +struct +-- !query output +0 00:00:04.000000000 + + +-- !query +SELECT try_multiply(interval 2 year, 0) +-- !query schema +struct +-- !query output +0-0 + + +-- !query +SELECT try_multiply(interval 2 second, 0) +-- !query schema +struct +-- !query output +0 00:00:00.000000000 + + +-- !query +SELECT try_multiply(interval 2147483647 month, 2) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_multiply(interval 106751991 day, 2) +-- !query schema +struct +-- !query output +NULL From 
43e34f56e82d9102fa4142a8e899dd2f4b810e00 Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 10 Feb 2022 02:53:25 +0100 Subject: [PATCH 198/513] [SPARK-37406][PYTHON][ML] Inline hints for pyspark.ml.fpm ### What changes were proposed in this pull request? This PR migrates type `pyspark.ml.fpm` annotations from stub file to inline type hints. ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35407 from zero323/SPARK-37406. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/ml/fpm.py | 124 ++++++++++++++++++++------------------ python/pyspark/ml/fpm.pyi | 110 --------------------------------- 2 files changed, 67 insertions(+), 167 deletions(-) delete mode 100644 python/pyspark/ml/fpm.pyi diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py index 9cfd3afb386d1..0795ec2348f8b 100644 --- a/python/pyspark/ml/fpm.py +++ b/python/pyspark/ml/fpm.py @@ -16,6 +16,7 @@ # import sys +from typing import Any, Dict, Optional, TYPE_CHECKING from pyspark import keyword_only, since from pyspark.sql import DataFrame @@ -23,6 +24,9 @@ from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams from pyspark.ml.param.shared import HasPredictionCol, Param, TypeConverters, Params +if TYPE_CHECKING: + from py4j.java_gateway import JavaObject # type: ignore[import] + __all__ = ["FPGrowth", "FPGrowthModel", "PrefixSpan"] @@ -33,10 +37,10 @@ class _FPGrowthParams(HasPredictionCol): .. versionadded:: 3.0.0 """ - itemsCol = Param( + itemsCol: Param[str] = Param( Params._dummy(), "itemsCol", "items column name", typeConverter=TypeConverters.toString ) - minSupport = Param( + minSupport: Param[float] = Param( Params._dummy(), "minSupport", "Minimal support level of the frequent pattern. [0.0, 1.0]. " @@ -44,7 +48,7 @@ class _FPGrowthParams(HasPredictionCol): + "times will be output in the frequent itemsets.", typeConverter=TypeConverters.toFloat, ) - numPartitions = Param( + numPartitions: Param[int] = Param( Params._dummy(), "numPartitions", "Number of partitions (at least 1) used by parallel FP-growth. " @@ -52,7 +56,7 @@ class _FPGrowthParams(HasPredictionCol): + "and partition number of the input dataset is used.", typeConverter=TypeConverters.toInt, ) - minConfidence = Param( + minConfidence: Param[float] = Param( Params._dummy(), "minConfidence", "Minimal confidence for generating Association Rule. [0.0, 1.0]. " @@ -61,38 +65,38 @@ class _FPGrowthParams(HasPredictionCol): typeConverter=TypeConverters.toFloat, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_FPGrowthParams, self).__init__(*args) self._setDefault( minSupport=0.3, minConfidence=0.8, itemsCol="items", predictionCol="prediction" ) - def getItemsCol(self): + def getItemsCol(self) -> str: """ Gets the value of itemsCol or its default value. """ return self.getOrDefault(self.itemsCol) - def getMinSupport(self): + def getMinSupport(self) -> float: """ Gets the value of minSupport or its default value. """ return self.getOrDefault(self.minSupport) - def getNumPartitions(self): + def getNumPartitions(self) -> int: """ Gets the value of :py:attr:`numPartitions` or its default value. """ return self.getOrDefault(self.numPartitions) - def getMinConfidence(self): + def getMinConfidence(self) -> float: """ Gets the value of minConfidence or its default value. 
""" return self.getOrDefault(self.minConfidence) -class FPGrowthModel(JavaModel, _FPGrowthParams, JavaMLWritable, JavaMLReadable): +class FPGrowthModel(JavaModel, _FPGrowthParams, JavaMLWritable, JavaMLReadable["FPGrowthModel"]): """ Model fitted by FPGrowth. @@ -100,29 +104,29 @@ class FPGrowthModel(JavaModel, _FPGrowthParams, JavaMLWritable, JavaMLReadable): """ @since("3.0.0") - def setItemsCol(self, value): + def setItemsCol(self, value: str) -> "FPGrowthModel": """ Sets the value of :py:attr:`itemsCol`. """ return self._set(itemsCol=value) @since("3.0.0") - def setMinConfidence(self, value): + def setMinConfidence(self, value: float) -> "FPGrowthModel": """ Sets the value of :py:attr:`minConfidence`. """ return self._set(minConfidence=value) @since("3.0.0") - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "FPGrowthModel": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) - @property + @property # type: ignore[misc] @since("2.2.0") - def freqItemsets(self): + def freqItemsets(self) -> DataFrame: """ DataFrame with two columns: * `items` - Itemset of the same type as the input column. @@ -130,9 +134,9 @@ def freqItemsets(self): """ return self._call_java("freqItemsets") - @property + @property # type: ignore[misc] @since("2.2.0") - def associationRules(self): + def associationRules(self) -> DataFrame: """ DataFrame with four columns: * `antecedent` - Array of the same type as the input column. @@ -143,7 +147,9 @@ def associationRules(self): return self._call_java("associationRules") -class FPGrowth(JavaEstimator, _FPGrowthParams, JavaMLWritable, JavaMLReadable): +class FPGrowth( + JavaEstimator[FPGrowthModel], _FPGrowthParams, JavaMLWritable, JavaMLReadable["FPGrowth"] +): r""" A parallel FP-growth algorithm to mine frequent itemsets. @@ -229,16 +235,17 @@ class FPGrowth(JavaEstimator, _FPGrowthParams, JavaMLWritable, JavaMLReadable): >>> fpm.transform(data).take(1) == model2.transform(data).take(1) True """ + _input_kwargs: Dict[str, Any] @keyword_only def __init__( self, *, - minSupport=0.3, - minConfidence=0.8, - itemsCol="items", - predictionCol="prediction", - numPartitions=None, + minSupport: float = 0.3, + minConfidence: float = 0.8, + itemsCol: str = "items", + predictionCol: str = "prediction", + numPartitions: Optional[int] = None, ): """ __init__(self, \\*, minSupport=0.3, minConfidence=0.8, itemsCol="items", \ @@ -254,12 +261,12 @@ def __init__( def setParams( self, *, - minSupport=0.3, - minConfidence=0.8, - itemsCol="items", - predictionCol="prediction", - numPartitions=None, - ): + minSupport: float = 0.3, + minConfidence: float = 0.8, + itemsCol: str = "items", + predictionCol: str = "prediction", + numPartitions: Optional[int] = None, + ) -> "FPGrowth": """ setParams(self, \\*, minSupport=0.3, minConfidence=0.8, itemsCol="items", \ predictionCol="prediction", numPartitions=None) @@ -267,37 +274,37 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def setItemsCol(self, value): + def setItemsCol(self, value: str) -> "FPGrowth": """ Sets the value of :py:attr:`itemsCol`. """ return self._set(itemsCol=value) - def setMinSupport(self, value): + def setMinSupport(self, value: float) -> "FPGrowth": """ Sets the value of :py:attr:`minSupport`. """ return self._set(minSupport=value) - def setNumPartitions(self, value): + def setNumPartitions(self, value: int) -> "FPGrowth": """ Sets the value of :py:attr:`numPartitions`. 
""" return self._set(numPartitions=value) - def setMinConfidence(self, value): + def setMinConfidence(self, value: float) -> "FPGrowth": """ Sets the value of :py:attr:`minConfidence`. """ return self._set(minConfidence=value) - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "FPGrowth": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> FPGrowthModel: return FPGrowthModel(java_model) @@ -347,7 +354,9 @@ class PrefixSpan(JavaParams): ... """ - minSupport = Param( + _input_kwargs: Dict[str, Any] + + minSupport: Param[float] = Param( Params._dummy(), "minSupport", "The minimal support level of the " @@ -356,14 +365,14 @@ class PrefixSpan(JavaParams): typeConverter=TypeConverters.toFloat, ) - maxPatternLength = Param( + maxPatternLength: Param[int] = Param( Params._dummy(), "maxPatternLength", "The maximal length of the sequential pattern. Must be > 0.", typeConverter=TypeConverters.toInt, ) - maxLocalProjDBSize = Param( + maxLocalProjDBSize: Param[int] = Param( Params._dummy(), "maxLocalProjDBSize", "The maximum number of items (including delimiters used in the " @@ -374,7 +383,7 @@ class PrefixSpan(JavaParams): typeConverter=TypeConverters.toInt, ) - sequenceCol = Param( + sequenceCol: Param[str] = Param( Params._dummy(), "sequenceCol", "The name of the sequence column in " @@ -386,10 +395,10 @@ class PrefixSpan(JavaParams): def __init__( self, *, - minSupport=0.1, - maxPatternLength=10, - maxLocalProjDBSize=32000000, - sequenceCol="sequence", + minSupport: float = 0.1, + maxPatternLength: int = 10, + maxLocalProjDBSize: int = 32000000, + sequenceCol: str = "sequence", ): """ __init__(self, \\*, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=32000000, \ @@ -408,11 +417,11 @@ def __init__( def setParams( self, *, - minSupport=0.1, - maxPatternLength=10, - maxLocalProjDBSize=32000000, - sequenceCol="sequence", - ): + minSupport: float = 0.1, + maxPatternLength: int = 10, + maxLocalProjDBSize: int = 32000000, + sequenceCol: str = "sequence", + ) -> "PrefixSpan": """ setParams(self, \\*, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=32000000, \ sequenceCol="sequence") @@ -421,62 +430,62 @@ def setParams( return self._set(**kwargs) @since("3.0.0") - def setMinSupport(self, value): + def setMinSupport(self, value: float) -> "PrefixSpan": """ Sets the value of :py:attr:`minSupport`. """ return self._set(minSupport=value) @since("3.0.0") - def getMinSupport(self): + def getMinSupport(self) -> float: """ Gets the value of minSupport or its default value. """ return self.getOrDefault(self.minSupport) @since("3.0.0") - def setMaxPatternLength(self, value): + def setMaxPatternLength(self, value: int) -> "PrefixSpan": """ Sets the value of :py:attr:`maxPatternLength`. """ return self._set(maxPatternLength=value) @since("3.0.0") - def getMaxPatternLength(self): + def getMaxPatternLength(self) -> int: """ Gets the value of maxPatternLength or its default value. """ return self.getOrDefault(self.maxPatternLength) @since("3.0.0") - def setMaxLocalProjDBSize(self, value): + def setMaxLocalProjDBSize(self, value: int) -> "PrefixSpan": """ Sets the value of :py:attr:`maxLocalProjDBSize`. """ return self._set(maxLocalProjDBSize=value) @since("3.0.0") - def getMaxLocalProjDBSize(self): + def getMaxLocalProjDBSize(self) -> int: """ Gets the value of maxLocalProjDBSize or its default value. 
""" return self.getOrDefault(self.maxLocalProjDBSize) @since("3.0.0") - def setSequenceCol(self, value): + def setSequenceCol(self, value: str) -> "PrefixSpan": """ Sets the value of :py:attr:`sequenceCol`. """ return self._set(sequenceCol=value) @since("3.0.0") - def getSequenceCol(self): + def getSequenceCol(self) -> str: """ Gets the value of sequenceCol or its default value. """ return self.getOrDefault(self.sequenceCol) - def findFrequentSequentialPatterns(self, dataset): + def findFrequentSequentialPatterns(self, dataset: DataFrame) -> DataFrame: """ Finds the complete set of frequent sequential patterns in the input sequences of itemsets. @@ -499,6 +508,7 @@ def findFrequentSequentialPatterns(self, dataset): """ self._transfer_params_to_java() + assert self._java_obj is not None jdf = self._java_obj.findFrequentSequentialPatterns(dataset._jdf) return DataFrame(jdf, dataset.sql_ctx) diff --git a/python/pyspark/ml/fpm.pyi b/python/pyspark/ml/fpm.pyi deleted file mode 100644 index 00d5c5fe6b055..0000000000000 --- a/python/pyspark/ml/fpm.pyi +++ /dev/null @@ -1,110 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any, Optional - -from pyspark.ml.util import JavaMLReadable, JavaMLWritable -from pyspark.ml.wrapper import JavaEstimator, JavaParams, JavaModel -from pyspark.ml.param.shared import HasPredictionCol -from pyspark.sql.dataframe import DataFrame - -from pyspark.ml.param import Param - -from py4j.java_gateway import JavaObject # type: ignore[import] - -class _FPGrowthParams(HasPredictionCol): - itemsCol: Param[str] - minSupport: Param[float] - numPartitions: Param[int] - minConfidence: Param[float] - def __init__(self, *args: Any): ... - def getItemsCol(self) -> str: ... - def getMinSupport(self) -> float: ... - def getNumPartitions(self) -> int: ... - def getMinConfidence(self) -> float: ... - -class FPGrowthModel(JavaModel, _FPGrowthParams, JavaMLWritable, JavaMLReadable[FPGrowthModel]): - def setItemsCol(self, value: str) -> FPGrowthModel: ... - def setMinConfidence(self, value: float) -> FPGrowthModel: ... - def setPredictionCol(self, value: str) -> FPGrowthModel: ... - @property - def freqItemsets(self) -> DataFrame: ... - @property - def associationRules(self) -> DataFrame: ... - -class FPGrowth( - JavaEstimator[FPGrowthModel], - _FPGrowthParams, - JavaMLWritable, - JavaMLReadable[FPGrowth], -): - def __init__( - self, - *, - minSupport: float = ..., - minConfidence: float = ..., - itemsCol: str = ..., - predictionCol: str = ..., - numPartitions: Optional[int] = ..., - ) -> None: ... - def setParams( - self, - *, - minSupport: float = ..., - minConfidence: float = ..., - itemsCol: str = ..., - predictionCol: str = ..., - numPartitions: Optional[int] = ..., - ) -> FPGrowth: ... 
- def setItemsCol(self, value: str) -> FPGrowth: ... - def setMinSupport(self, value: float) -> FPGrowth: ... - def setNumPartitions(self, value: int) -> FPGrowth: ... - def setMinConfidence(self, value: float) -> FPGrowth: ... - def setPredictionCol(self, value: str) -> FPGrowth: ... - def _create_model(self, java_model: JavaObject) -> FPGrowthModel: ... - -class PrefixSpan(JavaParams): - minSupport: Param[float] - maxPatternLength: Param[int] - maxLocalProjDBSize: Param[int] - sequenceCol: Param[str] - def __init__( - self, - *, - minSupport: float = ..., - maxPatternLength: int = ..., - maxLocalProjDBSize: int = ..., - sequenceCol: str = ..., - ) -> None: ... - def setParams( - self, - *, - minSupport: float = ..., - maxPatternLength: int = ..., - maxLocalProjDBSize: int = ..., - sequenceCol: str = ..., - ) -> PrefixSpan: ... - def setMinSupport(self, value: float) -> PrefixSpan: ... - def getMinSupport(self) -> float: ... - def setMaxPatternLength(self, value: int) -> PrefixSpan: ... - def getMaxPatternLength(self) -> int: ... - def setMaxLocalProjDBSize(self, value: int) -> PrefixSpan: ... - def getMaxLocalProjDBSize(self) -> int: ... - def setSequenceCol(self, value: str) -> PrefixSpan: ... - def getSequenceCol(self) -> str: ... - def findFrequentSequentialPatterns(self, dataset: DataFrame) -> DataFrame: ... From 862a561952552b41fba44155a22e390de96435e6 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 10 Feb 2022 12:30:11 +0900 Subject: [PATCH 199/513] [SPARK-38163][SQL] Preserve the error class of `SparkThrowable` while constructing of function builder ### What changes were proposed in this pull request? In the PR, I propose to propagate `SparkThrowable` to user space AS IS in constructing of function builder. This allows to preserve the error class, for instance. ### Why are the changes needed? This fixes the issues portrayed by the example below: ```scala scala> try { sql("select format_string('%0$s', 'Hello')") } catch { case e: org.apache.spark.sql.AnalysisException => println(e.getErrorClass) } null ``` The expected error should be `ILLEGAL_SUBSTRING`. It sets at https://github.com/apache/spark/blob/899d3bb44d7c72dc0179545189ac8170bde993a8/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala#L71. ### Does this PR introduce _any_ user-facing change? Yes, the PR changes exceptions that can be handles by users. After the changes: ```scala scala> try { sql("select format_string('%0$s', 'Hello')") } catch { case e: org.apache.spark.sql.AnalysisException => println(e.getErrorClass) } ILLEGAL_SUBSTRING ``` ### How was this patch tested? By running new test: ``` $ build/sbt "test:testOnly *QueryCompilationErrorsSuite" ``` Closes #35467 from MaxGekk/error-class-func-builder. 
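For reference, a hedged sketch of how downstream code can take advantage of the preserved error class: with a stable identifier on the exception, handlers no longer need to match on the message text. This snippet is illustrative only and not part of the patch; it assumes a session where `spark` is in scope.

```scala
// Hypothetical user-side handling: branch on the stable error class instead of
// the (less stable) error message text.
try {
  spark.sql("select format_string('%0$s', 'Hello')").collect()
} catch {
  case e: org.apache.spark.sql.AnalysisException if e.getErrorClass == "ILLEGAL_SUBSTRING" =>
    println(s"Invalid format string: ${e.getMessage}")
}
```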
Authored-by: Max Gekk Signed-off-by: Hyukjin Kwon --- .../spark/sql/catalyst/analysis/FunctionRegistry.scala | 7 ++++++- .../spark/sql/errors/QueryCompilationErrorsSuite.scala | 9 +++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 04acaa871eb85..ce7cee5764ce4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -23,6 +23,7 @@ import javax.annotation.concurrent.GuardedBy import scala.collection.mutable import scala.reflect.ClassTag +import org.apache.spark.SparkThrowable import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.FunctionIdentifier @@ -129,7 +130,11 @@ object FunctionRegistryBase { } catch { // the exception is an invocation exception. To get a meaningful message, we need the // cause. - case e: Exception => throw new AnalysisException(e.getCause.getMessage) + case e: Exception => + throw e.getCause match { + case ae: SparkThrowable => ae + case _ => new AnalysisException(e.getCause.getMessage) + } } } else { // Otherwise, find a constructor method that matches the number of arguments, and use that. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index b6de5fb887854..d52fe028b6047 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -111,4 +111,13 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") } } + + test("ILLEGAL_SUBSTRING: the argument_index of string format is invalid") { + val e = intercept[AnalysisException] { + sql("select format_string('%0$s', 'Hello')") + } + assert(e.errorClass === Some("ILLEGAL_SUBSTRING")) + assert(e.message === + "The argument_index of string format cannot contain position 0$.") + } } From 278df9eec9163e1c5853ee805baf148adf56a76e Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Thu, 10 Feb 2022 12:01:25 +0800 Subject: [PATCH 200/513] [SPARK-38146][SQL] Call `setLong` rather than `update` on aggregation buffer when aggregating a TIMESTAMP_NTZ column with a UDAF ### What changes were proposed in this pull request? Change BufferSetterGetterUtils to use `InternalRow.setLong` for TIMESTAMP_NTZ values rather then `InternalRow.update`. ### Why are the changes needed? When a query aggregates a TIMESTAMP_NTZ column using a UDAF that extends `UserDefinedAggregateFunction`, Spark will use `TungstenAggregationIterator`, which creates an `UnsafeRow` for the low-level aggregation buffer. However, the wrapper of that buffer (`MutableAggregationBufferImpl`) fails to properly set up a field setter function for the TIMESTAMP_NTZ column, so it attempts to call `UnsafeRow.update` on the underlying buffer. 
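A minimal UDAF along these lines is enough to hit that path. The sketch below is hypothetical (it is not the exact reproduction from the Jira); the only detail that matters is a TIMESTAMP_NTZ field in the aggregation buffer that `initialize` writes to.

```scala
import java.time.LocalDateTime

import org.apache.spark.sql.Row
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types.{DataType, StructType, TimestampNTZType}

// Hypothetical UDAF that keeps the latest TIMESTAMP_NTZ value seen per group.
object LatestNtz extends UserDefinedAggregateFunction {
  override def inputSchema: StructType = new StructType().add("ts", TimestampNTZType)
  override def bufferSchema: StructType = new StructType().add("latest", TimestampNTZType)
  override def dataType: DataType = TimestampNTZType
  override def deterministic: Boolean = true

  // With a hash-based (UnsafeRow) buffer, this write to a TIMESTAMP_NTZ slot is
  // what used to fail, because no dedicated setter was registered for the type.
  override def initialize(buffer: MutableAggregationBuffer): Unit = {
    buffer(0) = LocalDateTime.of(1970, 1, 1, 0, 0)
  }

  override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
    if (!input.isNullAt(0)) {
      val ts = input.getAs[LocalDateTime](0)
      if (ts.isAfter(buffer.getAs[LocalDateTime](0))) buffer(0) = ts
    }
  }

  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
    val other = buffer2.getAs[LocalDateTime](0)
    if (other.isAfter(buffer1.getAs[LocalDateTime](0))) buffer1(0) = other
  }

  override def evaluate(buffer: Row): Any = buffer.getAs[LocalDateTime](0)
}
```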
The UnsafeRow instance throws `java.lang.UnsupportedOperationException`: ``` Caused by: java.lang.UnsupportedOperationException at org.apache.spark.sql.catalyst.expressions.UnsafeRow.update(UnsafeRow.java:218) at org.apache.spark.sql.execution.aggregate.BufferSetterGetterUtils.$anonfun$createSetters$15(udaf.scala:217) at org.apache.spark.sql.execution.aggregate.BufferSetterGetterUtils.$anonfun$createSetters$15$adapted(udaf.scala:215) at org.apache.spark.sql.execution.aggregate.MutableAggregationBufferImpl.update(udaf.scala:272) at MyAverage$.initialize(:52) at org.apache.spark.sql.execution.aggregate.ScalaUDAF.initialize(udaf.scala:450) ``` See the Jira (SPARK-38146) for a full reproduction example. Before the fix for SPARK-38133, `UnsafeRow` did not consider TIMESTAMP_NTZ as mutable or fixed size, so Spark would use `SortBasedAggregationIterator`, which would created a `GenericInternalRow` for the underlying buffer rather than an `UnsafeRow` instance. Therefore, before SPARK-38133, a UDAF could aggegrate a TIMESTAMP_NTZ column. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Turned back on testing of TIMESTAMP_NTZ in the unit test "udaf with all data types" in `HashAggregationQuerySuite`. Closes #35470 from bersprockets/unsafe_udaf_issue. Authored-by: Bruce Robbins Signed-off-by: Gengliang Wang --- .../org/apache/spark/sql/execution/aggregate/udaf.scala | 4 ++-- .../spark/sql/hive/execution/AggregationQuerySuite.scala | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala index 8879e1499f930..c1e225200f7b4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala @@ -83,7 +83,7 @@ sealed trait BufferSetterGetterUtils { (row: InternalRow, ordinal: Int) => if (row.isNullAt(ordinal)) null else row.getInt(ordinal) - case TimestampType => + case TimestampType | TimestampNTZType => (row: InternalRow, ordinal: Int) => if (row.isNullAt(ordinal)) null else row.getLong(ordinal) @@ -187,7 +187,7 @@ sealed trait BufferSetterGetterUtils { row.setNullAt(ordinal) } - case TimestampType => + case TimestampType | TimestampNTZType => (row: InternalRow, ordinal: Int, value: Any) => if (value != null) { row.setLong(ordinal, value.asInstanceOf[Long]) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala index c5a75acbd91b8..e560c2ea32afa 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala @@ -896,10 +896,7 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te // UnsafeRow.mutableFieldTypes.asScala.toSeq will trigger SortAggregate to use // UnsafeRow as the aggregation buffer. While, dataTypes will trigger // SortAggregate to use a safe row as the aggregation buffer. 
- // udaf cannot yet handle TimestampNTZType - val mutableFieldTypes = UnsafeRow.mutableFieldTypes - .asScala.filterNot(_.isInstanceOf[TimestampNTZType]).toSeq - Seq(dataTypes, mutableFieldTypes).foreach { dataTypes => + Seq(dataTypes, UnsafeRow.mutableFieldTypes.asScala.toSeq).foreach { dataTypes => val fields = dataTypes.zipWithIndex.map { case (dataType, index) => StructField(s"col$index", dataType, nullable = true) } From 5f0a92c435bc23dbe4049069250cf94f167df1cd Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Thu, 10 Feb 2022 14:49:38 +0800 Subject: [PATCH 201/513] [SPARK-38157][SQL] Explicitly set ANSI to false in test timestampNTZ/timestamp.sql and SQLQueryTestSuite to match the expected golden results ### What changes were proposed in this pull request? This PR explicitly sets the ANSI to false in the timestampNTZ/timestamp.sql input itself, and in the `SQLQueryTestSuite` when the input doesn't match with any of `PgSQLTest`, `AnsiTest` or `TimestampNTZTest`. ### Why are the changes needed? Without this change, `ThriftServerQueryTestSuite` will fail on timestampNTZ/timestamp.sql, when ANSI is on by default. It is because the timestampNTZ/timestamp.sql should only work with ANSI off according to the golden result file, but ThriftServerQueryTestSuite or the timestamp.sql test doesn't override the default ANSI setting. The same goes with the `SQLQueryTestSuite`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #35471 from anchovYu/SPARK-38157-fix-thriftserver-suite-ansi. Authored-by: Xinyi Yu Signed-off-by: Wenchen Fan --- .../test/resources/sql-tests/inputs/timestampNTZ/timestamp.sql | 1 + .../src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 1 + 2 files changed, 2 insertions(+) diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestampNTZ/timestamp.sql b/sql/core/src/test/resources/sql-tests/inputs/timestampNTZ/timestamp.sql index 79193c900d046..47988ee65fb7c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestampNTZ/timestamp.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestampNTZ/timestamp.sql @@ -1 +1,2 @@ +--SET spark.sql.ansi.enabled = false --IMPORT timestamp.sql diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 7a5684ef3ffbc..d6a7c69018f90 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -388,6 +388,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper localSparkSession.conf.set(SQLConf.TIMESTAMP_TYPE.key, TimestampTypes.TIMESTAMP_NTZ.toString) case _ => + localSparkSession.conf.set(SQLConf.ANSI_ENABLED.key, false) } if (configSet.nonEmpty) { From 7688d839ccc78972b4590be7b30758dd5eb7fc9f Mon Sep 17 00:00:00 2001 From: Tengfei Huang Date: Thu, 10 Feb 2022 12:13:30 +0300 Subject: [PATCH 202/513] [SPARK-38113][SQL] Use error classes in the execution errors of pivoting ### What changes were proposed in this pull request? Migrate the following errors in QueryExecutionErrors onto use error classes: 1. repeatedPivotsUnsupportedError => UNSUPPORTED_FEATURE 2. pivotNotAfterGroupByUnsupportedError => UNSUPPORTED_FEATURE ### Why are the changes needed? Porting pivot execute errors to new error framework. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT added. Closes #35466 from ivoson/SPARK-38113. 
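For reference, a hedged sketch of the user-visible behaviour after the migration. It is illustrative only and not part of the patch; it assumes a DataFrame `df` with `year`, `course`, `training` and `earnings` columns.

```scala
import org.apache.spark.SparkUnsupportedOperationException
import org.apache.spark.sql.functions.{col, sum}

// Hypothetical illustration: a repeated pivot is still rejected, but the
// exception now carries an error class that callers can match on.
try {
  df.groupBy(col("year"))
    .pivot(col("course"))
    .pivot(col("training"))
    .agg(sum(col("earnings")))
} catch {
  case e: SparkUnsupportedOperationException if e.getErrorClass == "UNSUPPORTED_FEATURE" =>
    // Message per the new error class: "The feature is not supported: Repeated pivots."
    println(e.getMessage)
}
```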
Authored-by: Tengfei Huang Signed-off-by: Max Gekk --- .../sql/errors/QueryExecutionErrors.scala | 8 ++++-- .../errors/QueryExecutionErrorsSuite.scala | 27 ++++++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 76eb4311e41bf..cddf55bedd5bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1885,11 +1885,15 @@ object QueryExecutionErrors { } def repeatedPivotsUnsupportedError(): Throwable = { - new UnsupportedOperationException("repeated pivots are not supported") + new SparkUnsupportedOperationException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array("Repeated pivots.")) } def pivotNotAfterGroupByUnsupportedError(): Throwable = { - new UnsupportedOperationException("pivot is only supported after a groupBy") + new SparkUnsupportedOperationException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array("Pivot not after a groupBy.")) } def invalidAesKeyLengthError(actualLength: Int): RuntimeException = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index d241f6c3b768e..11bbd43d9be06 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.errors -import org.apache.spark.{SparkException, SparkRuntimeException} +import org.apache.spark.{SparkException, SparkRuntimeException, SparkUnsupportedOperationException} import org.apache.spark.sql.{DataFrame, QueryTest} import org.apache.spark.sql.functions.{lit, lower, struct, sum} import org.apache.spark.sql.test.SharedSparkSession @@ -137,4 +137,29 @@ class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { "literal for '[dotnet,Dummies]' of class " + "org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema.") } + + test("UNSUPPORTED_FEATURE: unsupported pivot operations") { + val e1 = intercept[SparkUnsupportedOperationException] { + trainingSales + .groupBy($"sales.year") + .pivot($"sales.course") + .pivot($"training") + .agg(sum($"sales.earnings")) + .collect() + } + assert(e1.getErrorClass === "UNSUPPORTED_FEATURE") + assert(e1.getSqlState === "0A000") + assert(e1.getMessage === "The feature is not supported: Repeated pivots.") + + val e2 = intercept[SparkUnsupportedOperationException] { + trainingSales + .rollup($"sales.year") + .pivot($"training") + .agg(sum($"sales.earnings")) + .collect() + } + assert(e2.getErrorClass === "UNSUPPORTED_FEATURE") + assert(e2.getSqlState === "0A000") + assert(e2.getMessage === "The feature is not supported: Pivot not after a groupBy.") + } } From 53ba6e2affc0d96e45a50ba9f4bdd07b359dcd7c Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 10 Feb 2022 12:35:33 +0300 Subject: [PATCH 203/513] [SPARK-38131][SQL] Use error classes in user-facing exceptions only ### What changes were proposed in this pull request? In the PR, I propose to remove the error class `ROW_FROM_CSV_PARSER_NOT_EXPECTED` and don't use the error class `UNSUPPORTED_FEATURE` in an up-cast exception which is an internal one. 
### Why are the changes needed? The error classes are supposed to be used in user-facing errors/exceptions only. It doesn't make sense to introduce/use them in internal errors. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running existing test suites. Closes #35445 from MaxGekk/keep-only-user-facing-error-classes. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 4 ---- .../sql/catalyst/analysis/Analyzer.scala | 3 ++- .../catalyst/expressions/csvExpressions.scala | 2 +- .../sql/errors/QueryCompilationErrors.scala | 8 ------- .../sql/errors/QueryExecutionErrors.scala | 5 ----- .../errors/QueryCompilationErrorsSuite.scala | 21 +------------------ 6 files changed, 4 insertions(+), 39 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 71909771f7228..48812b95f7129 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -130,10 +130,6 @@ "message" : [ "Failed to rename as %s was not found" ], "sqlState" : "22023" }, - "ROW_FROM_CSV_PARSER_NOT_EXPECTED" : { - "message" : [ "Expected one row from CSV parser." ], - "sqlState" : "42000" - }, "SECOND_FUNCTION_ARGUMENT_NOT_INTEGER" : { "message" : [ "The second argument of '%s' function needs to be an integer." ], "sqlState" : "22023" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ba7c39f9db571..18684bdad63cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3591,7 +3591,8 @@ class Analyzer(override val catalogManager: CatalogManager) case u @ UpCast(child, _, _) if !child.resolved => u case UpCast(_, target, _) if target != DecimalType && !target.isInstanceOf[DataType] => - throw QueryCompilationErrors.unsupportedAbstractDataTypeForUpCastError(target) + throw new IllegalStateException( + s"UpCast only supports DecimalType as AbstractDataType yet, but got: $target") case UpCast(child, target, walkedTypePath) if target == DecimalType && child.dataType.isInstanceOf[DecimalType] => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala index 30d992a2eea6d..6e08ad346c853 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala @@ -91,7 +91,7 @@ case class CsvToStructs( assert(!rows.hasNext) result } else { - throw QueryExecutionErrors.rowFromCSVParserNotExpectedError + throw new IllegalStateException("Expected one row from CSV parser.") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index d3f33e719c88c..71caafee2da4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -170,14 +170,6 @@ object QueryCompilationErrors { ) } - def unsupportedAbstractDataTypeForUpCastError(gotType: AbstractDataType): 
Throwable = { - new AnalysisException( - errorClass = "UNSUPPORTED_FEATURE", - messageParameters = - Array(s"UpCast only support DecimalType as AbstractDataType yet, but got: $gotType") - ) - } - def outerScopeFailureForNewInstanceError(className: String): Throwable = { new AnalysisException( s"Unable to generate an encoder for inner class `$className` without " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index cddf55bedd5bf..24ef61cd67d0d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -181,11 +181,6 @@ object QueryExecutionErrors { } } - def rowFromCSVParserNotExpectedError(): Throwable = { - new SparkIllegalArgumentException(errorClass = "ROW_FROM_CSV_PARSER_NOT_EXPECTED", - messageParameters = Array.empty) - } - def inputTypeUnsupportedError(dataType: DataType): Throwable = { new IllegalArgumentException(s"Unsupported input type ${dataType.catalogString}") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index d52fe028b6047..485022c9c79dc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -17,13 +17,9 @@ package org.apache.spark.sql.errors -import org.apache.spark.sql.{AnalysisException, Dataset, QueryTest} -import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute -import org.apache.spark.sql.catalyst.expressions.{Alias, UpCast} -import org.apache.spark.sql.catalyst.plans.logical.Project +import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.functions.{grouping, grouping_id} import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.NumericType case class StringLongClass(a: String, b: Long) @@ -63,21 +59,6 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { """.stripMargin.trim + " of the field in the target object") } - test("UNSUPPORTED_FEATURE: UpCast only support DecimalType as AbstractDataType") { - val df = sql("select 1 as value") - - val msg = intercept[AnalysisException] { - val plan = Project( - Seq(Alias(UpCast(UnresolvedAttribute("value"), NumericType), "value")()), - df.logicalPlan) - - Dataset.ofRows(spark, plan) - }.message - assert(msg.matches("The feature is not supported: " + - "UpCast only support DecimalType as AbstractDataType yet," + - """ but got: org.apache.spark.sql.types.NumericType\$\@\w+""")) - } - test("UNSUPPORTED_GROUPING_EXPRESSION: filter with grouping/grouping_Id expression") { val df = Seq( (536361, "85123A", 2, 17850), From 17653fbbb5fa98e73932b702641f50671579d431 Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 10 Feb 2022 10:38:22 +0100 Subject: [PATCH 204/513] [SPARK-37401][PYTHON][ML] Inline typehints for pyspark.ml.clustering ### What changes were proposed in this pull request? This PR migrates type `pyspark.ml.stat clustering` from stub file to inline type hints. ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests + new data test. Closes #35439 from zero323/SPARK-37401. 
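For reference, a short Python sketch of the kind of mistake the inlined annotations let a type checker flag directly against `pyspark/ml/clustering.py`. The snippet is illustrative only; the second setter call is deliberately wrong.

```python
from pyspark.ml.clustering import KMeans

kmeans = KMeans(featuresCol="features", k=3, seed=1)
kmeans.setMaxIter(10)   # fine: matches the inline annotation `value: int`
kmeans.setK("three")    # expected to be flagged by mypy now that the hints are inline
```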
Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/ml/clustering.py | 590 ++++++++++-------- python/pyspark/ml/clustering.pyi | 439 ------------- .../ml/tests/typing/test_clustering.yaml | 33 + 3 files changed, 362 insertions(+), 700 deletions(-) delete mode 100644 python/pyspark/ml/clustering.pyi create mode 100644 python/pyspark/ml/tests/typing/test_clustering.yaml diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index 11fbdf5cf9246..a66d6e347055a 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -18,6 +18,10 @@ import sys import warnings +from typing import Any, Dict, List, Optional, TYPE_CHECKING + +import numpy as np + from pyspark import since, keyword_only from pyspark.ml.param.shared import ( HasMaxIter, @@ -45,6 +49,12 @@ from pyspark.ml.common import inherit_doc, _java2py from pyspark.ml.stat import MultivariateGaussian from pyspark.sql import DataFrame +from pyspark.ml.linalg import Vector, Matrix + +if TYPE_CHECKING: + from pyspark.ml._typing import M + from py4j.java_gateway import JavaObject # type: ignore[import] + __all__ = [ "BisectingKMeans", @@ -71,57 +81,57 @@ class ClusteringSummary(JavaWrapper): .. versionadded:: 2.1.0 """ - @property + @property # type: ignore[misc] @since("2.1.0") - def predictionCol(self): + def predictionCol(self) -> str: """ Name for column of predicted clusters in `predictions`. """ return self._call_java("predictionCol") - @property + @property # type: ignore[misc] @since("2.1.0") - def predictions(self): + def predictions(self) -> DataFrame: """ DataFrame produced by the model's `transform` method. """ return self._call_java("predictions") - @property + @property # type: ignore[misc] @since("2.1.0") - def featuresCol(self): + def featuresCol(self) -> str: """ Name for column of features in `predictions`. """ return self._call_java("featuresCol") - @property + @property # type: ignore[misc] @since("2.1.0") - def k(self): + def k(self) -> int: """ The number of clusters the model was trained with. """ return self._call_java("k") - @property + @property # type: ignore[misc] @since("2.1.0") - def cluster(self): + def cluster(self) -> DataFrame: """ DataFrame of predicted cluster centers for each training data point. """ return self._call_java("cluster") - @property + @property # type: ignore[misc] @since("2.1.0") - def clusterSizes(self): + def clusterSizes(self) -> List[int]: """ Size of (number of data points in) each cluster. """ return self._call_java("clusterSizes") - @property + @property # type: ignore[misc] @since("2.4.0") - def numIter(self): + def numIter(self) -> int: """ Number of iterations. """ @@ -145,19 +155,19 @@ class _GaussianMixtureParams( .. versionadded:: 3.0.0 """ - k = Param( + k: Param[int] = Param( Params._dummy(), "k", "Number of independent Gaussians in the mixture model. " + "Must be > 1.", typeConverter=TypeConverters.toInt, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_GaussianMixtureParams, self).__init__(*args) self._setDefault(k=2, tol=0.01, maxIter=100, aggregationDepth=2) @since("2.0.0") - def getK(self): + def getK(self) -> int: """ Gets the value of `k` """ @@ -165,7 +175,11 @@ def getK(self): class GaussianMixtureModel( - JavaModel, _GaussianMixtureParams, JavaMLWritable, JavaMLReadable, HasTrainingSummary + JavaModel, + _GaussianMixtureParams, + JavaMLWritable, + JavaMLReadable["GaussianMixtureModel"], + HasTrainingSummary["GaussianMixtureSummary"], ): """ Model fitted by GaussianMixture. 
@@ -174,29 +188,29 @@ class GaussianMixtureModel( """ @since("3.0.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "GaussianMixtureModel": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("3.0.0") - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "GaussianMixtureModel": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) @since("3.0.0") - def setProbabilityCol(self, value): + def setProbabilityCol(self, value: str) -> "GaussianMixtureModel": """ Sets the value of :py:attr:`probabilityCol`. """ return self._set(probabilityCol=value) - @property + @property # type: ignore[misc] @since("2.0.0") - def weights(self): + def weights(self) -> List[float]: """ Weight for each Gaussian distribution in the mixture. This is a multinomial probability distribution over the k Gaussians, @@ -204,23 +218,25 @@ def weights(self): """ return self._call_java("weights") - @property + @property # type: ignore[misc] @since("3.0.0") - def gaussians(self): + def gaussians(self) -> List[MultivariateGaussian]: """ Array of :py:class:`MultivariateGaussian` where gaussians[i] represents the Multivariate Gaussian (Normal) Distribution for Gaussian i """ sc = SparkContext._active_spark_context + assert sc is not None and self._java_obj is not None + jgaussians = self._java_obj.gaussians() return [ MultivariateGaussian(_java2py(sc, jgaussian.mean()), _java2py(sc, jgaussian.cov())) for jgaussian in jgaussians ] - @property + @property # type: ignore[misc] @since("2.0.0") - def gaussiansDF(self): + def gaussiansDF(self) -> DataFrame: """ Retrieve Gaussian distributions as a DataFrame. Each row represents a Gaussian Distribution. @@ -228,9 +244,9 @@ def gaussiansDF(self): """ return self._call_java("gaussiansDF") - @property + @property # type: ignore[misc] @since("2.1.0") - def summary(self): + def summary(self) -> "GaussianMixtureSummary": """ Gets summary (cluster assignments, cluster sizes) of the model trained on the training set. An exception is thrown if no summary exists. @@ -243,14 +259,14 @@ def summary(self): ) @since("3.0.0") - def predict(self, value): + def predict(self, value: Vector) -> int: """ Predict label for the given features. """ return self._call_java("predict", value) @since("3.0.0") - def predictProbability(self, value): + def predictProbability(self, value: Vector) -> Vector: """ Predict probability for the given features. """ @@ -258,7 +274,12 @@ def predictProbability(self, value): @inherit_doc -class GaussianMixture(JavaEstimator, _GaussianMixtureParams, JavaMLWritable, JavaMLReadable): +class GaussianMixture( + JavaEstimator[GaussianMixtureModel], + _GaussianMixtureParams, + JavaMLWritable, + JavaMLReadable["GaussianMixture"], +): """ GaussianMixture clustering. This class performs expectation maximization for multivariate Gaussian @@ -379,19 +400,21 @@ class GaussianMixture(JavaEstimator, _GaussianMixtureParams, JavaMLWritable, Jav GaussianMixture... 
""" + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - predictionCol="prediction", - k=2, - probabilityCol="probability", - tol=0.01, - maxIter=100, - seed=None, - aggregationDepth=2, - weightCol=None, + featuresCol: str = "features", + predictionCol: str = "prediction", + k: int = 2, + probabilityCol: str = "probability", + tol: float = 0.01, + maxIter: int = 100, + seed: Optional[int] = None, + aggregationDepth: int = 2, + weightCol: Optional[str] = None, ): """ __init__(self, \\*, featuresCol="features", predictionCol="prediction", k=2, \ @@ -405,7 +428,7 @@ def __init__( kwargs = self._input_kwargs self.setParams(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "GaussianMixtureModel": return GaussianMixtureModel(java_model) @keyword_only @@ -413,16 +436,16 @@ def _create_model(self, java_model): def setParams( self, *, - featuresCol="features", - predictionCol="prediction", - k=2, - probabilityCol="probability", - tol=0.01, - maxIter=100, - seed=None, - aggregationDepth=2, - weightCol=None, - ): + featuresCol: str = "features", + predictionCol: str = "prediction", + k: int = 2, + probabilityCol: str = "probability", + tol: float = 0.01, + maxIter: int = 100, + seed: Optional[int] = None, + aggregationDepth: int = 2, + weightCol: Optional[str] = None, + ) -> "GaussianMixture": """ setParams(self, \\*, featuresCol="features", predictionCol="prediction", k=2, \ probabilityCol="probability", tol=0.01, maxIter=100, seed=None, \ @@ -434,63 +457,63 @@ def setParams( return self._set(**kwargs) @since("2.0.0") - def setK(self, value): + def setK(self, value: int) -> "GaussianMixture": """ Sets the value of :py:attr:`k`. """ return self._set(k=value) @since("2.0.0") - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "GaussianMixture": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) @since("2.0.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "GaussianMixture": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("2.0.0") - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "GaussianMixture": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) @since("2.0.0") - def setProbabilityCol(self, value): + def setProbabilityCol(self, value: str) -> "GaussianMixture": """ Sets the value of :py:attr:`probabilityCol`. """ return self._set(probabilityCol=value) @since("3.0.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "GaussianMixture": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) @since("2.0.0") - def setSeed(self, value): + def setSeed(self, value: int) -> "GaussianMixture": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) @since("2.0.0") - def setTol(self, value): + def setTol(self, value: float) -> "GaussianMixture": """ Sets the value of :py:attr:`tol`. """ return self._set(tol=value) @since("3.0.0") - def setAggregationDepth(self, value): + def setAggregationDepth(self, value: int) -> "GaussianMixture": """ Sets the value of :py:attr:`aggregationDepth`. """ @@ -504,25 +527,25 @@ class GaussianMixtureSummary(ClusteringSummary): .. 
versionadded:: 2.1.0 """ - @property + @property # type: ignore[misc] @since("2.1.0") - def probabilityCol(self): + def probabilityCol(self) -> str: """ Name for column of predicted probability of each cluster in `predictions`. """ return self._call_java("probabilityCol") - @property + @property # type: ignore[misc] @since("2.1.0") - def probability(self): + def probability(self) -> DataFrame: """ DataFrame of probabilities of each cluster for each training data point. """ return self._call_java("probability") - @property + @property # type: ignore[misc] @since("2.2.0") - def logLikelihood(self): + def logLikelihood(self) -> float: """ Total log-likelihood for this model on the given data. """ @@ -536,9 +559,9 @@ class KMeansSummary(ClusteringSummary): .. versionadded:: 2.1.0 """ - @property + @property # type: ignore[misc] @since("2.4.0") - def trainingCost(self): + def trainingCost(self) -> float: """ K-means cost (sum of squared distances to the nearest centroid for all points in the training dataset). This is equivalent to sklearn's inertia. @@ -556,13 +579,13 @@ class _KMeansParams( .. versionadded:: 3.0.0 """ - k = Param( + k: Param[int] = Param( Params._dummy(), "k", "The number of clusters to create. Must be > 1.", typeConverter=TypeConverters.toInt, ) - initMode = Param( + initMode: Param[str] = Param( Params._dummy(), "initMode", 'The initialization algorithm. This can be either "random" to ' @@ -570,14 +593,14 @@ class _KMeansParams( + "to use a parallel variant of k-means++", typeConverter=TypeConverters.toString, ) - initSteps = Param( + initSteps: Param[int] = Param( Params._dummy(), "initSteps", "The number of steps for k-means|| " + "initialization mode. Must be > 0.", typeConverter=TypeConverters.toInt, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_KMeansParams, self).__init__(*args) self._setDefault( k=2, @@ -589,21 +612,21 @@ def __init__(self, *args): ) @since("1.5.0") - def getK(self): + def getK(self) -> int: """ Gets the value of `k` """ return self.getOrDefault(self.k) @since("1.5.0") - def getInitMode(self): + def getInitMode(self) -> str: """ Gets the value of `initMode` """ return self.getOrDefault(self.initMode) @since("1.5.0") - def getInitSteps(self): + def getInitSteps(self) -> int: """ Gets the value of `initSteps` """ @@ -611,7 +634,11 @@ def getInitSteps(self): class KMeansModel( - JavaModel, _KMeansParams, GeneralJavaMLWritable, JavaMLReadable, HasTrainingSummary + JavaModel, + _KMeansParams, + GeneralJavaMLWritable, + JavaMLReadable["KMeansModel"], + HasTrainingSummary["KMeansSummary"], ): """ Model fitted by KMeans. @@ -620,27 +647,27 @@ class KMeansModel( """ @since("3.0.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "KMeansModel": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("3.0.0") - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "KMeansModel": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) @since("1.5.0") - def clusterCenters(self): + def clusterCenters(self) -> List[np.ndarray]: """Get the cluster centers, represented as a list of NumPy arrays.""" return [c.toArray() for c in self._call_java("clusterCenters")] - @property + @property # type: ignore[misc] @since("2.1.0") - def summary(self): + def summary(self) -> KMeansSummary: """ Gets summary (cluster assignments, cluster sizes) of the model trained on the training set. An exception is thrown if no summary exists. 
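The hunks above give `KMeansModel` a fully typed surface (`clusterCenters() -> List[np.ndarray]`, `summary -> KMeansSummary`, `predict(Vector) -> int`). As a quick sanity check of how that API reads from user code, here is a minimal sketch; the local session, toy vectors, and variable names are illustrative assumptions, not part of the patch:

```
from pyspark.ml.clustering import KMeans
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

# Two well-separated groups of 2-D points.
data = [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([0.1, 0.1]),),
        (Vectors.dense([9.0, 9.0]),), (Vectors.dense([9.1, 9.1]),)]
df = spark.createDataFrame(data, ["features"])

model = KMeans(k=2, seed=1).fit(df)        # inferred as KMeansModel via JavaEstimator[KMeansModel]

centers = model.clusterCenters()           # List[np.ndarray]
cost = model.summary.trainingCost          # float, from KMeansSummary
label = model.predict(Vectors.dense([0.05, 0.05]))  # int
```

With the annotations inlined here, a checker can infer `model` as `KMeansModel` directly from `fit`, a role previously played only by the separate `.pyi` stub that is deleted later in this diff.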
@@ -653,7 +680,7 @@ def summary(self): ) @since("3.0.0") - def predict(self, value): + def predict(self, value: Vector) -> int: """ Predict label for the given features. """ @@ -661,7 +688,7 @@ def predict(self, value): @inherit_doc -class KMeans(JavaEstimator, _KMeansParams, JavaMLWritable, JavaMLReadable): +class KMeans(JavaEstimator[KMeansModel], _KMeansParams, JavaMLWritable, JavaMLReadable["KMeans"]): """ K-means clustering with a k-means++ like initialization mode (the k-means|| algorithm by Bahmani et al). @@ -727,20 +754,22 @@ class KMeans(JavaEstimator, _KMeansParams, JavaMLWritable, JavaMLReadable): True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - predictionCol="prediction", - k=2, - initMode="k-means||", - initSteps=2, - tol=1e-4, - maxIter=20, - seed=None, - distanceMeasure="euclidean", - weightCol=None, + featuresCol: str = "features", + predictionCol: str = "prediction", + k: int = 2, + initMode: str = "k-means||", + initSteps: int = 2, + tol: float = 1e-4, + maxIter: int = 20, + seed: Optional[int] = None, + distanceMeasure: str = "euclidean", + weightCol: Optional[str] = None, ): """ __init__(self, \\*, featuresCol="features", predictionCol="prediction", k=2, \ @@ -752,7 +781,7 @@ def __init__( kwargs = self._input_kwargs self.setParams(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> KMeansModel: return KMeansModel(java_model) @keyword_only @@ -760,17 +789,17 @@ def _create_model(self, java_model): def setParams( self, *, - featuresCol="features", - predictionCol="prediction", - k=2, - initMode="k-means||", - initSteps=2, - tol=1e-4, - maxIter=20, - seed=None, - distanceMeasure="euclidean", - weightCol=None, - ): + featuresCol: str = "features", + predictionCol: str = "prediction", + k: int = 2, + initMode: str = "k-means||", + initSteps: int = 2, + tol: float = 1e-4, + maxIter: int = 20, + seed: Optional[int] = None, + distanceMeasure: str = "euclidean", + weightCol: Optional[str] = None, + ) -> "KMeans": """ setParams(self, \\*, featuresCol="features", predictionCol="prediction", k=2, \ initMode="k-means||", initSteps=2, tol=1e-4, maxIter=20, seed=None, \ @@ -782,70 +811,70 @@ def setParams( return self._set(**kwargs) @since("1.5.0") - def setK(self, value): + def setK(self, value: int) -> "KMeans": """ Sets the value of :py:attr:`k`. """ return self._set(k=value) @since("1.5.0") - def setInitMode(self, value): + def setInitMode(self, value: str) -> "KMeans": """ Sets the value of :py:attr:`initMode`. """ return self._set(initMode=value) @since("1.5.0") - def setInitSteps(self, value): + def setInitSteps(self, value: int) -> "KMeans": """ Sets the value of :py:attr:`initSteps`. """ return self._set(initSteps=value) @since("2.4.0") - def setDistanceMeasure(self, value): + def setDistanceMeasure(self, value: str) -> "KMeans": """ Sets the value of :py:attr:`distanceMeasure`. """ return self._set(distanceMeasure=value) @since("1.5.0") - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "KMeans": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) @since("1.5.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "KMeans": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("1.5.0") - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "KMeans": """ Sets the value of :py:attr:`predictionCol`. 
""" return self._set(predictionCol=value) @since("1.5.0") - def setSeed(self, value): + def setSeed(self, value: int) -> "KMeans": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) @since("1.5.0") - def setTol(self, value): + def setTol(self, value: float) -> "KMeans": """ Sets the value of :py:attr:`tol`. """ return self._set(tol=value) @since("3.0.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "KMeans": """ Sets the value of :py:attr:`weightCol`. """ @@ -854,7 +883,12 @@ def setWeightCol(self, value): @inherit_doc class _BisectingKMeansParams( - HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionCol, HasDistanceMeasure, HasWeightCol + HasMaxIter, + HasFeaturesCol, + HasSeed, + HasPredictionCol, + HasDistanceMeasure, + HasWeightCol, ): """ Params for :py:class:`BisectingKMeans` and :py:class:`BisectingKMeansModel`. @@ -862,13 +896,13 @@ class _BisectingKMeansParams( .. versionadded:: 3.0.0 """ - k = Param( + k: Param[int] = Param( Params._dummy(), "k", "The desired number of leaf clusters. Must be > 1.", typeConverter=TypeConverters.toInt, ) - minDivisibleClusterSize = Param( + minDivisibleClusterSize: Param[float] = Param( Params._dummy(), "minDivisibleClusterSize", "The minimum number of points (if >= 1.0) or the minimum " @@ -876,19 +910,19 @@ class _BisectingKMeansParams( typeConverter=TypeConverters.toFloat, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_BisectingKMeansParams, self).__init__(*args) self._setDefault(maxIter=20, k=4, minDivisibleClusterSize=1.0) @since("2.0.0") - def getK(self): + def getK(self) -> int: """ Gets the value of `k` or its default value. """ return self.getOrDefault(self.k) @since("2.0.0") - def getMinDivisibleClusterSize(self): + def getMinDivisibleClusterSize(self) -> float: """ Gets the value of `minDivisibleClusterSize` or its default value. """ @@ -896,7 +930,11 @@ def getMinDivisibleClusterSize(self): class BisectingKMeansModel( - JavaModel, _BisectingKMeansParams, JavaMLWritable, JavaMLReadable, HasTrainingSummary + JavaModel, + _BisectingKMeansParams, + JavaMLWritable, + JavaMLReadable["BisectingKMeansModel"], + HasTrainingSummary["BisectingKMeansSummary"], ): """ Model fitted by BisectingKMeans. @@ -905,26 +943,26 @@ class BisectingKMeansModel( """ @since("3.0.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "BisectingKMeansModel": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("3.0.0") - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "BisectingKMeansModel": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) @since("2.0.0") - def clusterCenters(self): + def clusterCenters(self) -> List[np.ndarray]: """Get the cluster centers, represented as a list of NumPy arrays.""" return [c.toArray() for c in self._call_java("clusterCenters")] @since("2.0.0") - def computeCost(self, dataset): + def computeCost(self, dataset: DataFrame) -> float: """ Computes the sum of squared distances between the input points and their corresponding cluster centers. @@ -941,9 +979,9 @@ def computeCost(self, dataset): ) return self._call_java("computeCost", dataset) - @property + @property # type: ignore[misc] @since("2.1.0") - def summary(self): + def summary(self) -> "BisectingKMeansSummary": """ Gets summary (cluster assignments, cluster sizes) of the model trained on the training set. An exception is thrown if no summary exists. 
@@ -956,7 +994,7 @@ def summary(self): ) @since("3.0.0") - def predict(self, value): + def predict(self, value: Vector) -> int: """ Predict label for the given features. """ @@ -964,7 +1002,12 @@ def predict(self, value): @inherit_doc -class BisectingKMeans(JavaEstimator, _BisectingKMeansParams, JavaMLWritable, JavaMLReadable): +class BisectingKMeans( + JavaEstimator[BisectingKMeansModel], + _BisectingKMeansParams, + JavaMLWritable, + JavaMLReadable["BisectingKMeans"], +): """ A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques" by Steinbach, Karypis, and Kumar, with modification to fit Spark. @@ -1043,18 +1086,20 @@ class BisectingKMeans(JavaEstimator, _BisectingKMeansParams, JavaMLWritable, Jav True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - predictionCol="prediction", - maxIter=20, - seed=None, - k=4, - minDivisibleClusterSize=1.0, - distanceMeasure="euclidean", - weightCol=None, + featuresCol: str = "features", + predictionCol: str = "prediction", + maxIter: int = 20, + seed: Optional[int] = None, + k: int = 4, + minDivisibleClusterSize: float = 1.0, + distanceMeasure: str = "euclidean", + weightCol: Optional[str] = None, ): """ __init__(self, \\*, featuresCol="features", predictionCol="prediction", maxIter=20, \ @@ -1073,15 +1118,15 @@ def __init__( def setParams( self, *, - featuresCol="features", - predictionCol="prediction", - maxIter=20, - seed=None, - k=4, - minDivisibleClusterSize=1.0, - distanceMeasure="euclidean", - weightCol=None, - ): + featuresCol: str = "features", + predictionCol: str = "prediction", + maxIter: int = 20, + seed: Optional[int] = None, + k: int = 4, + minDivisibleClusterSize: float = 1.0, + distanceMeasure: str = "euclidean", + weightCol: Optional[str] = None, + ) -> "BisectingKMeans": """ setParams(self, \\*, featuresCol="features", predictionCol="prediction", maxIter=20, \ seed=None, k=4, minDivisibleClusterSize=1.0, distanceMeasure="euclidean", \ @@ -1092,62 +1137,62 @@ def setParams( return self._set(**kwargs) @since("2.0.0") - def setK(self, value): + def setK(self, value: int) -> "BisectingKMeans": """ Sets the value of :py:attr:`k`. """ return self._set(k=value) @since("2.0.0") - def setMinDivisibleClusterSize(self, value): + def setMinDivisibleClusterSize(self, value: float) -> "BisectingKMeans": """ Sets the value of :py:attr:`minDivisibleClusterSize`. """ return self._set(minDivisibleClusterSize=value) @since("2.4.0") - def setDistanceMeasure(self, value): + def setDistanceMeasure(self, value: str) -> "BisectingKMeans": """ Sets the value of :py:attr:`distanceMeasure`. """ return self._set(distanceMeasure=value) @since("2.0.0") - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "BisectingKMeans": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) @since("2.0.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "BisectingKMeans": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("2.0.0") - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "BisectingKMeans": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) @since("2.0.0") - def setSeed(self, value): + def setSeed(self, value: int) -> "BisectingKMeans": """ Sets the value of :py:attr:`seed`. 
""" return self._set(seed=value) @since("3.0.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "BisectingKMeans": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> BisectingKMeansModel: return BisectingKMeansModel(java_model) @@ -1158,9 +1203,9 @@ class BisectingKMeansSummary(ClusteringSummary): .. versionadded:: 2.1.0 """ - @property + @property # type: ignore[misc] @since("3.0.0") - def trainingCost(self): + def trainingCost(self) -> float: """ Sum of squared distances to the nearest centroid for all points in the training dataset. This is equivalent to sklearn's inertia. @@ -1176,27 +1221,27 @@ class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval): .. versionadded:: 3.0.0 """ - k = Param( + k: Param[int] = Param( Params._dummy(), "k", "The number of topics (clusters) to infer. Must be > 1.", typeConverter=TypeConverters.toInt, ) - optimizer = Param( + optimizer: Param[str] = Param( Params._dummy(), "optimizer", "Optimizer or inference algorithm used to estimate the LDA model. " "Supported: online, em", typeConverter=TypeConverters.toString, ) - learningOffset = Param( + learningOffset: Param[float] = Param( Params._dummy(), "learningOffset", "A (positive) learning parameter that downweights early iterations." " Larger values make early iterations count less", typeConverter=TypeConverters.toFloat, ) - learningDecay = Param( + learningDecay: Param[float] = Param( Params._dummy(), "learningDecay", "Learning rate, set as an" @@ -1204,14 +1249,14 @@ class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval): "guarantee asymptotic convergence.", typeConverter=TypeConverters.toFloat, ) - subsamplingRate = Param( + subsamplingRate: Param[float] = Param( Params._dummy(), "subsamplingRate", "Fraction of the corpus to be sampled and used in each iteration " "of mini-batch gradient descent, in range (0, 1].", typeConverter=TypeConverters.toFloat, ) - optimizeDocConcentration = Param( + optimizeDocConcentration: Param[bool] = Param( Params._dummy(), "optimizeDocConcentration", "Indicates whether the docConcentration (Dirichlet parameter " @@ -1219,21 +1264,21 @@ class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval): "training.", typeConverter=TypeConverters.toBoolean, ) - docConcentration = Param( + docConcentration: Param[List[float]] = Param( Params._dummy(), "docConcentration", 'Concentration parameter (commonly named "alpha") for the ' 'prior placed on documents\' distributions over topics ("theta").', typeConverter=TypeConverters.toListFloat, ) - topicConcentration = Param( + topicConcentration: Param[float] = Param( Params._dummy(), "topicConcentration", 'Concentration parameter (commonly named "beta" or "eta") for ' "the prior placed on topic' distributions over terms.", typeConverter=TypeConverters.toFloat, ) - topicDistributionCol = Param( + topicDistributionCol: Param[str] = Param( Params._dummy(), "topicDistributionCol", "Output column with estimates of the topic mixture distribution " @@ -1241,7 +1286,7 @@ class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval): "Returns a vector of zeros for an empty document.", typeConverter=TypeConverters.toString, ) - keepLastCheckpoint = Param( + keepLastCheckpoint: Param[bool] = Param( Params._dummy(), "keepLastCheckpoint", "(For EM optimizer) If using checkpointing, this indicates whether" @@ -1251,7 +1296,7 @@ 
class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval): TypeConverters.toBoolean, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_LDAParams, self).__init__(*args) self._setDefault( maxIter=20, @@ -1267,70 +1312,70 @@ def __init__(self, *args): ) @since("2.0.0") - def getK(self): + def getK(self) -> int: """ Gets the value of :py:attr:`k` or its default value. """ return self.getOrDefault(self.k) @since("2.0.0") - def getOptimizer(self): + def getOptimizer(self) -> str: """ Gets the value of :py:attr:`optimizer` or its default value. """ return self.getOrDefault(self.optimizer) @since("2.0.0") - def getLearningOffset(self): + def getLearningOffset(self) -> float: """ Gets the value of :py:attr:`learningOffset` or its default value. """ return self.getOrDefault(self.learningOffset) @since("2.0.0") - def getLearningDecay(self): + def getLearningDecay(self) -> float: """ Gets the value of :py:attr:`learningDecay` or its default value. """ return self.getOrDefault(self.learningDecay) @since("2.0.0") - def getSubsamplingRate(self): + def getSubsamplingRate(self) -> float: """ Gets the value of :py:attr:`subsamplingRate` or its default value. """ return self.getOrDefault(self.subsamplingRate) @since("2.0.0") - def getOptimizeDocConcentration(self): + def getOptimizeDocConcentration(self) -> bool: """ Gets the value of :py:attr:`optimizeDocConcentration` or its default value. """ return self.getOrDefault(self.optimizeDocConcentration) @since("2.0.0") - def getDocConcentration(self): + def getDocConcentration(self) -> List[float]: """ Gets the value of :py:attr:`docConcentration` or its default value. """ return self.getOrDefault(self.docConcentration) @since("2.0.0") - def getTopicConcentration(self): + def getTopicConcentration(self) -> float: """ Gets the value of :py:attr:`topicConcentration` or its default value. """ return self.getOrDefault(self.topicConcentration) @since("2.0.0") - def getTopicDistributionCol(self): + def getTopicDistributionCol(self) -> str: """ Gets the value of :py:attr:`topicDistributionCol` or its default value. """ return self.getOrDefault(self.topicDistributionCol) @since("2.0.0") - def getKeepLastCheckpoint(self): + def getKeepLastCheckpoint(self) -> bool: """ Gets the value of :py:attr:`keepLastCheckpoint` or its default value. """ @@ -1348,40 +1393,40 @@ class LDAModel(JavaModel, _LDAParams): """ @since("3.0.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self: "M", value: str) -> "M": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("3.0.0") - def setSeed(self, value): + def setSeed(self: "M", value: int) -> "M": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) @since("3.0.0") - def setTopicDistributionCol(self, value): + def setTopicDistributionCol(self: "M", value: str) -> "M": """ Sets the value of :py:attr:`topicDistributionCol`. """ return self._set(topicDistributionCol=value) @since("2.0.0") - def isDistributed(self): + def isDistributed(self) -> bool: """ Indicates whether this instance is of type DistributedLDAModel """ return self._call_java("isDistributed") @since("2.0.0") - def vocabSize(self): + def vocabSize(self) -> int: """Vocabulary size (number of terms or words in the vocabulary)""" return self._call_java("vocabSize") @since("2.0.0") - def topicsMatrix(self): + def topicsMatrix(self) -> Matrix: """ Inferred topics, where each topic is represented by a distribution over terms. 
This is a matrix of size vocabSize x k, where each column is a topic. @@ -1395,7 +1440,7 @@ def topicsMatrix(self): return self._call_java("topicsMatrix") @since("2.0.0") - def logLikelihood(self, dataset): + def logLikelihood(self, dataset: DataFrame) -> float: """ Calculates a lower bound on the log likelihood of the entire corpus. See Equation (16) in the Online LDA paper (Hoffman et al., 2010). @@ -1407,7 +1452,7 @@ def logLikelihood(self, dataset): return self._call_java("logLikelihood", dataset) @since("2.0.0") - def logPerplexity(self, dataset): + def logPerplexity(self, dataset: DataFrame) -> float: """ Calculate an upper bound on perplexity. (Lower is better.) See Equation (16) in the Online LDA paper (Hoffman et al., 2010). @@ -1419,14 +1464,14 @@ def logPerplexity(self, dataset): return self._call_java("logPerplexity", dataset) @since("2.0.0") - def describeTopics(self, maxTermsPerTopic=10): + def describeTopics(self, maxTermsPerTopic: int = 10) -> DataFrame: """ Return the topics described by their top-weighted terms. """ return self._call_java("describeTopics", maxTermsPerTopic) @since("2.0.0") - def estimatedDocConcentration(self): + def estimatedDocConcentration(self) -> Vector: """ Value for :py:attr:`LDA.docConcentration` estimated from data. If Online LDA was used and :py:attr:`LDA.optimizeDocConcentration` was set to false, @@ -1436,7 +1481,7 @@ def estimatedDocConcentration(self): @inherit_doc -class DistributedLDAModel(LDAModel, JavaMLReadable, JavaMLWritable): +class DistributedLDAModel(LDAModel, JavaMLReadable["DistributedLDAModel"], JavaMLWritable): """ Distributed model fitted by :py:class:`LDA`. This type of model is currently only produced by Expectation-Maximization (EM). @@ -1448,7 +1493,7 @@ class DistributedLDAModel(LDAModel, JavaMLReadable, JavaMLWritable): """ @since("2.0.0") - def toLocal(self): + def toLocal(self) -> "LocalLDAModel": """ Convert this distributed model to a local representation. This discards info about the training dataset. @@ -1464,7 +1509,7 @@ def toLocal(self): return model @since("2.0.0") - def trainingLogLikelihood(self): + def trainingLogLikelihood(self) -> float: """ Log likelihood of the observed tokens in the training set, given the current parameter estimates: @@ -1482,14 +1527,14 @@ def trainingLogLikelihood(self): return self._call_java("trainingLogLikelihood") @since("2.0.0") - def logPrior(self): + def logPrior(self) -> float: """ Log probability of the current parameter estimate: log P(topics, topic distributions for docs | alpha, eta) """ return self._call_java("logPrior") - def getCheckpointFiles(self): + def getCheckpointFiles(self) -> List[str]: """ If using checkpointing and :py:attr:`LDA.keepLastCheckpoint` is set to true, then there may be saved checkpoint files. This method is provided so that users can manage those files. @@ -1511,7 +1556,7 @@ def getCheckpointFiles(self): @inherit_doc -class LocalLDAModel(LDAModel, JavaMLReadable, JavaMLWritable): +class LocalLDAModel(LDAModel, JavaMLReadable["LocalLDAModel"], JavaMLWritable): """ Local (non-distributed) model fitted by :py:class:`LDA`. This model stores the inferred topics only; it does not store info about the training dataset. @@ -1523,7 +1568,7 @@ class LocalLDAModel(LDAModel, JavaMLReadable, JavaMLWritable): @inherit_doc -class LDA(JavaEstimator, _LDAParams, JavaMLReadable, JavaMLWritable): +class LDA(JavaEstimator[LDAModel], _LDAParams, JavaMLReadable["LDA"], JavaMLWritable): """ Latent Dirichlet Allocation (LDA), a topic model designed for text documents. 
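One detail worth noting in the `LDAModel` hunks above: the setters are annotated with a self-type (`self: "M"`) rather than with `-> "LDAModel"`, so that `DistributedLDAModel` and `LocalLDAModel` keep their concrete type through chained calls. The sketch below shows the general pattern only; the class names are stand-ins, and the real `M` is presumably a `TypeVar` bound to `LDAModel` defined elsewhere in `clustering.py` (its definition is not visible in these hunks):

```
from typing import TypeVar

M = TypeVar("M", bound="LDAModelSketch")

class LDAModelSketch:
    def setSeed(self: M, value: int) -> M:
        # Returning the self-type preserves the concrete subclass for the caller.
        return self

class DistributedLDAModelSketch(LDAModelSketch):
    pass

# Type-checks only because setSeed returns the self-type, not the base class.
model: DistributedLDAModelSketch = DistributedLDAModelSketch().setSeed(42)
```

This is the behaviour that the `test_clustering.yaml` case added later in this patch asserts for `DistributedLDAModel.setFeaturesCol`.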
@@ -1593,24 +1638,26 @@ class LDA(JavaEstimator, _LDAParams, JavaMLReadable, JavaMLWritable): True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - maxIter=20, - seed=None, - checkpointInterval=10, - k=10, - optimizer="online", - learningOffset=1024.0, - learningDecay=0.51, - subsamplingRate=0.05, - optimizeDocConcentration=True, - docConcentration=None, - topicConcentration=None, - topicDistributionCol="topicDistribution", - keepLastCheckpoint=True, + featuresCol: str = "features", + maxIter: int = 20, + seed: Optional[int] = None, + checkpointInterval: int = 10, + k: int = 10, + optimizer: str = "online", + learningOffset: float = 1024.0, + learningDecay: float = 0.51, + subsamplingRate: float = 0.05, + optimizeDocConcentration: bool = True, + docConcentration: Optional[List[float]] = None, + topicConcentration: Optional[float] = None, + topicDistributionCol: str = "topicDistribution", + keepLastCheckpoint: bool = True, ): """ __init__(self, \\*, featuresCol="features", maxIter=20, seed=None, checkpointInterval=10,\ @@ -1624,7 +1671,7 @@ def __init__( kwargs = self._input_kwargs self.setParams(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> LDAModel: if self.getOptimizer() == "em": return DistributedLDAModel(java_model) else: @@ -1635,21 +1682,21 @@ def _create_model(self, java_model): def setParams( self, *, - featuresCol="features", - maxIter=20, - seed=None, - checkpointInterval=10, - k=10, - optimizer="online", - learningOffset=1024.0, - learningDecay=0.51, - subsamplingRate=0.05, - optimizeDocConcentration=True, - docConcentration=None, - topicConcentration=None, - topicDistributionCol="topicDistribution", - keepLastCheckpoint=True, - ): + featuresCol: str = "features", + maxIter: int = 20, + seed: Optional[int] = None, + checkpointInterval: int = 10, + k: int = 10, + optimizer: str = "online", + learningOffset: float = 1024.0, + learningDecay: float = 0.51, + subsamplingRate: float = 0.05, + optimizeDocConcentration: bool = True, + docConcentration: Optional[List[float]] = None, + topicConcentration: Optional[float] = None, + topicDistributionCol: str = "topicDistribution", + keepLastCheckpoint: bool = True, + ) -> "LDA": """ setParams(self, \\*, featuresCol="features", maxIter=20, seed=None, checkpointInterval=10,\ k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51,\ @@ -1663,21 +1710,21 @@ def setParams( return self._set(**kwargs) @since("2.0.0") - def setCheckpointInterval(self, value): + def setCheckpointInterval(self, value: int) -> "LDA": """ Sets the value of :py:attr:`checkpointInterval`. """ return self._set(checkpointInterval=value) @since("2.0.0") - def setSeed(self, value): + def setSeed(self, value: int) -> "LDA": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) @since("2.0.0") - def setK(self, value): + def setK(self, value: int) -> "LDA": """ Sets the value of :py:attr:`k`. @@ -1688,7 +1735,7 @@ def setK(self, value): return self._set(k=value) @since("2.0.0") - def setOptimizer(self, value): + def setOptimizer(self, value: str) -> "LDA": """ Sets the value of :py:attr:`optimizer`. Currently only support 'em' and 'online'. @@ -1702,7 +1749,7 @@ def setOptimizer(self, value): return self._set(optimizer=value) @since("2.0.0") - def setLearningOffset(self, value): + def setLearningOffset(self, value: float) -> "LDA": """ Sets the value of :py:attr:`learningOffset`. 
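The `_create_model` override above is why `LDA` is parameterised as `JavaEstimator[LDAModel]` rather than with a concrete model class: the optimizer setting decides at fit time whether a `DistributedLDAModel` (EM) or a `LocalLDAModel` (online) comes back. A minimal sketch of that dispatch from the user side, with an assumed local session and toy term-count vectors:

```
from pyspark.ml.clustering import LDA, DistributedLDAModel, LocalLDAModel
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame(
    [(Vectors.dense([1.0, 2.0, 0.0]),), (Vectors.dense([0.0, 1.0, 3.0]),)],
    ["features"],
)

online_model = LDA(k=2, maxIter=5, optimizer="online").fit(df)
em_model = LDA(k=2, maxIter=5, optimizer="em").fit(df)

print(isinstance(online_model, LocalLDAModel))     # True
print(isinstance(em_model, DistributedLDAModel))   # True
print(em_model.isDistributed())                    # True
```

Statically both `fit` calls are typed as returning `LDAModel`; narrowing to the concrete subclass remains a runtime concern (an `isinstance` check), which matches what the old stub file expressed.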
@@ -1715,7 +1762,7 @@ def setLearningOffset(self, value): return self._set(learningOffset=value) @since("2.0.0") - def setLearningDecay(self, value): + def setLearningDecay(self, value: float) -> "LDA": """ Sets the value of :py:attr:`learningDecay`. @@ -1728,7 +1775,7 @@ def setLearningDecay(self, value): return self._set(learningDecay=value) @since("2.0.0") - def setSubsamplingRate(self, value): + def setSubsamplingRate(self, value: float) -> "LDA": """ Sets the value of :py:attr:`subsamplingRate`. @@ -1741,7 +1788,7 @@ def setSubsamplingRate(self, value): return self._set(subsamplingRate=value) @since("2.0.0") - def setOptimizeDocConcentration(self, value): + def setOptimizeDocConcentration(self, value: bool) -> "LDA": """ Sets the value of :py:attr:`optimizeDocConcentration`. @@ -1754,7 +1801,7 @@ def setOptimizeDocConcentration(self, value): return self._set(optimizeDocConcentration=value) @since("2.0.0") - def setDocConcentration(self, value): + def setDocConcentration(self, value: List[float]) -> "LDA": """ Sets the value of :py:attr:`docConcentration`. @@ -1767,7 +1814,7 @@ def setDocConcentration(self, value): return self._set(docConcentration=value) @since("2.0.0") - def setTopicConcentration(self, value): + def setTopicConcentration(self, value: float) -> "LDA": """ Sets the value of :py:attr:`topicConcentration`. @@ -1780,7 +1827,7 @@ def setTopicConcentration(self, value): return self._set(topicConcentration=value) @since("2.0.0") - def setTopicDistributionCol(self, value): + def setTopicDistributionCol(self, value: str) -> "LDA": """ Sets the value of :py:attr:`topicDistributionCol`. @@ -1793,7 +1840,7 @@ def setTopicDistributionCol(self, value): return self._set(topicDistributionCol=value) @since("2.0.0") - def setKeepLastCheckpoint(self, value): + def setKeepLastCheckpoint(self, value: bool) -> "LDA": """ Sets the value of :py:attr:`keepLastCheckpoint`. @@ -1806,14 +1853,14 @@ def setKeepLastCheckpoint(self, value): return self._set(keepLastCheckpoint=value) @since("2.0.0") - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "LDA": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) @since("2.0.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "LDA": """ Sets the value of :py:attr:`featuresCol`. """ @@ -1828,13 +1875,13 @@ class _PowerIterationClusteringParams(HasMaxIter, HasWeightCol): .. versionadded:: 3.0.0 """ - k = Param( + k: Param[int] = Param( Params._dummy(), "k", "The number of clusters to create. Must be > 1.", typeConverter=TypeConverters.toInt, ) - initMode = Param( + initMode: Param[str] = Param( Params._dummy(), "initMode", "The initialization algorithm. This can be either " @@ -1843,46 +1890,46 @@ class _PowerIterationClusteringParams(HasMaxIter, HasWeightCol): + "'random' and 'degree'.", typeConverter=TypeConverters.toString, ) - srcCol = Param( + srcCol: Param[str] = Param( Params._dummy(), "srcCol", "Name of the input column for source vertex IDs.", typeConverter=TypeConverters.toString, ) - dstCol = Param( + dstCol: Param[str] = Param( Params._dummy(), "dstCol", "Name of the input column for destination vertex IDs.", typeConverter=TypeConverters.toString, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_PowerIterationClusteringParams, self).__init__(*args) self._setDefault(k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst") @since("2.4.0") - def getK(self): + def getK(self) -> int: """ Gets the value of :py:attr:`k` or its default value. 
""" return self.getOrDefault(self.k) @since("2.4.0") - def getInitMode(self): + def getInitMode(self) -> str: """ Gets the value of :py:attr:`initMode` or its default value. """ return self.getOrDefault(self.initMode) @since("2.4.0") - def getSrcCol(self): + def getSrcCol(self) -> str: """ Gets the value of :py:attr:`srcCol` or its default value. """ return self.getOrDefault(self.srcCol) @since("2.4.0") - def getDstCol(self): + def getDstCol(self) -> str: """ Gets the value of :py:attr:`dstCol` or its default value. """ @@ -1891,7 +1938,10 @@ def getDstCol(self): @inherit_doc class PowerIterationClustering( - _PowerIterationClusteringParams, JavaParams, JavaMLReadable, JavaMLWritable + _PowerIterationClusteringParams, + JavaParams, + JavaMLReadable["PowerIterationClustering"], + JavaMLWritable, ): """ Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by @@ -1943,9 +1993,18 @@ class PowerIterationClustering( True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( - self, *, k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst", weightCol=None + self, + *, + k: int = 2, + maxIter: int = 20, + initMode: str = "random", + srcCol: str = "src", + dstCol: str = "dst", + weightCol: Optional[str] = None, ): """ __init__(self, \\*, k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst",\ @@ -1961,8 +2020,15 @@ def __init__( @keyword_only @since("2.4.0") def setParams( - self, *, k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst", weightCol=None - ): + self, + *, + k: int = 2, + maxIter: int = 20, + initMode: str = "random", + srcCol: str = "src", + dstCol: str = "dst", + weightCol: Optional[str] = None, + ) -> "PowerIterationClustering": """ setParams(self, \\*, k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst",\ weightCol=None) @@ -1972,49 +2038,49 @@ def setParams( return self._set(**kwargs) @since("2.4.0") - def setK(self, value): + def setK(self, value: int) -> "PowerIterationClustering": """ Sets the value of :py:attr:`k`. """ return self._set(k=value) @since("2.4.0") - def setInitMode(self, value): + def setInitMode(self, value: str) -> "PowerIterationClustering": """ Sets the value of :py:attr:`initMode`. """ return self._set(initMode=value) @since("2.4.0") - def setSrcCol(self, value): + def setSrcCol(self, value: str) -> "PowerIterationClustering": """ Sets the value of :py:attr:`srcCol`. """ return self._set(srcCol=value) @since("2.4.0") - def setDstCol(self, value): + def setDstCol(self, value: str) -> "PowerIterationClustering": """ Sets the value of :py:attr:`dstCol`. """ return self._set(dstCol=value) @since("2.4.0") - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "PowerIterationClustering": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) @since("2.4.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "PowerIterationClustering": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) @since("2.4.0") - def assignClusters(self, dataset): + def assignClusters(self, dataset: DataFrame) -> DataFrame: """ Run the PIC algorithm and returns a cluster assignment for each input vertex. 
@@ -2038,6 +2104,8 @@ def assignClusters(self, dataset): - cluster: Int """ self._transfer_params_to_java() + assert self._java_obj is not None + jdf = self._java_obj.assignClusters(dataset._jdf) return DataFrame(jdf, dataset.sql_ctx) diff --git a/python/pyspark/ml/clustering.pyi b/python/pyspark/ml/clustering.pyi deleted file mode 100644 index e0ee3d6394e7b..0000000000000 --- a/python/pyspark/ml/clustering.pyi +++ /dev/null @@ -1,439 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any, List, Optional - -from pyspark.ml.linalg import Matrix, Vector -from pyspark.ml.util import ( - GeneralJavaMLWritable, - HasTrainingSummary, - JavaMLReadable, - JavaMLWritable, -) -from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaWrapper -from pyspark.ml.param.shared import ( - HasAggregationDepth, - HasCheckpointInterval, - HasDistanceMeasure, - HasFeaturesCol, - HasMaxIter, - HasPredictionCol, - HasProbabilityCol, - HasSeed, - HasTol, - HasWeightCol, -) - -from pyspark.ml.param import Param -from pyspark.ml.stat import MultivariateGaussian -from pyspark.sql.dataframe import DataFrame - -from numpy import ndarray - -from py4j.java_gateway import JavaObject # type: ignore[import] - -class ClusteringSummary(JavaWrapper): - @property - def predictionCol(self) -> str: ... - @property - def predictions(self) -> DataFrame: ... - @property - def featuresCol(self) -> str: ... - @property - def k(self) -> int: ... - @property - def cluster(self) -> DataFrame: ... - @property - def clusterSizes(self) -> List[int]: ... - @property - def numIter(self) -> int: ... - -class _GaussianMixtureParams( - HasMaxIter, - HasFeaturesCol, - HasSeed, - HasPredictionCol, - HasProbabilityCol, - HasTol, - HasAggregationDepth, - HasWeightCol, -): - k: Param[int] - def __init__(self, *args: Any): ... - def getK(self) -> int: ... - -class GaussianMixtureModel( - JavaModel, - _GaussianMixtureParams, - JavaMLWritable, - JavaMLReadable[GaussianMixtureModel], - HasTrainingSummary[GaussianMixtureSummary], -): - def setFeaturesCol(self, value: str) -> GaussianMixtureModel: ... - def setPredictionCol(self, value: str) -> GaussianMixtureModel: ... - def setProbabilityCol(self, value: str) -> GaussianMixtureModel: ... - @property - def weights(self) -> List[float]: ... - @property - def gaussians(self) -> List[MultivariateGaussian]: ... - @property - def gaussiansDF(self) -> DataFrame: ... - @property - def summary(self) -> GaussianMixtureSummary: ... - def predict(self, value: Vector) -> int: ... - def predictProbability(self, value: Vector) -> Vector: ... 
- -class GaussianMixture( - JavaEstimator[GaussianMixtureModel], - _GaussianMixtureParams, - JavaMLWritable, - JavaMLReadable[GaussianMixture], -): - def __init__( - self, - *, - featuresCol: str = ..., - predictionCol: str = ..., - k: int = ..., - probabilityCol: str = ..., - tol: float = ..., - maxIter: int = ..., - seed: Optional[int] = ..., - aggregationDepth: int = ..., - weightCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - predictionCol: str = ..., - k: int = ..., - probabilityCol: str = ..., - tol: float = ..., - maxIter: int = ..., - seed: Optional[int] = ..., - aggregationDepth: int = ..., - weightCol: Optional[str] = ..., - ) -> GaussianMixture: ... - def setK(self, value: int) -> GaussianMixture: ... - def setMaxIter(self, value: int) -> GaussianMixture: ... - def setFeaturesCol(self, value: str) -> GaussianMixture: ... - def setPredictionCol(self, value: str) -> GaussianMixture: ... - def setProbabilityCol(self, value: str) -> GaussianMixture: ... - def setWeightCol(self, value: str) -> GaussianMixture: ... - def setSeed(self, value: int) -> GaussianMixture: ... - def setTol(self, value: float) -> GaussianMixture: ... - def setAggregationDepth(self, value: int) -> GaussianMixture: ... - def _create_model(self, java_model: JavaObject) -> GaussianMixtureModel: ... - -class GaussianMixtureSummary(ClusteringSummary): - @property - def probabilityCol(self) -> str: ... - @property - def probability(self) -> DataFrame: ... - @property - def logLikelihood(self) -> float: ... - -class KMeansSummary(ClusteringSummary): - def trainingCost(self) -> float: ... - -class _KMeansParams( - HasMaxIter, - HasFeaturesCol, - HasSeed, - HasPredictionCol, - HasTol, - HasDistanceMeasure, - HasWeightCol, -): - k: Param[int] - initMode: Param[str] - initSteps: Param[int] - def __init__(self, *args: Any): ... - def getK(self) -> int: ... - def getInitMode(self) -> str: ... - def getInitSteps(self) -> int: ... - -class KMeansModel( - JavaModel, - _KMeansParams, - GeneralJavaMLWritable, - JavaMLReadable[KMeansModel], - HasTrainingSummary[KMeansSummary], -): - def setFeaturesCol(self, value: str) -> KMeansModel: ... - def setPredictionCol(self, value: str) -> KMeansModel: ... - def clusterCenters(self) -> List[ndarray]: ... - @property - def summary(self) -> KMeansSummary: ... - def predict(self, value: Vector) -> int: ... - -class KMeans(JavaEstimator[KMeansModel], _KMeansParams, JavaMLWritable, JavaMLReadable[KMeans]): - def __init__( - self, - *, - featuresCol: str = ..., - predictionCol: str = ..., - k: int = ..., - initMode: str = ..., - initSteps: int = ..., - tol: float = ..., - maxIter: int = ..., - seed: Optional[int] = ..., - distanceMeasure: str = ..., - weightCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - predictionCol: str = ..., - k: int = ..., - initMode: str = ..., - initSteps: int = ..., - tol: float = ..., - maxIter: int = ..., - seed: Optional[int] = ..., - distanceMeasure: str = ..., - weightCol: Optional[str] = ..., - ) -> KMeans: ... - def setK(self, value: int) -> KMeans: ... - def setInitMode(self, value: str) -> KMeans: ... - def setInitSteps(self, value: int) -> KMeans: ... - def setDistanceMeasure(self, value: str) -> KMeans: ... - def setMaxIter(self, value: int) -> KMeans: ... - def setFeaturesCol(self, value: str) -> KMeans: ... - def setPredictionCol(self, value: str) -> KMeans: ... - def setSeed(self, value: int) -> KMeans: ... 
- def setTol(self, value: float) -> KMeans: ... - def setWeightCol(self, value: str) -> KMeans: ... - def _create_model(self, java_model: JavaObject) -> KMeansModel: ... - -class _BisectingKMeansParams( - HasMaxIter, - HasFeaturesCol, - HasSeed, - HasPredictionCol, - HasDistanceMeasure, - HasWeightCol, -): - k: Param[int] - minDivisibleClusterSize: Param[float] - def __init__(self, *args: Any): ... - def getK(self) -> int: ... - def getMinDivisibleClusterSize(self) -> float: ... - -class BisectingKMeansModel( - JavaModel, - _BisectingKMeansParams, - JavaMLWritable, - JavaMLReadable[BisectingKMeansModel], - HasTrainingSummary[BisectingKMeansSummary], -): - def setFeaturesCol(self, value: str) -> BisectingKMeansModel: ... - def setPredictionCol(self, value: str) -> BisectingKMeansModel: ... - def clusterCenters(self) -> List[ndarray]: ... - def computeCost(self, dataset: DataFrame) -> float: ... - @property - def summary(self) -> BisectingKMeansSummary: ... - def predict(self, value: Vector) -> int: ... - -class BisectingKMeans( - JavaEstimator[BisectingKMeansModel], - _BisectingKMeansParams, - JavaMLWritable, - JavaMLReadable[BisectingKMeans], -): - def __init__( - self, - *, - featuresCol: str = ..., - predictionCol: str = ..., - maxIter: int = ..., - seed: Optional[int] = ..., - k: int = ..., - minDivisibleClusterSize: float = ..., - distanceMeasure: str = ..., - weightCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - predictionCol: str = ..., - maxIter: int = ..., - seed: Optional[int] = ..., - k: int = ..., - minDivisibleClusterSize: float = ..., - distanceMeasure: str = ..., - weightCol: Optional[str] = ..., - ) -> BisectingKMeans: ... - def setK(self, value: int) -> BisectingKMeans: ... - def setMinDivisibleClusterSize(self, value: float) -> BisectingKMeans: ... - def setDistanceMeasure(self, value: str) -> BisectingKMeans: ... - def setMaxIter(self, value: int) -> BisectingKMeans: ... - def setFeaturesCol(self, value: str) -> BisectingKMeans: ... - def setPredictionCol(self, value: str) -> BisectingKMeans: ... - def setSeed(self, value: int) -> BisectingKMeans: ... - def setWeightCol(self, value: str) -> BisectingKMeans: ... - def _create_model(self, java_model: JavaObject) -> BisectingKMeansModel: ... - -class BisectingKMeansSummary(ClusteringSummary): - @property - def trainingCost(self) -> float: ... - -class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval): - k: Param[int] - optimizer: Param[str] - learningOffset: Param[float] - learningDecay: Param[float] - subsamplingRate: Param[float] - optimizeDocConcentration: Param[bool] - docConcentration: Param[List[float]] - topicConcentration: Param[float] - topicDistributionCol: Param[str] - keepLastCheckpoint: Param[bool] - def __init__(self, *args: Any): ... - def setK(self, value: int) -> LDA: ... - def getOptimizer(self) -> str: ... - def getLearningOffset(self) -> float: ... - def getLearningDecay(self) -> float: ... - def getSubsamplingRate(self) -> float: ... - def getOptimizeDocConcentration(self) -> bool: ... - def getDocConcentration(self) -> List[float]: ... - def getTopicConcentration(self) -> float: ... - def getTopicDistributionCol(self) -> str: ... - def getKeepLastCheckpoint(self) -> bool: ... - -class LDAModel(JavaModel, _LDAParams): - def setFeaturesCol(self, value: str) -> LDAModel: ... - def setSeed(self, value: int) -> LDAModel: ... - def setTopicDistributionCol(self, value: str) -> LDAModel: ... - def isDistributed(self) -> bool: ... 
- def vocabSize(self) -> int: ... - def topicsMatrix(self) -> Matrix: ... - def logLikelihood(self, dataset: DataFrame) -> float: ... - def logPerplexity(self, dataset: DataFrame) -> float: ... - def describeTopics(self, maxTermsPerTopic: int = ...) -> DataFrame: ... - def estimatedDocConcentration(self) -> Vector: ... - -class DistributedLDAModel(LDAModel, JavaMLReadable[DistributedLDAModel], JavaMLWritable): - def toLocal(self) -> LDAModel: ... - def trainingLogLikelihood(self) -> float: ... - def logPrior(self) -> float: ... - def getCheckpointFiles(self) -> List[str]: ... - -class LocalLDAModel(LDAModel, JavaMLReadable[LocalLDAModel], JavaMLWritable): ... - -class LDA(JavaEstimator[LDAModel], _LDAParams, JavaMLReadable[LDA], JavaMLWritable): - def __init__( - self, - *, - featuresCol: str = ..., - maxIter: int = ..., - seed: Optional[int] = ..., - checkpointInterval: int = ..., - k: int = ..., - optimizer: str = ..., - learningOffset: float = ..., - learningDecay: float = ..., - subsamplingRate: float = ..., - optimizeDocConcentration: bool = ..., - docConcentration: Optional[List[float]] = ..., - topicConcentration: Optional[float] = ..., - topicDistributionCol: str = ..., - keepLastCheckpoint: bool = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - maxIter: int = ..., - seed: Optional[int] = ..., - checkpointInterval: int = ..., - k: int = ..., - optimizer: str = ..., - learningOffset: float = ..., - learningDecay: float = ..., - subsamplingRate: float = ..., - optimizeDocConcentration: bool = ..., - docConcentration: Optional[List[float]] = ..., - topicConcentration: Optional[float] = ..., - topicDistributionCol: str = ..., - keepLastCheckpoint: bool = ..., - ) -> LDA: ... - def setCheckpointInterval(self, value: int) -> LDA: ... - def setSeed(self, value: int) -> LDA: ... - def setK(self, value: int) -> LDA: ... - def setOptimizer(self, value: str) -> LDA: ... - def setLearningOffset(self, value: float) -> LDA: ... - def setLearningDecay(self, value: float) -> LDA: ... - def setSubsamplingRate(self, value: float) -> LDA: ... - def setOptimizeDocConcentration(self, value: bool) -> LDA: ... - def setDocConcentration(self, value: List[float]) -> LDA: ... - def setTopicConcentration(self, value: float) -> LDA: ... - def setTopicDistributionCol(self, value: str) -> LDA: ... - def setKeepLastCheckpoint(self, value: bool) -> LDA: ... - def setMaxIter(self, value: int) -> LDA: ... - def setFeaturesCol(self, value: str) -> LDA: ... - def _create_model(self, java_model: JavaObject) -> LDAModel: ... - -class _PowerIterationClusteringParams(HasMaxIter, HasWeightCol): - k: Param[int] - initMode: Param[str] - srcCol: Param[str] - dstCol: Param[str] - def __init__(self, *args: Any): ... - def getK(self) -> int: ... - def getInitMode(self) -> str: ... - def getSrcCol(self) -> str: ... - def getDstCol(self) -> str: ... - -class PowerIterationClustering( - _PowerIterationClusteringParams, - JavaParams, - JavaMLReadable[PowerIterationClustering], - JavaMLWritable, -): - def __init__( - self, - *, - k: int = ..., - maxIter: int = ..., - initMode: str = ..., - srcCol: str = ..., - dstCol: str = ..., - weightCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - k: int = ..., - maxIter: int = ..., - initMode: str = ..., - srcCol: str = ..., - dstCol: str = ..., - weightCol: Optional[str] = ..., - ) -> PowerIterationClustering: ... - def setK(self, value: int) -> PowerIterationClustering: ... 
- def setInitMode(self, value: str) -> PowerIterationClustering: ... - def setSrcCol(self, value: str) -> str: ... - def setDstCol(self, value: str) -> PowerIterationClustering: ... - def setMaxIter(self, value: int) -> PowerIterationClustering: ... - def setWeightCol(self, value: str) -> PowerIterationClustering: ... - def assignClusters(self, dataset: DataFrame) -> DataFrame: ... diff --git a/python/pyspark/ml/tests/typing/test_clustering.yaml b/python/pyspark/ml/tests/typing/test_clustering.yaml new file mode 100644 index 0000000000000..b208573975d7f --- /dev/null +++ b/python/pyspark/ml/tests/typing/test_clustering.yaml @@ -0,0 +1,33 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +- case: InheritedLDAMethods + main: | + from pyspark.ml.clustering import LDAModel, LocalLDAModel, DistributedLDAModel + + distributed_model = DistributedLDAModel.load("foo") + reveal_type(distributed_model) + reveal_type(distributed_model.setFeaturesCol("foo")) + + local_model = distributed_model.toLocal() + reveal_type(local_model) + reveal_type(local_model.setFeaturesCol("foo")) + out: | + main:4: note: Revealed type is "pyspark.ml.clustering.DistributedLDAModel*" + main:5: note: Revealed type is "pyspark.ml.clustering.DistributedLDAModel*" + main:8: note: Revealed type is "pyspark.ml.clustering.LocalLDAModel" + main:9: note: Revealed type is "pyspark.ml.clustering.LocalLDAModel*" From 3d285c11b611e63d6ebb0b209f52d6ec7a61debe Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 10 Feb 2022 12:56:08 +0300 Subject: [PATCH 205/513] [SPARK-38123][SQL] Unified use `DataType` as `targetType` of `QueryExecutionErrors#castingCauseOverflowError` ### What changes were proposed in this pull request? SPARK-33541 introduces `QueryExecutionErrors#castingCauseOverflowError` and there are 2 ways for input parameter `targetType` in Spark code now: - Use `DataType.catalogString` as `targetType`, such as use in `Cast.scala` and `IntervalUtils.scala` - Use custom literal such as `short`,`int` and `long` in `Decimal.scala` and `numberics.scala` This pr change to unified use `DataType` as the `targetType`. Another change of this pr is to change the `targetType` from `int` to `LongType.catalogString` when `FloatExactNumeric#toLong` method throw castingCauseOverflowError, this seems to be a issue left over from history. ### Why are the changes needed? Unified use `DataType.catalogString` as `targetType` when throwing castingCauseOverflowError and bug fix ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35412 from LuciferYang/use-catalogString. 
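The refactoring this commit describes is internal to the error helpers, but its visible effect is the overflow message raised by ANSI casts, where the target type is rendered via `DataType.catalogString`. A rough, user-level illustration in PySpark follows; the session setup is assumed and the exact error wording is version-dependent, so treat it as a sketch rather than the canonical message:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
spark.conf.set("spark.sql.ansi.enabled", "true")

try:
    # 3000000000 fits in a bigint but not in a 32-bit int, so under ANSI mode
    # the cast fails with an overflow error that names the target type by its
    # catalog string (e.g. "int"), the string this commit now derives uniformly
    # from the DataType instead of from ad-hoc literals.
    spark.sql("SELECT CAST(3000000000 AS INT)").collect()
except Exception as err:
    print(err)
```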
Authored-by: yangjie01 Signed-off-by: Max Gekk --- .../spark/sql/catalyst/expressions/Cast.scala | 60 ++++++++++--------- .../sql/catalyst/util/IntervalUtils.scala | 18 +++--- .../sql/errors/QueryExecutionErrors.scala | 4 +- .../org/apache/spark/sql/types/Decimal.scala | 14 ++--- .../org/apache/spark/sql/types/numerics.scala | 10 ++-- .../results/postgreSQL/float4.sql.out | 2 +- .../results/postgreSQL/float8.sql.out | 2 +- .../sql-tests/results/postgreSQL/int8.sql.out | 2 +- 8 files changed, 59 insertions(+), 53 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index e5fa433b78d64..5091ee5d27927 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -671,7 +671,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit if (longValue == longValue.toInt) { longValue.toInt } else { - throw QueryExecutionErrors.castingCauseOverflowError(t, IntegerType.catalogString) + throw QueryExecutionErrors.castingCauseOverflowError(t, IntegerType) } }) case TimestampType => @@ -707,7 +707,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit if (longValue == longValue.toShort) { longValue.toShort } else { - throw QueryExecutionErrors.castingCauseOverflowError(t, ShortType.catalogString) + throw QueryExecutionErrors.castingCauseOverflowError(t, ShortType) } }) case TimestampType => @@ -718,12 +718,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit x.exactNumeric.asInstanceOf[Numeric[Any]].toInt(b) } catch { case _: ArithmeticException => - throw QueryExecutionErrors.castingCauseOverflowError(b, ShortType.catalogString) + throw QueryExecutionErrors.castingCauseOverflowError(b, ShortType) } if (intValue == intValue.toShort) { intValue.toShort } else { - throw QueryExecutionErrors.castingCauseOverflowError(b, ShortType.catalogString) + throw QueryExecutionErrors.castingCauseOverflowError(b, ShortType) } case x: NumericType => b => x.numeric.asInstanceOf[Numeric[Any]].toInt(b).toShort @@ -754,7 +754,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit if (longValue == longValue.toByte) { longValue.toByte } else { - throw QueryExecutionErrors.castingCauseOverflowError(t, ByteType.catalogString) + throw QueryExecutionErrors.castingCauseOverflowError(t, ByteType) } }) case TimestampType => @@ -765,12 +765,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit x.exactNumeric.asInstanceOf[Numeric[Any]].toInt(b) } catch { case _: ArithmeticException => - throw QueryExecutionErrors.castingCauseOverflowError(b, ByteType.catalogString) + throw QueryExecutionErrors.castingCauseOverflowError(b, ByteType) } if (intValue == intValue.toByte) { intValue.toByte } else { - throw QueryExecutionErrors.castingCauseOverflowError(b, ByteType.catalogString) + throw QueryExecutionErrors.castingCauseOverflowError(b, ByteType) } case x: NumericType => b => x.numeric.asInstanceOf[Numeric[Any]].toInt(b).toByte @@ -1639,20 +1639,21 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castTimestampToIntegralTypeCode( ctx: CodegenContext, integralType: String, - catalogType: String): CastFunction = { + dataType: DataType): CastFunction = { if (ansiEnabled) { val longValue 
= ctx.freshName("longValue") - (c, evPrim, evNull) => + val dt = ctx.addReferenceObj("dataType", dataType, dataType.getClass.getName) + (c, evPrim, _) => code""" long $longValue = ${timestampToLongCode(c)}; if ($longValue == ($integralType) $longValue) { $evPrim = ($integralType) $longValue; } else { - throw QueryExecutionErrors.castingCauseOverflowError($c, "$catalogType"); + throw QueryExecutionErrors.castingCauseOverflowError($c, $dt); } """ } else { - (c, evPrim, evNull) => code"$evPrim = ($integralType) ${timestampToLongCode(c)};" + (c, evPrim, _) => code"$evPrim = ($integralType) ${timestampToLongCode(c)};" } } @@ -1690,19 +1691,22 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } private[this] def castIntegralTypeToIntegralTypeExactCode( + ctx: CodegenContext, integralType: String, - catalogType: String): CastFunction = { + dataType: DataType): CastFunction = { assert(ansiEnabled) - (c, evPrim, evNull) => + val dt = ctx.addReferenceObj("dataType", dataType, dataType.getClass.getName) + (c, evPrim, _) => code""" if ($c == ($integralType) $c) { $evPrim = ($integralType) $c; } else { - throw QueryExecutionErrors.castingCauseOverflowError($c, "$catalogType"); + throw QueryExecutionErrors.castingCauseOverflowError($c, $dt); } """ } + private[this] def lowerAndUpperBound(integralType: String): (String, String) = { val (min, max, typeIndicator) = integralType.toLowerCase(Locale.ROOT) match { case "long" => (Long.MinValue, Long.MaxValue, "L") @@ -1714,22 +1718,24 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } private[this] def castFractionToIntegralTypeCode( + ctx: CodegenContext, integralType: String, - catalogType: String): CastFunction = { + dataType: DataType): CastFunction = { assert(ansiEnabled) val (min, max) = lowerAndUpperBound(integralType) val mathClass = classOf[Math].getName + val dt = ctx.addReferenceObj("dataType", dataType, dataType.getClass.getName) // When casting floating values to integral types, Spark uses the method `Numeric.toInt` // Or `Numeric.toLong` directly. For positive floating values, it is equivalent to `Math.floor`; // for negative floating values, it is equivalent to `Math.ceil`. // So, we can use the condition `Math.floor(x) <= upperBound && Math.ceil(x) >= lowerBound` // to check if the floating value x is in the range of an integral type after rounding. - (c, evPrim, evNull) => + (c, evPrim, _) => code""" if ($mathClass.floor($c) <= $max && $mathClass.ceil($c) >= $min) { $evPrim = ($integralType) $c; } else { - throw QueryExecutionErrors.castingCauseOverflowError($c, "$catalogType"); + throw QueryExecutionErrors.castingCauseOverflowError($c, $dt); } """ } @@ -1754,12 +1760,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => code"$evPrim = $c ? 
(byte) 1 : (byte) 0;" case DateType => (c, evPrim, evNull) => code"$evNull = true;" - case TimestampType => castTimestampToIntegralTypeCode(ctx, "byte", ByteType.catalogString) + case TimestampType => castTimestampToIntegralTypeCode(ctx, "byte", ByteType) case DecimalType() => castDecimalToIntegralTypeCode(ctx, "byte", ByteType.catalogString) case ShortType | IntegerType | LongType if ansiEnabled => - castIntegralTypeToIntegralTypeExactCode("byte", ByteType.catalogString) + castIntegralTypeToIntegralTypeExactCode(ctx, "byte", ByteType) case FloatType | DoubleType if ansiEnabled => - castFractionToIntegralTypeCode("byte", ByteType.catalogString) + castFractionToIntegralTypeCode(ctx, "byte", ByteType) case x: NumericType => (c, evPrim, evNull) => code"$evPrim = (byte) $c;" case x: DayTimeIntervalType => @@ -1790,12 +1796,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => code"$evPrim = $c ? (short) 1 : (short) 0;" case DateType => (c, evPrim, evNull) => code"$evNull = true;" - case TimestampType => castTimestampToIntegralTypeCode(ctx, "short", ShortType.catalogString) + case TimestampType => castTimestampToIntegralTypeCode(ctx, "short", ShortType) case DecimalType() => castDecimalToIntegralTypeCode(ctx, "short", ShortType.catalogString) case IntegerType | LongType if ansiEnabled => - castIntegralTypeToIntegralTypeExactCode("short", ShortType.catalogString) + castIntegralTypeToIntegralTypeExactCode(ctx, "short", ShortType) case FloatType | DoubleType if ansiEnabled => - castFractionToIntegralTypeCode("short", ShortType.catalogString) + castFractionToIntegralTypeCode(ctx, "short", ShortType) case x: NumericType => (c, evPrim, evNull) => code"$evPrim = (short) $c;" case x: DayTimeIntervalType => @@ -1824,12 +1830,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => code"$evPrim = $c ? 
1 : 0;" case DateType => (c, evPrim, evNull) => code"$evNull = true;" - case TimestampType => castTimestampToIntegralTypeCode(ctx, "int", IntegerType.catalogString) + case TimestampType => castTimestampToIntegralTypeCode(ctx, "int", IntegerType) case DecimalType() => castDecimalToIntegralTypeCode(ctx, "int", IntegerType.catalogString) case LongType if ansiEnabled => - castIntegralTypeToIntegralTypeExactCode("int", IntegerType.catalogString) + castIntegralTypeToIntegralTypeExactCode(ctx, "int", IntegerType) case FloatType | DoubleType if ansiEnabled => - castFractionToIntegralTypeCode("int", IntegerType.catalogString) + castFractionToIntegralTypeCode(ctx, "int", IntegerType) case x: NumericType => (c, evPrim, evNull) => code"$evPrim = (int) $c;" case x: DayTimeIntervalType => @@ -1862,7 +1868,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => code"$evPrim = (long) ${timestampToLongCode(c)};" case DecimalType() => castDecimalToIntegralTypeCode(ctx, "long", LongType.catalogString) case FloatType | DoubleType if ansiEnabled => - castFractionToIntegralTypeCode("long", LongType.catalogString) + castFractionToIntegralTypeCode(ctx, "long", LongType) case x: NumericType => (c, evPrim, evNull) => code"$evPrim = (long) $c;" case x: DayTimeIntervalType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index fc927ba054f82..ceed8df5026d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -1263,7 +1263,7 @@ object IntervalUtils { Math.multiplyExact(v, MONTHS_PER_YEAR) } catch { case _: ArithmeticException => - throw QueryExecutionErrors.castingCauseOverflowError(v, YM(endField).catalogString) + throw QueryExecutionErrors.castingCauseOverflowError(v, YM(endField)) } case MONTH => v } @@ -1272,7 +1272,7 @@ object IntervalUtils { def longToYearMonthInterval(v: Long, endField: Byte): Int = { val vInt = v.toInt if (v != vInt) { - throw QueryExecutionErrors.castingCauseOverflowError(v, YM(endField).catalogString) + throw QueryExecutionErrors.castingCauseOverflowError(v, YM(endField)) } intToYearMonthInterval(vInt, endField) } @@ -1289,7 +1289,7 @@ object IntervalUtils { val vShort = vInt.toShort if (vInt != vShort) { throw QueryExecutionErrors.castingCauseOverflowError( - toYearMonthIntervalString(v, ANSI_STYLE, startField, endField), ShortType.catalogString) + toYearMonthIntervalString(v, ANSI_STYLE, startField, endField), ShortType) } vShort } @@ -1299,7 +1299,7 @@ object IntervalUtils { val vByte = vInt.toByte if (vInt != vByte) { throw QueryExecutionErrors.castingCauseOverflowError( - toYearMonthIntervalString(v, ANSI_STYLE, startField, endField), ByteType.catalogString) + toYearMonthIntervalString(v, ANSI_STYLE, startField, endField), ByteType) } vByte } @@ -1311,7 +1311,7 @@ object IntervalUtils { Math.multiplyExact(v, MICROS_PER_DAY) } catch { case _: ArithmeticException => - throw QueryExecutionErrors.castingCauseOverflowError(v, DT(endField).catalogString) + throw QueryExecutionErrors.castingCauseOverflowError(v, DT(endField)) } case HOUR => v * MICROS_PER_HOUR case MINUTE => v * MICROS_PER_MINUTE @@ -1329,7 +1329,7 @@ object IntervalUtils { } } catch { case _: ArithmeticException => - throw QueryExecutionErrors.castingCauseOverflowError(v, DT(endField).catalogString) + throw 
QueryExecutionErrors.castingCauseOverflowError(v, DT(endField)) } } @@ -1347,7 +1347,7 @@ object IntervalUtils { val vInt = vLong.toInt if (vLong != vInt) { throw QueryExecutionErrors.castingCauseOverflowError( - toDayTimeIntervalString(v, ANSI_STYLE, startField, endField), IntegerType.catalogString) + toDayTimeIntervalString(v, ANSI_STYLE, startField, endField), IntegerType) } vInt } @@ -1357,7 +1357,7 @@ object IntervalUtils { val vShort = vLong.toShort if (vLong != vShort) { throw QueryExecutionErrors.castingCauseOverflowError( - toDayTimeIntervalString(v, ANSI_STYLE, startField, endField), ShortType.catalogString) + toDayTimeIntervalString(v, ANSI_STYLE, startField, endField), ShortType) } vShort } @@ -1367,7 +1367,7 @@ object IntervalUtils { val vByte = vLong.toByte if (vLong != vByte) { throw QueryExecutionErrors.castingCauseOverflowError( - toDayTimeIntervalString(v, ANSI_STYLE, startField, endField), ByteType.catalogString) + toDayTimeIntervalString(v, ANSI_STYLE, startField, endField), ByteType) } vByte } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 24ef61cd67d0d..361a7b421d511 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -89,9 +89,9 @@ object QueryExecutionErrors { messageParameters = Array(s"Cannot terminate expression: $generator")) } - def castingCauseOverflowError(t: Any, targetType: String): ArithmeticException = { + def castingCauseOverflowError(t: Any, dataType: DataType): ArithmeticException = { new SparkArithmeticException(errorClass = "CAST_CAUSES_OVERFLOW", - messageParameters = Array(t.toString, targetType, SQLConf.ANSI_ENABLED.key)) + messageParameters = Array(t.toString, dataType.catalogString, SQLConf.ANSI_ENABLED.key)) } def cannotChangeDecimalPrecisionError( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 4681429723183..bbf902849e7f9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -261,14 +261,14 @@ final class Decimal extends Ordered[Decimal] with Serializable { if (actualLongVal == actualLongVal.toByte) { actualLongVal.toByte } else { - throw QueryExecutionErrors.castingCauseOverflowError(this, "byte") + throw QueryExecutionErrors.castingCauseOverflowError(this, ByteType) } } else { val doubleVal = decimalVal.toDouble if (Math.floor(doubleVal) <= Byte.MaxValue && Math.ceil(doubleVal) >= Byte.MinValue) { doubleVal.toByte } else { - throw QueryExecutionErrors.castingCauseOverflowError(this, "byte") + throw QueryExecutionErrors.castingCauseOverflowError(this, ByteType) } } } @@ -283,14 +283,14 @@ final class Decimal extends Ordered[Decimal] with Serializable { if (actualLongVal == actualLongVal.toShort) { actualLongVal.toShort } else { - throw QueryExecutionErrors.castingCauseOverflowError(this, "short") + throw QueryExecutionErrors.castingCauseOverflowError(this, ShortType) } } else { val doubleVal = decimalVal.toDouble if (Math.floor(doubleVal) <= Short.MaxValue && Math.ceil(doubleVal) >= Short.MinValue) { doubleVal.toShort } else { - throw QueryExecutionErrors.castingCauseOverflowError(this, "short") + throw QueryExecutionErrors.castingCauseOverflowError(this, 
ShortType) } } } @@ -305,14 +305,14 @@ final class Decimal extends Ordered[Decimal] with Serializable { if (actualLongVal == actualLongVal.toInt) { actualLongVal.toInt } else { - throw QueryExecutionErrors.castingCauseOverflowError(this, "int") + throw QueryExecutionErrors.castingCauseOverflowError(this, IntegerType) } } else { val doubleVal = decimalVal.toDouble if (Math.floor(doubleVal) <= Int.MaxValue && Math.ceil(doubleVal) >= Int.MinValue) { doubleVal.toInt } else { - throw QueryExecutionErrors.castingCauseOverflowError(this, "int") + throw QueryExecutionErrors.castingCauseOverflowError(this, IntegerType) } } } @@ -332,7 +332,7 @@ final class Decimal extends Ordered[Decimal] with Serializable { decimalVal.bigDecimal.toBigInteger.longValueExact() } catch { case _: ArithmeticException => - throw QueryExecutionErrors.castingCauseOverflowError(this, "long") + throw QueryExecutionErrors.castingCauseOverflowError(this, LongType) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala index 6811e50ccdf94..16adec71bc84f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala @@ -115,7 +115,7 @@ private[sql] object LongExactNumeric extends LongIsIntegral with Ordering.LongOr if (x == x.toInt) { x.toInt } else { - throw QueryExecutionErrors.castingCauseOverflowError(x, "int") + throw QueryExecutionErrors.castingCauseOverflowError(x, IntegerType) } } @@ -135,7 +135,7 @@ private[sql] object FloatExactNumeric extends FloatIsFractional { if (Math.floor(x) <= intUpperBound && Math.ceil(x) >= intLowerBound) { x.toInt } else { - throw QueryExecutionErrors.castingCauseOverflowError(x, "int") + throw QueryExecutionErrors.castingCauseOverflowError(x, IntegerType) } } @@ -143,7 +143,7 @@ private[sql] object FloatExactNumeric extends FloatIsFractional { if (Math.floor(x) <= longUpperBound && Math.ceil(x) >= longLowerBound) { x.toLong } else { - throw QueryExecutionErrors.castingCauseOverflowError(x, "int") + throw QueryExecutionErrors.castingCauseOverflowError(x, LongType) } } @@ -160,7 +160,7 @@ private[sql] object DoubleExactNumeric extends DoubleIsFractional { if (Math.floor(x) <= intUpperBound && Math.ceil(x) >= intLowerBound) { x.toInt } else { - throw QueryExecutionErrors.castingCauseOverflowError(x, "int") + throw QueryExecutionErrors.castingCauseOverflowError(x, IntegerType) } } @@ -168,7 +168,7 @@ private[sql] object DoubleExactNumeric extends DoubleIsFractional { if (Math.floor(x) <= longUpperBound && Math.ceil(x) >= longLowerBound) { x.toLong } else { - throw QueryExecutionErrors.castingCauseOverflowError(x, "long") + throw QueryExecutionErrors.castingCauseOverflowError(x, LongType) } } diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out index 6aa890efe5c95..214776391f6f7 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out @@ -375,7 +375,7 @@ SELECT bigint(float('-9223380000000000000')) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting -9.22338E18 to int causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting -9.22338E18 to bigint causes overflow. 
To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out index 2e4fbc2dfa537..2b71be5a5d96c 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out @@ -833,7 +833,7 @@ SELECT bigint(double('-9223372036854780000')) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting -9.22337203685478E18 to long causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting -9.22337203685478E18 to bigint causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out index 24f0b3c5ed3bf..427e89a8d1b41 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out @@ -661,7 +661,7 @@ SELECT CAST(double('922337203685477580700.0') AS bigint) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting 9.223372036854776E20 to long causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting 9.223372036854776E20 to bigint causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query From fab8d43fac18ee91ab3369918c636ad0239475d0 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Thu, 10 Feb 2022 21:25:53 +0800 Subject: [PATCH 206/513] [SPARK-37960][SQL] A new framework to represent catalyst expressions in DS v2 APIs ### What changes were proposed in this pull request? This PR provides a new framework to represent catalyst expressions in DS v2 APIs. `GeneralSQLExpression` is a general SQL expression to represent catalyst expression in DS v2 API. `ExpressionSQLBuilder` is a builder to generate `GeneralSQLExpression` from catalyst expressions. `CASE ... WHEN ... ELSE ... END` is just the first use case. This PR also supports aggregate push down with `CASE ... WHEN ... ELSE ... END`. ### Why are the changes needed? Support aggregate push down with `CASE ... WHEN ... ELSE ... END`. ### Does this PR introduce _any_ user-facing change? Yes. Users could use `CASE ... WHEN ... ELSE ... END` with aggregate push down. ### How was this patch tested? New tests. Closes #35248 from beliefer/SPARK-37960. 
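For illustration only (not part of this patch): a minimal sketch of how the new `ExpressionSQLBuilder` and `GeneralSQLExpression` introduced below are expected to fit together, assuming a build of `sql/core` that already contains them. The rendered SQL string is an approximation of what the builder produces for a simple `CASE WHEN`.
```scala
// Sketch: translate a catalyst CASE WHEN into a SQL string that a DS v2 source can consume.
// ExpressionSQLBuilder and GeneralSQLExpression are the classes added by this patch.
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, CaseWhen, GreaterThan, Literal}
import org.apache.spark.sql.catalyst.util.ExpressionSQLBuilder
import org.apache.spark.sql.connector.expressions.GeneralSQLExpression
import org.apache.spark.sql.types.IntegerType

object CaseWhenPushDownSketch {
  def main(args: Array[String]): Unit = {
    val salary = AttributeReference("SALARY", IntegerType)()
    // CASE WHEN SALARY > 8000 THEN 1 ELSE 0 END
    val caseWhen = CaseWhen(Seq((GreaterThan(salary, Literal(8000)), Literal(1))), Some(Literal(0)))
    // Expected to yield something like Some("CASE WHEN (SALARY) > (8000) THEN 1 ELSE 0 END").
    val sql = new ExpressionSQLBuilder(caseWhen).build()
    // Wrap the SQL string so it can flow through the DS v2 Expression API,
    // e.g. as the input of an Avg/Count/Max/Min/Sum aggregate function.
    sql.map(new GeneralSQLExpression(_)).foreach(expr => println(expr.sql()))
  }
}
```
In the pushdown path this wrapping is what the new `PushableExpression` extractor does when the aggregate child is not a plain column reference.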
Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../expressions/GeneralSQLExpression.java | 41 +++++++++++ .../connector/expressions/aggregate/Avg.java | 14 ++-- .../expressions/aggregate/Count.java | 14 ++-- .../connector/expressions/aggregate/Max.java | 10 +-- .../connector/expressions/aggregate/Min.java | 10 +-- .../connector/expressions/aggregate/Sum.java | 14 ++-- .../catalyst/util/ExpressionSQLBuilder.scala | 69 +++++++++++++++++++ .../datasources/AggregatePushDownUtils.scala | 37 +++++----- .../datasources/DataSourceStrategy.scala | 28 +++++--- .../execution/datasources/orc/OrcUtils.scala | 13 ++-- .../datasources/parquet/ParquetUtils.scala | 18 ++--- .../datasources/v2/V2ColumnUtils.scala | 27 ++++++++ .../apache/spark/sql/jdbc/JdbcDialects.scala | 55 +++++++++++---- .../FileSourceAggregatePushDownSuite.scala | 28 ++++++++ .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 48 +++++++++++-- 15 files changed, 332 insertions(+), 94 deletions(-) create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralSQLExpression.java create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/ExpressionSQLBuilder.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ColumnUtils.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralSQLExpression.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralSQLExpression.java new file mode 100644 index 0000000000000..ebeee22a853cf --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralSQLExpression.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.expressions; + +import java.io.Serializable; + +import org.apache.spark.annotation.Evolving; + +/** + * The general SQL string corresponding to expression. 
+ * + * @since 3.3.0 + */ +@Evolving +public class GeneralSQLExpression implements Expression, Serializable { + private String sql; + + public GeneralSQLExpression(String sql) { + this.sql = sql; + } + + public String sql() { return sql; } + + @Override + public String toString() { return sql; } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Avg.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Avg.java index 5e10ec9ee1644..cc9d27ab8e59c 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Avg.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Avg.java @@ -18,7 +18,7 @@ package org.apache.spark.sql.connector.expressions.aggregate; import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; +import org.apache.spark.sql.connector.expressions.Expression; /** * An aggregate function that returns the mean of all the values in a group. @@ -27,23 +27,23 @@ */ @Evolving public final class Avg implements AggregateFunc { - private final NamedReference column; + private final Expression input; private final boolean isDistinct; - public Avg(NamedReference column, boolean isDistinct) { - this.column = column; + public Avg(Expression column, boolean isDistinct) { + this.input = column; this.isDistinct = isDistinct; } - public NamedReference column() { return column; } + public Expression column() { return input; } public boolean isDistinct() { return isDistinct; } @Override public String toString() { if (isDistinct) { - return "AVG(DISTINCT " + column.describe() + ")"; + return "AVG(DISTINCT " + input.describe() + ")"; } else { - return "AVG(" + column.describe() + ")"; + return "AVG(" + input.describe() + ")"; } } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Count.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Count.java index 1685770604a46..54c64b83c5d52 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Count.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Count.java @@ -18,7 +18,7 @@ package org.apache.spark.sql.connector.expressions.aggregate; import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; +import org.apache.spark.sql.connector.expressions.Expression; /** * An aggregate function that returns the number of the specific row in a group. 
@@ -27,23 +27,23 @@ */ @Evolving public final class Count implements AggregateFunc { - private final NamedReference column; + private final Expression input; private final boolean isDistinct; - public Count(NamedReference column, boolean isDistinct) { - this.column = column; + public Count(Expression column, boolean isDistinct) { + this.input = column; this.isDistinct = isDistinct; } - public NamedReference column() { return column; } + public Expression column() { return input; } public boolean isDistinct() { return isDistinct; } @Override public String toString() { if (isDistinct) { - return "COUNT(DISTINCT " + column.describe() + ")"; + return "COUNT(DISTINCT " + input.describe() + ")"; } else { - return "COUNT(" + column.describe() + ")"; + return "COUNT(" + input.describe() + ")"; } } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Max.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Max.java index 5acdf14bf7e2f..971aac279e09b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Max.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Max.java @@ -18,7 +18,7 @@ package org.apache.spark.sql.connector.expressions.aggregate; import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; +import org.apache.spark.sql.connector.expressions.Expression; /** * An aggregate function that returns the maximum value in a group. @@ -27,12 +27,12 @@ */ @Evolving public final class Max implements AggregateFunc { - private final NamedReference column; + private final Expression input; - public Max(NamedReference column) { this.column = column; } + public Max(Expression column) { this.input = column; } - public NamedReference column() { return column; } + public Expression column() { return input; } @Override - public String toString() { return "MAX(" + column.describe() + ")"; } + public String toString() { return "MAX(" + input.describe() + ")"; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Min.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Min.java index 824c607ea7df0..8d0644b0f0103 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Min.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Min.java @@ -18,7 +18,7 @@ package org.apache.spark.sql.connector.expressions.aggregate; import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; +import org.apache.spark.sql.connector.expressions.Expression; /** * An aggregate function that returns the minimum value in a group. 
@@ -27,12 +27,12 @@ */ @Evolving public final class Min implements AggregateFunc { - private final NamedReference column; + private final Expression input; - public Min(NamedReference column) { this.column = column; } + public Min(Expression column) { this.input = column; } - public NamedReference column() { return column; } + public Expression column() { return input; } @Override - public String toString() { return "MIN(" + column.describe() + ")"; } + public String toString() { return "MIN(" + input.describe() + ")"; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Sum.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Sum.java index 6b04dc38c2846..721ef31c9a817 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Sum.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Sum.java @@ -18,7 +18,7 @@ package org.apache.spark.sql.connector.expressions.aggregate; import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; +import org.apache.spark.sql.connector.expressions.Expression; /** * An aggregate function that returns the summation of all the values in a group. @@ -27,23 +27,23 @@ */ @Evolving public final class Sum implements AggregateFunc { - private final NamedReference column; + private final Expression input; private final boolean isDistinct; - public Sum(NamedReference column, boolean isDistinct) { - this.column = column; + public Sum(Expression column, boolean isDistinct) { + this.input = column; this.isDistinct = isDistinct; } - public NamedReference column() { return column; } + public Expression column() { return input; } public boolean isDistinct() { return isDistinct; } @Override public String toString() { if (isDistinct) { - return "SUM(DISTINCT " + column.describe() + ")"; + return "SUM(DISTINCT " + input.describe() + ")"; } else { - return "SUM(" + column.describe() + ")"; + return "SUM(" + input.describe() + ")"; } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/ExpressionSQLBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/ExpressionSQLBuilder.scala new file mode 100644 index 0000000000000..6239d0e2e7ae8 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/ExpressionSQLBuilder.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.util + +import org.apache.spark.sql.catalyst.expressions.{Attribute, BinaryOperator, CaseWhen, EqualTo, Expression, IsNotNull, IsNull, Literal, Not} +import org.apache.spark.sql.connector.expressions.LiteralValue + +/** + * The builder to generate SQL string from catalyst expressions. + */ +class ExpressionSQLBuilder(e: Expression) { + + def build(): Option[String] = generateSQL(e) + + private def generateSQL(expr: Expression): Option[String] = expr match { + case Literal(value, dataType) => Some(LiteralValue(value, dataType).toString) + case a: Attribute => Some(quoteIfNeeded(a.name)) + case IsNull(col) => generateSQL(col).map(c => s"$c IS NULL") + case IsNotNull(col) => generateSQL(col).map(c => s"$c IS NOT NULL") + case b: BinaryOperator => + val l = generateSQL(b.left) + val r = generateSQL(b.right) + if (l.isDefined && r.isDefined) { + Some(s"(${l.get}) ${b.sqlOperator} (${r.get})") + } else { + None + } + case Not(EqualTo(left, right)) => + val l = generateSQL(left) + val r = generateSQL(right) + if (l.isDefined && r.isDefined) { + Some(s"${l.get} != ${r.get}") + } else { + None + } + case Not(child) => generateSQL(child).map(v => s"NOT ($v)") + case CaseWhen(branches, elseValue) => + val conditionsSQL = branches.map(_._1).flatMap(generateSQL) + val valuesSQL = branches.map(_._2).flatMap(generateSQL) + if (conditionsSQL.length == branches.length && valuesSQL.length == branches.length) { + val branchSQL = + conditionsSQL.zip(valuesSQL).map { case (c, v) => s" WHEN $c THEN $v" }.mkString + if (elseValue.isDefined) { + elseValue.flatMap(generateSQL).map(v => s"CASE$branchSQL ELSE $v END") + } else { + Some(s"CASE$branchSQL END") + } + } else { + None + } + // TODO supports other expressions + case _ => None + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/AggregatePushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/AggregatePushDownUtils.scala index e7069137f31cb..4779a3eaf2531 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/AggregatePushDownUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/AggregatePushDownUtils.scala @@ -19,9 +19,9 @@ package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Expression, GenericInternalRow} -import org.apache.spark.sql.connector.expressions.NamedReference import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Aggregation, Count, CountStar, Max, Min} import org.apache.spark.sql.execution.RowToColumnConverter +import org.apache.spark.sql.execution.datasources.v2.V2ColumnUtils import org.apache.spark.sql.execution.vectorized.{OffHeapColumnVector, OnHeapColumnVector} import org.apache.spark.sql.types.{BooleanType, ByteType, DateType, DoubleType, FloatType, IntegerType, LongType, ShortType, StructField, StructType} import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} @@ -42,27 +42,28 @@ object AggregatePushDownUtils { var finalSchema = new StructType() - def getStructFieldForCol(col: NamedReference): StructField = { - schema.apply(col.fieldNames.head) + def getStructFieldForCol(colName: String): StructField = { + schema.apply(colName) } - def isPartitionCol(col: NamedReference) = { - partitionNames.contains(col.fieldNames.head) + def isPartitionCol(colName: String) = { + partitionNames.contains(colName) } def processMinOrMax(agg: AggregateFunc): 
Boolean = { - val (column, aggType) = agg match { - case max: Max => (max.column, "max") - case min: Min => (min.column, "min") - case _ => - throw new IllegalArgumentException(s"Unexpected type of AggregateFunc ${agg.describe}") + val (columnName, aggType) = agg match { + case max: Max if V2ColumnUtils.extractV2Column(max.column).isDefined => + (V2ColumnUtils.extractV2Column(max.column).get, "max") + case min: Min if V2ColumnUtils.extractV2Column(min.column).isDefined => + (V2ColumnUtils.extractV2Column(min.column).get, "min") + case _ => return false } - if (isPartitionCol(column)) { + if (isPartitionCol(columnName)) { // don't push down partition column, footer doesn't have max/min for partition column return false } - val structField = getStructFieldForCol(column) + val structField = getStructFieldForCol(columnName) structField.dataType match { // not push down complex type @@ -108,8 +109,8 @@ object AggregatePushDownUtils { aggregation.groupByColumns.foreach { col => // don't push down if the group by columns are not the same as the partition columns (orders // doesn't matter because reorder can be done at data source layer) - if (col.fieldNames.length != 1 || !isPartitionCol(col)) return None - finalSchema = finalSchema.add(getStructFieldForCol(col)) + if (col.fieldNames.length != 1 || !isPartitionCol(col.fieldNames.head)) return None + finalSchema = finalSchema.add(getStructFieldForCol(col.fieldNames.head)) } aggregation.aggregateExpressions.foreach { @@ -117,10 +118,10 @@ object AggregatePushDownUtils { if (!processMinOrMax(max)) return None case min: Min => if (!processMinOrMax(min)) return None - case count: Count => - if (count.column.fieldNames.length != 1 || count.isDistinct) return None - finalSchema = - finalSchema.add(StructField(s"count(" + count.column.fieldNames.head + ")", LongType)) + case count: Count + if V2ColumnUtils.extractV2Column(count.column).isDefined && !count.isDistinct => + val columnName = V2ColumnUtils.extractV2Column(count.column).get + finalSchema = finalSchema.add(StructField(s"count($columnName)", LongType)) case _: CountStar => finalSchema = finalSchema.add(StructField("count(*)", LongType)) case _ => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index ecde8a0bc8fb7..5dce3f29deef0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -38,9 +38,10 @@ import org.apache.spark.sql.catalyst.planning.ScanOperation import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoStatement, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 +import org.apache.spark.sql.catalyst.util.ExpressionSQLBuilder import org.apache.spark.sql.connector.catalog.SupportsRead import org.apache.spark.sql.connector.catalog.TableCapability._ -import org.apache.spark.sql.connector.expressions.{FieldReference, NullOrdering, SortDirection, SortOrder => SortOrderV2, SortValue} +import org.apache.spark.sql.connector.expressions.{Expression => ExpressionV2, FieldReference, GeneralSQLExpression, NullOrdering, SortDirection, SortOrder => SortOrderV2, SortValue} import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Aggregation, Avg, Count, CountStar, 
GeneralAggregateFunc, Max, Min, Sum} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.{InSubqueryExec, RowDataSourceScanExec, SparkPlan} @@ -705,22 +706,17 @@ object DataSourceStrategy protected[sql] def translateAggregate(agg: AggregateExpression): Option[AggregateFunc] = { if (agg.filter.isEmpty) { agg.aggregateFunction match { - case aggregate.Min(PushableColumnWithoutNestedColumn(name)) => - Some(new Min(FieldReference.column(name))) - case aggregate.Max(PushableColumnWithoutNestedColumn(name)) => - Some(new Max(FieldReference.column(name))) + case aggregate.Min(PushableExpression(expr)) => Some(new Min(expr)) + case aggregate.Max(PushableExpression(expr)) => Some(new Max(expr)) case count: aggregate.Count if count.children.length == 1 => count.children.head match { // COUNT(any literal) is the same as COUNT(*) case Literal(_, _) => Some(new CountStar()) - case PushableColumnWithoutNestedColumn(name) => - Some(new Count(FieldReference.column(name), agg.isDistinct)) + case PushableExpression(expr) => Some(new Count(expr, agg.isDistinct)) case _ => None } - case aggregate.Sum(PushableColumnWithoutNestedColumn(name), _) => - Some(new Sum(FieldReference.column(name), agg.isDistinct)) - case aggregate.Average(PushableColumnWithoutNestedColumn(name), _) => - Some(new Avg(FieldReference.column(name), agg.isDistinct)) + case aggregate.Sum(PushableExpression(expr), _) => Some(new Sum(expr, agg.isDistinct)) + case aggregate.Average(PushableExpression(expr), _) => Some(new Avg(expr, agg.isDistinct)) case aggregate.VariancePop(PushableColumnWithoutNestedColumn(name), _) => Some(new GeneralAggregateFunc( "VAR_POP", agg.isDistinct, Array(FieldReference.column(name)))) @@ -860,3 +856,13 @@ object PushableColumnAndNestedColumn extends PushableColumnBase { object PushableColumnWithoutNestedColumn extends PushableColumnBase { override val nestedPredicatePushdownEnabled = false } + +/** + * Get the expression of DS V2 to represent catalyst expression that can be pushed down. 
+ */ +object PushableExpression { + def unapply(e: Expression): Option[ExpressionV2] = e match { + case PushableColumnWithoutNestedColumn(name) => Some(FieldReference.column(name)) + case _ => new ExpressionSQLBuilder(e).build().map(new GeneralSQLExpression(_)) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala index d1b7e8db619b1..684bab5883394 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala @@ -44,6 +44,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, Count, CountStar, Max, Min} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.datasources.{AggregatePushDownUtils, SchemaMergeUtils} +import org.apache.spark.sql.execution.datasources.v2.V2ColumnUtils import org.apache.spark.sql.types._ import org.apache.spark.util.{ThreadUtils, Utils} @@ -487,18 +488,18 @@ object OrcUtils extends Logging { val aggORCValues: Seq[WritableComparable[_]] = aggregation.aggregateExpressions.zipWithIndex.map { - case (max: Max, index) => - val columnName = max.column.fieldNames.head + case (max: Max, index) if V2ColumnUtils.extractV2Column(max.column).isDefined => + val columnName = V2ColumnUtils.extractV2Column(max.column).get val statistics = getColumnStatistics(columnName) val dataType = schemaWithoutGroupBy(index).dataType getMinMaxFromColumnStatistics(statistics, dataType, isMax = true) - case (min: Min, index) => - val columnName = min.column.fieldNames.head + case (min: Min, index) if V2ColumnUtils.extractV2Column(min.column).isDefined => + val columnName = V2ColumnUtils.extractV2Column(min.column).get val statistics = getColumnStatistics(columnName) val dataType = schemaWithoutGroupBy.apply(index).dataType getMinMaxFromColumnStatistics(statistics, dataType, isMax = false) - case (count: Count, _) => - val columnName = count.column.fieldNames.head + case (count: Count, _) if V2ColumnUtils.extractV2Column(count.column).isDefined => + val columnName = V2ColumnUtils.extractV2Column(count.column).get val isPartitionColumn = partitionSchema.fields.map(_.name).contains(columnName) // NOTE: Count(columnName) doesn't include null values. 
// org.apache.orc.ColumnStatistics.getNumberOfValues() returns number of non-null values diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala index 87a0d9c860f31..63c529e3542f2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala @@ -35,6 +35,7 @@ import org.apache.spark.sql.catalyst.expressions.JoinedRow import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, Count, CountStar, Max, Min} import org.apache.spark.sql.execution.datasources.AggregatePushDownUtils +import org.apache.spark.sql.execution.datasources.v2.V2ColumnUtils import org.apache.spark.sql.internal.SQLConf.{LegacyBehaviorPolicy, PARQUET_AGGREGATE_PUSHDOWN_ENABLED} import org.apache.spark.sql.types.StructType @@ -248,32 +249,33 @@ object ParquetUtils { blocks.forEach { block => val blockMetaData = block.getColumns agg match { - case max: Max => - val colName = max.column.fieldNames.head + case max: Max if V2ColumnUtils.extractV2Column(max.column).isDefined => + val colName = V2ColumnUtils.extractV2Column(max.column).get index = dataSchema.fieldNames.toList.indexOf(colName) schemaName = "max(" + colName + ")" val currentMax = getCurrentBlockMaxOrMin(filePath, blockMetaData, index, true) if (value == None || currentMax.asInstanceOf[Comparable[Any]].compareTo(value) > 0) { value = currentMax } - case min: Min => - val colName = min.column.fieldNames.head + case min: Min if V2ColumnUtils.extractV2Column(min.column).isDefined => + val colName = V2ColumnUtils.extractV2Column(min.column).get index = dataSchema.fieldNames.toList.indexOf(colName) schemaName = "min(" + colName + ")" val currentMin = getCurrentBlockMaxOrMin(filePath, blockMetaData, index, false) if (value == None || currentMin.asInstanceOf[Comparable[Any]].compareTo(value) < 0) { value = currentMin } - case count: Count => - schemaName = "count(" + count.column.fieldNames.head + ")" + case count: Count if V2ColumnUtils.extractV2Column(count.column).isDefined => + val colName = V2ColumnUtils.extractV2Column(count.column).get + schemaName = "count(" + colName + ")" rowCount += block.getRowCount var isPartitionCol = false - if (partitionSchema.fields.map(_.name).toSet.contains(count.column.fieldNames.head)) { + if (partitionSchema.fields.map(_.name).toSet.contains(colName)) { isPartitionCol = true } isCount = true if (!isPartitionCol) { - index = dataSchema.fieldNames.toList.indexOf(count.column.fieldNames.head) + index = dataSchema.fieldNames.toList.indexOf(colName) // Count(*) includes the null values, but Count(colName) doesn't. rowCount -= getNumNulls(filePath, blockMetaData, index) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ColumnUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ColumnUtils.scala new file mode 100644 index 0000000000000..9fc220f440bc1 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ColumnUtils.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.connector.expressions.{Expression, NamedReference} + +object V2ColumnUtils { + def extractV2Column(expr: Expression): Option[String] = expr match { + case r: NamedReference if r. fieldNames.length == 1 => Some(r.fieldNames.head) + case _ => None + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 7dd987e0a44b3..8a5cf27b53791 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, Timesta import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.TableChange._ import org.apache.spark.sql.connector.catalog.index.TableIndex -import org.apache.spark.sql.connector.expressions.NamedReference +import org.apache.spark.sql.connector.expressions.{FieldReference, GeneralSQLExpression, NamedReference} import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Avg, Count, CountStar, Max, Min, Sum} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} @@ -203,28 +203,55 @@ abstract class JdbcDialect extends Serializable with Logging{ def compileAggregate(aggFunction: AggregateFunc): Option[String] = { aggFunction match { case min: Min => - if (min.column.fieldNames.length != 1) return None - Some(s"MIN(${quoteIdentifier(min.column.fieldNames.head)})") + val sql = min.column match { + case field: FieldReference => + if (field.fieldNames.length != 1) return None + quoteIdentifier(field.fieldNames.head) + case expr: GeneralSQLExpression => + expr.sql() + } + Some(s"MIN($sql)") case max: Max => - if (max.column.fieldNames.length != 1) return None - Some(s"MAX(${quoteIdentifier(max.column.fieldNames.head)})") + val sql = max.column match { + case field: FieldReference => + if (field.fieldNames.length != 1) return None + quoteIdentifier(field.fieldNames.head) + case expr: GeneralSQLExpression => + expr.sql() + } + Some(s"MAX($sql)") case count: Count => - if (count.column.fieldNames.length != 1) return None + val sql = count.column match { + case field: FieldReference => + if (field.fieldNames.length != 1) return None + quoteIdentifier(field.fieldNames.head) + case expr: GeneralSQLExpression => + expr.sql() + } val distinct = if (count.isDistinct) "DISTINCT " else "" - val column = quoteIdentifier(count.column.fieldNames.head) - Some(s"COUNT($distinct$column)") + Some(s"COUNT($distinct$sql)") case sum: Sum => - if (sum.column.fieldNames.length != 1) return None + val sql = sum.column match { + case field: FieldReference => + if (field.fieldNames.length != 1) return None + 
quoteIdentifier(field.fieldNames.head) + case expr: GeneralSQLExpression => + expr.sql() + } val distinct = if (sum.isDistinct) "DISTINCT " else "" - val column = quoteIdentifier(sum.column.fieldNames.head) - Some(s"SUM($distinct$column)") + Some(s"SUM($distinct$sql)") case _: CountStar => Some("COUNT(*)") case avg: Avg => - if (avg.column.fieldNames.length != 1) return None + val sql = avg.column match { + case field: FieldReference => + if (field.fieldNames.length != 1) return None + quoteIdentifier(field.fieldNames.head) + case expr: GeneralSQLExpression => + expr.sql() + } val distinct = if (avg.isDistinct) "DISTINCT " else "" - val column = quoteIdentifier(avg.column.fieldNames.head) - Some(s"AVG($distinct$column)") + Some(s"AVG($distinct$sql)") case _ => None } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceAggregatePushDownSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceAggregatePushDownSuite.scala index e4a41ba9e71be..47740c5274616 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceAggregatePushDownSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceAggregatePushDownSuite.scala @@ -372,6 +372,34 @@ trait FileSourceAggregatePushDownSuite } } + test("aggregate not push down - MIN/MAX/COUNT with CASE WHEN") { + val data = Seq((-2, "abc", 2), (3, "def", 4), (6, "ghi", 2), (0, null, 19), + (9, "mno", 7), (2, null, 6)) + withDataSourceTable(data, "t") { + withSQLConf(aggPushDownEnabledKey -> "true") { + val selectAgg = sql( + """ + |SELECT + | min(CASE WHEN _1 < 0 THEN 0 ELSE _1 END), + | min(CASE WHEN _3 > 5 THEN 1 ELSE 0 END), + | max(CASE WHEN _1 < 0 THEN 0 ELSE _1 END), + | max(CASE WHEN NOT(_3 > 5) THEN 1 ELSE 0 END), + | count(CASE WHEN _1 < 0 AND _2 IS NOT NULL THEN 0 ELSE _1 END), + | count(CASE WHEN _3 != 5 OR _2 IS NULL THEN 1 ELSE 0 END) + |FROM t + """.stripMargin) + selectAgg.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedAggregation: []" + checkKeywordsExistsInExplain(selectAgg, expected_plan_fragment) + } + + checkAnswer(selectAgg, Seq(Row(0, 0, 9, 1, 6, 6))) + } + } + } + private def testPushDownForAllDataTypes( inputRows: Seq[Row], expectedMinWithAllTypes: Seq[Row], diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index eadc2fb9e882d..aa0289ae75bdb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -806,17 +806,53 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(query, Seq(Row(29000.0))) } - test("scan with aggregate push-down: SUM(CASE WHEN) with group by") { - val df = - sql("SELECT SUM(CASE WHEN SALARY > 0 THEN 1 ELSE 0 END) FROM h2.test.employee GROUP BY DEPT") - checkAggregateRemoved(df, false) + test("scan with aggregate push-down: aggregate function with CASE WHEN") { + val df = sql( + """ + |SELECT + | COUNT(CASE WHEN SALARY > 8000 AND SALARY < 10000 THEN SALARY ELSE 0 END), + | COUNT(CASE WHEN SALARY > 8000 AND SALARY <= 13000 THEN SALARY ELSE 0 END), + | COUNT(CASE WHEN SALARY > 11000 OR SALARY < 10000 THEN SALARY ELSE 0 END), + | COUNT(CASE WHEN SALARY >= 12000 OR SALARY < 9000 THEN SALARY ELSE 0 END), + | COUNT(CASE WHEN SALARY >= 12000 OR NOT(SALARY >= 9000) THEN SALARY 
ELSE 0 END), + | MAX(CASE WHEN NOT(SALARY > 8000) AND SALARY >= 8000 THEN SALARY ELSE 0 END), + | MAX(CASE WHEN NOT(SALARY > 8000) OR SALARY > 8000 THEN SALARY ELSE 0 END), + | MAX(CASE WHEN NOT(SALARY > 8000) AND NOT(SALARY < 8000) THEN SALARY ELSE 0 END), + | MAX(CASE WHEN NOT(SALARY != 0) OR NOT(SALARY < 8000) THEN SALARY ELSE 0 END), + | MAX(CASE WHEN NOT(SALARY > 8000 AND SALARY > 8000) THEN 0 ELSE SALARY END), + | MIN(CASE WHEN NOT(SALARY > 8000 OR SALARY IS NULL) THEN SALARY ELSE 0 END), + | SUM(CASE WHEN NOT(SALARY > 8000 AND SALARY IS NOT NULL) THEN SALARY ELSE 0 END), + | SUM(CASE WHEN SALARY > 10000 THEN 2 WHEN SALARY > 8000 THEN 1 END), + | AVG(CASE WHEN NOT(SALARY > 8000 OR SALARY IS NOT NULL) THEN SALARY ELSE 0 END) + |FROM h2.test.employee GROUP BY DEPT + """.stripMargin) + checkAggregateRemoved(df) df.queryExecution.optimizedPlan.collect { case _: DataSourceV2ScanRelation => val expected_plan_fragment = - "PushedFilters: [], " + "PushedAggregates: [COUNT(CASE WHEN ((SALARY) > (8000.00)) AND ((SALARY) < (10000.00))" + + " THEN SALARY ELSE 0.00 END), C..., " + + "PushedFilters: [], " + + "PushedGroupByColumns: [DEPT]" checkKeywordsExistsInExplain(df, expected_plan_fragment) } - checkAnswer(df, Seq(Row(1), Row(2), Row(2))) + checkAnswer(df, Seq(Row(1, 1, 1, 1, 1, 0d, 12000d, 0d, 12000d, 12000d, 0d, 0d, 2, 0d), + Row(2, 2, 2, 2, 2, 0d, 10000d, 0d, 10000d, 10000d, 0d, 0d, 2, 0d), + Row(2, 2, 2, 2, 2, 0d, 12000d, 0d, 12000d, 12000d, 0d, 0d, 3, 0d))) + } + + test("scan with aggregate push-down: aggregate function with UDF") { + val df = spark.table("h2.test.employee") + val decrease = udf { (x: Double, y: Double) => x - y } + val query = df.select(sum(decrease($"SALARY", $"BONUS")).as("value")) + checkAggregateRemoved(query, false) + query.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedFilters: []" + checkKeywordsExistsInExplain(query, expected_plan_fragment) + } + checkAnswer(query, Seq(Row(47100.0))) } test("scan with aggregate push-down: partition columns with multi group by columns") { From 50256bde9bdf217413545a6d2945d6c61bf4cfff Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Thu, 10 Feb 2022 21:32:18 +0800 Subject: [PATCH 207/513] [SPARK-38054][SQL] Supports list namespaces in JDBC v2 MySQL dialect ### What changes were proposed in this pull request? Currently, `JDBCTableCatalog.scala` query namespaces show below. ``` val schemaBuilder = ArrayBuilder.make[Array[String]] val rs = conn.getMetaData.getSchemas() while (rs.next()) { schemaBuilder += Array(rs.getString(1)) } schemaBuilder.result ``` But the code cannot get any information when using MySQL JDBC driver. This PR uses `SHOW SCHEMAS` to query namespaces of MySQL. This PR also fix other issues below: - Release the docker tests in `MySQLNamespaceSuite.scala`. - Because MySQL doesn't support create comment of schema, let's throws `SQLFeatureNotSupportedException`. - Because MySQL doesn't support `DROP SCHEMA` in `RESTRICT` mode, let's throws `SQLFeatureNotSupportedException`. - Reactor `JdbcUtils.executeQuery` to avoid `java.sql.SQLException: Operation not allowed after ResultSet closed`. ### Why are the changes needed? MySQL dialect supports query namespaces. ### Does this PR introduce _any_ user-facing change? 'Yes'. Some API changed. ### How was this patch tested? New tests. Closes #35355 from beliefer/SPARK-38054. 
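Illustrative sketch only (not part of the patch): the `executeQuery` refactor described above follows a loan pattern, where the caller consumes the `ResultSet` inside a callback so both the `ResultSet` and the `Statement` are closed before control returns. The JDBC URL, credentials, and object name below are placeholders; the patch's version additionally threads a `JDBCOptions` through instead of a raw timeout.
```scala
import java.sql.{Connection, DriverManager, ResultSet}
import scala.collection.mutable.ArrayBuffer

object ShowSchemasSketch {
  // Loan-pattern helper: the ResultSet never escapes the callback, so it cannot be
  // read after it has been closed ("Operation not allowed after ResultSet closed").
  def executeQuery(conn: Connection, sql: String, queryTimeout: Int = 0)(f: ResultSet => Unit): Unit = {
    val statement = conn.createStatement
    try {
      statement.setQueryTimeout(queryTimeout)
      val rs = statement.executeQuery(sql)
      try f(rs) finally rs.close()
    } finally {
      statement.close()
    }
  }

  def main(args: Array[String]): Unit = {
    // Placeholder connection details for a local MySQL instance.
    val conn = DriverManager.getConnection("jdbc:mysql://localhost:3306", "user", "password")
    try {
      val schemas = ArrayBuffer.empty[Array[String]]
      // SHOW SCHEMAS lists one schema per row; column 1 carries the schema name.
      executeQuery(conn, "SHOW SCHEMAS") { rs =>
        while (rs.next()) schemas += Array(rs.getString(1))
      }
      schemas.foreach(s => println(s.mkString(".")))
    } finally {
      conn.close()
    }
  }
}
```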
Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/MySQLNamespaceSuite.scala | 48 ++++++++++-- .../sql/jdbc/v2/V2JDBCNamespaceTest.scala | 22 ++++-- .../sql/errors/QueryExecutionErrors.scala | 12 +++ .../datasources/jdbc/JdbcUtils.scala | 66 +++++++++------- .../v2/jdbc/JDBCTableCatalog.scala | 26 +++---- .../apache/spark/sql/jdbc/JdbcDialects.scala | 41 +++++++++- .../apache/spark/sql/jdbc/MySQLDialect.scala | 78 ++++++++++++++----- 7 files changed, 218 insertions(+), 75 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala index d3230155b8923..d8dee61d70ea6 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala @@ -17,10 +17,12 @@ package org.apache.spark.sql.jdbc.v2 -import java.sql.Connection +import java.sql.{Connection, SQLFeatureNotSupportedException} import scala.collection.JavaConverters._ +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connector.catalog.NamespaceChange import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.tags.DockerTest @@ -55,11 +57,47 @@ class MySQLNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespac override def dataPreparation(conn: Connection): Unit = {} - override def builtinNamespaces: Array[Array[String]] = Array() + override def builtinNamespaces: Array[Array[String]] = + Array(Array("information_schema"), Array("mysql"), Array("performance_schema"), Array("sys")) + + override def listNamespaces(namespace: Array[String]): Array[Array[String]] = { + Array(builtinNamespaces.head, namespace) ++ builtinNamespaces.tail + } override val supportsSchemaComment: Boolean = false - // Cannot get namespaces with conn.getMetaData.getSchemas - // TODO testListNamespaces() - // TODO testDropNamespaces() + override val supportsDropSchemaRestrict: Boolean = false + + testListNamespaces() + testDropNamespaces() + + test("Create or remove comment of namespace unsupported") { + val e1 = intercept[AnalysisException] { + catalog.createNamespace(Array("foo"), Map("comment" -> "test comment").asJava) + } + assert(e1.getMessage.contains("Failed create name space: foo")) + assert(e1.getCause.isInstanceOf[SQLFeatureNotSupportedException]) + assert(e1.getCause.asInstanceOf[SQLFeatureNotSupportedException].getMessage + .contains("Create namespace comment is not supported")) + assert(catalog.namespaceExists(Array("foo")) === false) + catalog.createNamespace(Array("foo"), Map.empty[String, String].asJava) + assert(catalog.namespaceExists(Array("foo")) === true) + val e2 = intercept[AnalysisException] { + catalog.alterNamespace(Array("foo"), NamespaceChange + .setProperty("comment", "comment for foo")) + } + assert(e2.getMessage.contains("Failed create comment on name space: foo")) + assert(e2.getCause.isInstanceOf[SQLFeatureNotSupportedException]) + assert(e2.getCause.asInstanceOf[SQLFeatureNotSupportedException].getMessage + .contains("Create namespace comment is not supported")) + val e3 = intercept[AnalysisException] { + catalog.alterNamespace(Array("foo"), NamespaceChange.removeProperty("comment")) + } + assert(e3.getMessage.contains("Failed remove comment on 
name space: foo")) + assert(e3.getCause.isInstanceOf[SQLFeatureNotSupportedException]) + assert(e3.getCause.asInstanceOf[SQLFeatureNotSupportedException].getMessage + .contains("Remove namespace comment is not supported")) + catalog.dropNamespace(Array("foo"), cascade = true) + assert(catalog.namespaceExists(Array("foo")) === false) + } } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala index 8d97ac45568e3..bae0d7c361635 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala @@ -52,6 +52,8 @@ private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession with DockerInte def supportsDropSchemaCascade: Boolean = true + def supportsDropSchemaRestrict: Boolean = true + def testListNamespaces(): Unit = { test("listNamespaces: basic behavior") { val commentMap = if (supportsSchemaComment) { @@ -78,7 +80,11 @@ private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession with DockerInte assert(createCommentWarning === false) } - catalog.dropNamespace(Array("foo"), cascade = false) + if (supportsDropSchemaRestrict) { + catalog.dropNamespace(Array("foo"), cascade = false) + } else { + catalog.dropNamespace(Array("foo"), cascade = true) + } assert(catalog.namespaceExists(Array("foo")) === false) assert(catalog.listNamespaces() === builtinNamespaces) val msg = intercept[AnalysisException] { @@ -99,15 +105,21 @@ private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession with DockerInte } catalog.createNamespace(Array("foo"), commentMap.asJava) assert(catalog.namespaceExists(Array("foo")) === true) - catalog.dropNamespace(Array("foo"), cascade = false) + if (supportsDropSchemaRestrict) { + catalog.dropNamespace(Array("foo"), cascade = false) + } else { + catalog.dropNamespace(Array("foo"), cascade = true) + } assert(catalog.namespaceExists(Array("foo")) === false) // Drop non empty namespace without cascade - catalog.createNamespace(Array("foo"), Map("comment" -> "test comment").asJava) + catalog.createNamespace(Array("foo"), commentMap.asJava) assert(catalog.namespaceExists(Array("foo")) === true) catalog.createTable(ident1, schema, Array.empty, emptyProps) - intercept[NonEmptyNamespaceException] { - catalog.dropNamespace(Array("foo"), cascade = false) + if (supportsDropSchemaRestrict) { + intercept[NonEmptyNamespaceException] { + catalog.dropNamespace(Array("foo"), cascade = false) + } } // Drop non empty namespace with cascade diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 361a7b421d511..c042c44b6ee34 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1943,4 +1943,16 @@ object QueryExecutionErrors { def MultipleBucketTransformsError(): Throwable = { new UnsupportedOperationException("Multiple bucket transforms are not supported.") } + + def unsupportedCreateNamespaceCommentError(): Throwable = { + new SQLFeatureNotSupportedException("Create namespace comment is not supported") + } + + def unsupportedRemoveNamespaceCommentError(): Throwable = { + new 
SQLFeatureNotSupportedException("Remove namespace comment is not supported") + } + + def unsupportedDropNamespaceRestrictError(): Throwable = { + new SQLFeatureNotSupportedException("Drop namespace restrict is not supported") + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 1a4e4aaf16da8..ed167c07756e2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -971,53 +971,57 @@ object JdbcUtils extends Logging with SQLConfHelper { } /** - * Creates a namespace. + * Creates a schema. */ - def createNamespace( + def createSchema( conn: Connection, options: JDBCOptions, - namespace: String, + schema: String, comment: String): Unit = { + val statement = conn.createStatement + try { + statement.setQueryTimeout(options.queryTimeout) + val dialect = JdbcDialects.get(options.url) + dialect.createSchema(statement, schema, comment) + } finally { + statement.close() + } + } + + def schemaExists(conn: Connection, options: JDBCOptions, schema: String): Boolean = { + val dialect = JdbcDialects.get(options.url) + dialect.schemasExists(conn, options, schema) + } + + def listSchemas(conn: Connection, options: JDBCOptions): Array[Array[String]] = { val dialect = JdbcDialects.get(options.url) - executeStatement(conn, options, s"CREATE SCHEMA ${dialect.quoteIdentifier(namespace)}") - if (!comment.isEmpty) createNamespaceComment(conn, options, namespace, comment) + dialect.listSchemas(conn, options) } - def createNamespaceComment( + def alterSchemaComment( conn: Connection, options: JDBCOptions, - namespace: String, + schema: String, comment: String): Unit = { val dialect = JdbcDialects.get(options.url) - try { - executeStatement( - conn, options, dialect.getSchemaCommentQuery(namespace, comment)) - } catch { - case e: Exception => - logWarning("Cannot create JDBC catalog comment. The catalog comment will be ignored.") - } + executeStatement(conn, options, dialect.getSchemaCommentQuery(schema, comment)) } - def removeNamespaceComment( + def removeSchemaComment( conn: Connection, options: JDBCOptions, - namespace: String): Unit = { + schema: String): Unit = { val dialect = JdbcDialects.get(options.url) - try { - executeStatement(conn, options, dialect.removeSchemaCommentQuery(namespace)) - } catch { - case e: Exception => - logWarning("Cannot drop JDBC catalog comment.") - } + executeStatement(conn, options, dialect.removeSchemaCommentQuery(schema)) } /** - * Drops a namespace from the JDBC database. + * Drops a schema from the JDBC database. 
*/ - def dropNamespace( - conn: Connection, options: JDBCOptions, namespace: String, cascade: Boolean): Unit = { + def dropSchema( + conn: Connection, options: JDBCOptions, schema: String, cascade: Boolean): Unit = { val dialect = JdbcDialects.get(options.url) - executeStatement(conn, options, dialect.dropSchema(namespace, cascade)) + executeStatement(conn, options, dialect.dropSchema(schema, cascade)) } /** @@ -1148,11 +1152,17 @@ object JdbcUtils extends Logging with SQLConfHelper { } } - def executeQuery(conn: Connection, options: JDBCOptions, sql: String): ResultSet = { + def executeQuery(conn: Connection, options: JDBCOptions, sql: String)( + f: ResultSet => Unit): Unit = { val statement = conn.createStatement try { statement.setQueryTimeout(options.queryTimeout) - statement.executeQuery(sql) + val rs = statement.executeQuery(sql) + try { + f(rs) + } finally { + rs.close() + } } finally { statement.close() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index d06a28d952b38..03200d5a6f371 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -21,7 +21,6 @@ import java.util import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.collection.mutable.ArrayBuilder import org.apache.spark.internal.Logging import org.apache.spark.sql.connector.catalog.{Identifier, NamespaceChange, SupportsNamespaces, Table, TableCatalog, TableChange} @@ -173,23 +172,14 @@ class JDBCTableCatalog extends TableCatalog with SupportsNamespaces with Logging override def namespaceExists(namespace: Array[String]): Boolean = namespace match { case Array(db) => JdbcUtils.withConnection(options) { conn => - val rs = conn.getMetaData.getSchemas(null, db) - while (rs.next()) { - if (rs.getString(1) == db) return true; - } - false + JdbcUtils.schemaExists(conn, options, db) } case _ => false } override def listNamespaces(): Array[Array[String]] = { JdbcUtils.withConnection(options) { conn => - val schemaBuilder = ArrayBuilder.make[Array[String]] - val rs = conn.getMetaData.getSchemas() - while (rs.next()) { - schemaBuilder += Array(rs.getString(1)) - } - schemaBuilder.result + JdbcUtils.listSchemas(conn, options) } } @@ -236,7 +226,7 @@ class JDBCTableCatalog extends TableCatalog with SupportsNamespaces with Logging } JdbcUtils.withConnection(options) { conn => JdbcUtils.classifyException(s"Failed create name space: $db", dialect) { - JdbcUtils.createNamespace(conn, options, db, comment) + JdbcUtils.createSchema(conn, options, db, comment) } } @@ -254,7 +244,9 @@ class JDBCTableCatalog extends TableCatalog with SupportsNamespaces with Logging case set: NamespaceChange.SetProperty => if (set.property() == SupportsNamespaces.PROP_COMMENT) { JdbcUtils.withConnection(options) { conn => - JdbcUtils.createNamespaceComment(conn, options, db, set.value) + JdbcUtils.classifyException(s"Failed create comment on name space: $db", dialect) { + JdbcUtils.alterSchemaComment(conn, options, db, set.value) + } } } else { throw QueryCompilationErrors.cannotSetJDBCNamespaceWithPropertyError(set.property) @@ -263,7 +255,9 @@ class JDBCTableCatalog extends TableCatalog with SupportsNamespaces with Logging case unset: NamespaceChange.RemoveProperty => if (unset.property() == 
SupportsNamespaces.PROP_COMMENT) { JdbcUtils.withConnection(options) { conn => - JdbcUtils.removeNamespaceComment(conn, options, db) + JdbcUtils.classifyException(s"Failed remove comment on name space: $db", dialect) { + JdbcUtils.removeSchemaComment(conn, options, db) + } } } else { throw QueryCompilationErrors.cannotUnsetJDBCNamespaceWithPropertyError(unset.property) @@ -284,7 +278,7 @@ class JDBCTableCatalog extends TableCatalog with SupportsNamespaces with Logging case Array(db) if namespaceExists(namespace) => JdbcUtils.withConnection(options) { conn => JdbcUtils.classifyException(s"Failed drop name space: $db", dialect) { - JdbcUtils.dropNamespace(conn, options, db, cascade) + JdbcUtils.dropSchema(conn, options, db, cascade) true } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 8a5cf27b53791..2d10bbf5de537 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.jdbc -import java.sql.{Connection, Date, Timestamp} +import java.sql.{Connection, Date, Statement, Timestamp} import java.time.{Instant, LocalDate} import java.util @@ -256,6 +256,45 @@ abstract class JdbcDialect extends Serializable with Logging{ } } + /** + * Create schema with an optional comment. Empty string means no comment. + */ + def createSchema(statement: Statement, schema: String, comment: String): Unit = { + val schemaCommentQuery = if (comment.nonEmpty) { + // We generate comment query here so that it can fail earlier without creating the schema. + getSchemaCommentQuery(schema, comment) + } else { + comment + } + statement.executeUpdate(s"CREATE SCHEMA ${quoteIdentifier(schema)}") + if (comment.nonEmpty) { + statement.executeUpdate(schemaCommentQuery) + } + } + + /** + * Check schema exists or not. + */ + def schemasExists(conn: Connection, options: JDBCOptions, schema: String): Boolean = { + val rs = conn.getMetaData.getSchemas(null, schema) + while (rs.next()) { + if (rs.getString(1) == schema) return true; + } + false + } + + /** + * Lists all the schemas in this table. + */ + def listSchemas(conn: Connection, options: JDBCOptions): Array[Array[String]] = { + val schemaBuilder = ArrayBuilder.make[Array[String]] + val rs = conn.getMetaData.getSchemas() + while (rs.next()) { + schemaBuilder += Array(rs.getString(1)) + } + schemaBuilder.result + } + /** * Return Some[true] iff `TRUNCATE TABLE` causes cascading default. * Some[true] : TRUNCATE TABLE causes cascading. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index 9fcb7a27d17af..3cca81048e812 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -21,6 +21,8 @@ import java.sql.{Connection, SQLException, Types} import java.util import java.util.Locale +import scala.collection.mutable.ArrayBuilder + import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.analysis.{IndexAlreadyExistsException, NoSuchIndexException} @@ -76,6 +78,25 @@ private case object MySQLDialect extends JdbcDialect with SQLConfHelper { s"`$colName`" } + override def schemasExists(conn: Connection, options: JDBCOptions, schema: String): Boolean = { + listSchemas(conn, options).exists(_.head == schema) + } + + override def listSchemas(conn: Connection, options: JDBCOptions): Array[Array[String]] = { + val schemaBuilder = ArrayBuilder.make[Array[String]] + try { + JdbcUtils.executeQuery(conn, options, "SHOW SCHEMAS") { rs => + while (rs.next()) { + schemaBuilder += Array(rs.getString("Database")) + } + } + } catch { + case _: Exception => + logWarning("Cannot show schemas.") + } + schemaBuilder.result + } + override def getTableExistsQuery(table: String): String = { s"SELECT 1 FROM $table LIMIT 1" } @@ -134,6 +155,14 @@ private case object MySQLDialect extends JdbcDialect with SQLConfHelper { case _ => JdbcUtils.getCommonJDBCType(dt) } + override def getSchemaCommentQuery(schema: String, comment: String): String = { + throw QueryExecutionErrors.unsupportedCreateNamespaceCommentError() + } + + override def removeSchemaCommentQuery(schema: String): String = { + throw QueryExecutionErrors.unsupportedRemoveNamespaceCommentError() + } + // CREATE INDEX syntax // https://dev.mysql.com/doc/refman/8.0/en/create-index.html override def createIndex( @@ -175,26 +204,27 @@ private case object MySQLDialect extends JdbcDialect with SQLConfHelper { val sql = s"SHOW INDEXES FROM $tableName" var indexMap: Map[String, TableIndex] = Map() try { - val rs = JdbcUtils.executeQuery(conn, options, sql) - while (rs.next()) { - val indexName = rs.getString("key_name") - val colName = rs.getString("column_name") - val indexType = rs.getString("index_type") - val indexComment = rs.getString("Index_comment") - if (indexMap.contains(indexName)) { - val index = indexMap.get(indexName).get - val newIndex = new TableIndex(indexName, indexType, - index.columns() :+ FieldReference(colName), - index.columnProperties, index.properties) - indexMap += (indexName -> newIndex) - } else { - // The only property we are building here is `COMMENT` because it's the only one - // we can get from `SHOW INDEXES`. 
- val properties = new util.Properties(); - if (indexComment.nonEmpty) properties.put("COMMENT", indexComment) - val index = new TableIndex(indexName, indexType, Array(FieldReference(colName)), - new util.HashMap[NamedReference, util.Properties](), properties) - indexMap += (indexName -> index) + JdbcUtils.executeQuery(conn, options, sql) { rs => + while (rs.next()) { + val indexName = rs.getString("key_name") + val colName = rs.getString("column_name") + val indexType = rs.getString("index_type") + val indexComment = rs.getString("Index_comment") + if (indexMap.contains(indexName)) { + val index = indexMap.get(indexName).get + val newIndex = new TableIndex(indexName, indexType, + index.columns() :+ FieldReference(colName), + index.columnProperties, index.properties) + indexMap += (indexName -> newIndex) + } else { + // The only property we are building here is `COMMENT` because it's the only one + // we can get from `SHOW INDEXES`. + val properties = new util.Properties(); + if (indexComment.nonEmpty) properties.put("COMMENT", indexComment) + val index = new TableIndex(indexName, indexType, Array(FieldReference(colName)), + new util.HashMap[NamedReference, util.Properties](), properties) + indexMap += (indexName -> index) + } } } } catch { @@ -219,4 +249,12 @@ private case object MySQLDialect extends JdbcDialect with SQLConfHelper { case _ => super.classifyException(message, e) } } + + override def dropSchema(schema: String, cascade: Boolean): String = { + if (cascade) { + s"DROP SCHEMA ${quoteIdentifier(schema)}" + } else { + throw QueryExecutionErrors.unsupportedDropNamespaceRestrictError() + } + } } From 344fd5c0b3958da881aa2775689f620aa25c1de4 Mon Sep 17 00:00:00 2001 From: weixiuli Date: Thu, 10 Feb 2022 10:32:23 -0600 Subject: [PATCH 208/513] [MINOR][CORE] Fix the method description of refill ### What changes were proposed in this pull request? Fix the method description of refill. ### Why are the changes needed? Easy to understand. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35307 from weixiuli/SPARK-38008. Authored-by: weixiuli Signed-off-by: Sean Owen --- .../java/org/apache/spark/io/NioBufferedFileInputStream.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java b/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java index 7ca5ade7b9a74..91910b99ac999 100644 --- a/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java +++ b/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java @@ -49,7 +49,7 @@ public NioBufferedFileInputStream(File file) throws IOException { } /** - * Checks weather data is left to be read from the input stream. + * Checks whether data is left to be read from the input stream. * @return true if data is left, false otherwise */ private boolean refill() throws IOException { From 75122f3518b8355b5f59c06954faa1834347af87 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 10 Feb 2022 08:41:54 -0800 Subject: [PATCH 209/513] [SPARK-38171][BUILD][SQL] Upgrade ORC to 1.7.3 ### What changes were proposed in this pull request? This PR aims to upgrade Apache ORC dependency to 1.7.3. ### Why are the changes needed? Apache ORC 1.7.3 is the 3rd maintenance release. - https://orc.apache.org/news/2022/02/09/ORC-1.7.3/ - https://github.com/apache/orc/releases/tag/v1.7.3 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
Pass the CIs. Closes #35474 from dongjoon-hyun/SPARK-38171. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 6 +++--- dev/deps/spark-deps-hadoop-3-hive-2.3 | 6 +++--- pom.xml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 1925ae0a92b5d..bebd54ec64a9e 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -219,9 +219,9 @@ objenesis/3.2//objenesis-3.2.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.7.2//orc-core-1.7.2.jar -orc-mapreduce/1.7.2//orc-mapreduce-1.7.2.jar -orc-shims/1.7.2//orc-shims-1.7.2.jar +orc-core/1.7.3//orc-core-1.7.3.jar +orc-mapreduce/1.7.3//orc-mapreduce-1.7.3.jar +orc-shims/1.7.3//orc-shims-1.7.3.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index dad4817239ee3..ee756f6eaaae5 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -205,9 +205,9 @@ objenesis/3.2//objenesis-3.2.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.7.2//orc-core-1.7.2.jar -orc-mapreduce/1.7.2//orc-mapreduce-1.7.2.jar -orc-shims/1.7.2//orc-shims-1.7.2.jar +orc-core/1.7.3//orc-core-1.7.3.jar +orc-mapreduce/1.7.3//orc-mapreduce-1.7.3.jar +orc-shims/1.7.3//orc-shims-1.7.3.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/pom.xml b/pom.xml index 4e1e5cc70b287..632bad6c9aaa2 100644 --- a/pom.xml +++ b/pom.xml @@ -137,7 +137,7 @@ 10.14.2.0 1.12.2 - 1.7.2 + 1.7.3 9.4.44.v20210927 4.0.3 0.10.0 From 0ae8c39da4917fdb07d0a019732b644406307525 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 10 Feb 2022 12:48:09 -0800 Subject: [PATCH 210/513] [SPARK-38134][BUILD] Upgrade Apache Arrow to 7.0.0 ### What changes were proposed in this pull request? Upgrade Apache Arrow to 7.0.0. ### Why are the changes needed? To pick up new improvements & bug fixes from the latest release. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35449 from sunchao/SPARK-38134. 
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 8 ++++---- dev/deps/spark-deps-hadoop-3-hive-2.3 | 8 ++++---- pom.xml | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index bebd54ec64a9e..50480e4ab6930 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -17,10 +17,10 @@ api-asn1-api/1.0.0-M20//api-asn1-api-1.0.0-M20.jar api-util/1.0.0-M20//api-util-1.0.0-M20.jar arpack/2.2.1//arpack-2.2.1.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar -arrow-format/6.0.1//arrow-format-6.0.1.jar -arrow-memory-core/6.0.1//arrow-memory-core-6.0.1.jar -arrow-memory-netty/6.0.1//arrow-memory-netty-6.0.1.jar -arrow-vector/6.0.1//arrow-vector-6.0.1.jar +arrow-format/7.0.0//arrow-format-7.0.0.jar +arrow-memory-core/7.0.0//arrow-memory-core-7.0.0.jar +arrow-memory-netty/7.0.0//arrow-memory-netty-7.0.0.jar +arrow-vector/7.0.0//arrow-vector-7.0.0.jar audience-annotations/0.5.0//audience-annotations-0.5.0.jar automaton/1.11-8//automaton-1.11-8.jar avro-ipc/1.11.0//avro-ipc-1.11.0.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index ee756f6eaaae5..13b23c06cf647 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -17,10 +17,10 @@ antlr4-runtime/4.8//antlr4-runtime-4.8.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar arpack/2.2.1//arpack-2.2.1.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar -arrow-format/6.0.1//arrow-format-6.0.1.jar -arrow-memory-core/6.0.1//arrow-memory-core-6.0.1.jar -arrow-memory-netty/6.0.1//arrow-memory-netty-6.0.1.jar -arrow-vector/6.0.1//arrow-vector-6.0.1.jar +arrow-format/7.0.0//arrow-format-7.0.0.jar +arrow-memory-core/7.0.0//arrow-memory-core-7.0.0.jar +arrow-memory-netty/7.0.0//arrow-memory-netty-7.0.0.jar +arrow-vector/7.0.0//arrow-vector-7.0.0.jar audience-annotations/0.5.0//audience-annotations-0.5.0.jar automaton/1.11-8//automaton-1.11-8.jar avro-ipc/1.11.0//avro-ipc-1.11.0.jar diff --git a/pom.xml b/pom.xml index 632bad6c9aaa2..04feb2aca1cca 100644 --- a/pom.xml +++ b/pom.xml @@ -201,7 +201,7 @@ If you are changing Arrow version specification, please check ./python/pyspark/sql/pandas/utils.py, and ./python/setup.py too. --> - 6.0.1 + 7.0.0 org.fusesource.leveldbjni 5.12.0 From 93251ed77ea1c5d037c64d2292b8760b03c8e181 Mon Sep 17 00:00:00 2001 From: Zimo Li <7163127+lzm0@users.noreply.github.com> Date: Thu, 10 Feb 2022 17:01:41 -0800 Subject: [PATCH 211/513] [MINOR][K8S][DOCS] Fix typo in K8s conf `deleteOnTermination` ### What changes were proposed in this pull request? There is a grammatical mistake in the doc for the config `spark.kubernetes.driver.service.deleteOnTermination` ### Why are the changes needed? Fix typo ### Does this PR introduce _any_ user-facing change? yes previously: > If true, driver service will be deleted on Spark application termination. If false, it will be cleaned up when the driver pod is **deletion**. corrected: > If true, driver service will be deleted on Spark application termination. If false, it will be cleaned up when the driver pod is **deleted**. ### How was this patch tested? No tests are needed since it's only documentation changes. Closes #35482 from lzm0/patch-1. 
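As a hedged illustration of what this setting controls (not part of the patch): disabling it keeps the driver service alive until the driver pod itself is deleted. The object name and app name below are invented for the sketch; only the config key comes from this patch.

```
import org.apache.spark.SparkConf

object K8sServiceRetentionSketch {
  // Keep the Kubernetes driver service after the application terminates;
  // it is then cleaned up only when the driver pod is deleted.
  val conf: SparkConf = new SparkConf()
    .setAppName("k8s-service-retention-sketch")
    .set("spark.kubernetes.driver.service.deleteOnTermination", "false")
}
```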
Authored-by: Zimo Li <7163127+lzm0@users.noreply.github.com> Signed-off-by: Dongjoon Hyun --- .../src/main/scala/org/apache/spark/deploy/k8s/Config.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 65a8f82699665..bd6bd93ca3da4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -58,7 +58,7 @@ private[spark] object Config extends Logging { val KUBERNETES_DRIVER_SERVICE_DELETE_ON_TERMINATION = ConfigBuilder("spark.kubernetes.driver.service.deleteOnTermination") .doc("If true, driver service will be deleted on Spark application termination. " + - "If false, it will be cleaned up when the driver pod is deletion.") + "If false, it will be cleaned up when the driver pod is deleted.") .version("3.2.0") .booleanConf .createWithDefault(true) From 7ed51bbbe5fd571bcbd95c78f66532eda83e2d8a Mon Sep 17 00:00:00 2001 From: yaohua Date: Fri, 11 Feb 2022 09:56:56 +0800 Subject: [PATCH 212/513] [SPARK-38159][SQL] Add a new FileSourceMetadataAttribute for the Hidden File Metadata ### What changes were proposed in this pull request? Add a new `FileSourceMetadataAttribute` object with an `apply` method to create a `FileSourceMetadataAttribute`, and an `unapply` method to match only file source metadata attribute. ### Why are the changes needed? Extra safeguard to make sure it matches file source metadata attribute. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UTs Closes #35459 from Yaohua628/spark-38159. 
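Before the diff, a minimal, self-contained sketch of the `apply`/`unapply` extractor pattern this change relies on. `Attr`, `MetadataAttr` and `FileSourceMetadataAttr` are simplified stand-ins invented for the sketch, not the actual Spark classes; the real `AttributeReference`-based definitions follow in `namedExpressions.scala` below.

```
// Simplified stand-in for an attribute: a column name plus boolean metadata flags.
final case class Attr(name: String, metadata: Map[String, Boolean] = Map.empty)

object MetadataAttr {
  val MetadataKey = "__metadata_col"
  def apply(name: String): Attr = Attr(name, Map(MetadataKey -> true))
  // Matches any attribute carrying the generic metadata flag.
  def unapply(attr: Attr): Option[Attr] =
    if (attr.metadata.getOrElse(MetadataKey, false)) Some(attr) else None
}

object FileSourceMetadataAttr {
  val FileSourceKey = "__file_source_metadata_col"
  def apply(name: String): Attr =
    Attr(name, Map(MetadataAttr.MetadataKey -> true, FileSourceKey -> true))
  // Matches only attributes carrying BOTH flags (the extra safeguard added here).
  def unapply(attr: Attr): Option[Attr] = attr match {
    case MetadataAttr(a) if a.metadata.getOrElse(FileSourceKey, false) => Some(a)
    case _ => None
  }
}

object ExtractorSketch extends App {
  val output = Seq(Attr("id"), MetadataAttr("_metadata"), FileSourceMetadataAttr("_metadata"))
  // Mirrors how the scan node collects its file-source metadata columns from `output`.
  println(output.collect { case FileSourceMetadataAttr(a) => a.name }) // List(_metadata)
}
```

Matching through the extractor keeps call sites such as `output.collect { case FileSourceMetadataAttribute(attr) => attr }` declarative, which is how `FileSourceScanExec` and `SchemaPruning` use it in the diff below.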
Authored-by: yaohua Signed-off-by: Wenchen Fan --- .../expressions/namedExpressions.scala | 29 +++++++++++++++++-- .../sql/execution/DataSourceScanExec.scala | 8 +++-- .../execution/datasources/FileFormat.scala | 3 +- .../datasources/FileSourceStrategy.scala | 4 +-- .../PartitioningAwareFileIndex.scala | 2 +- .../execution/datasources/SchemaPruning.scala | 2 +- 6 files changed, 38 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 06ca139870804..248584b3d9dcd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -434,8 +434,8 @@ object VirtualColumn { } /** - * The internal representation of the hidden metadata struct: - * set `__metadata_col` to `true` in AttributeReference metadata + * The internal representation of the MetadataAttribute, + * it sets `__metadata_col` to `true` in AttributeReference metadata * - apply() will create a metadata attribute reference * - unapply() will check if an attribute reference is the metadata attribute reference */ @@ -451,3 +451,28 @@ object MetadataAttribute { } else None } } + +/** + * The internal representation of the FileSourceMetadataAttribute, it sets `__metadata_col` + * and `__file_source_metadata_col` to `true` in AttributeReference's metadata + * - apply() will create a file source metadata attribute reference + * - unapply() will check if an attribute reference is the file source metadata attribute reference + */ +object FileSourceMetadataAttribute { + + val FILE_SOURCE_METADATA_COL_ATTR_KEY = "__file_source_metadata_col" + + def apply(name: String, dataType: DataType, nullable: Boolean = true): AttributeReference = + AttributeReference(name, dataType, nullable, + new MetadataBuilder() + .putBoolean(METADATA_COL_ATTR_KEY, value = true) + .putBoolean(FILE_SOURCE_METADATA_COL_ATTR_KEY, value = true).build())() + + def unapply(attr: AttributeReference): Option[AttributeReference] = + attr match { + case MetadataAttribute(attr) + if attr.metadata.contains(FILE_SOURCE_METADATA_COL_ATTR_KEY) + && attr.metadata.getBoolean(FILE_SOURCE_METADATA_COL_ATTR_KEY) => Some(attr) + case _ => None + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 443553f6ade03..db86b382235f7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -200,7 +200,7 @@ case class FileSourceScanExec( extends DataSourceScanExec { lazy val metadataColumns: Seq[AttributeReference] = - output.collect { case MetadataAttribute(attr) => attr } + output.collect { case FileSourceMetadataAttribute(attr) => attr } // Note that some vals referring the file-based relation are lazy intentionally // so that this plan can be canonicalized on executor side too. See SPARK-23731. 
@@ -366,9 +366,11 @@ case class FileSourceScanExec( @transient private lazy val pushedDownFilters = { val supportNestedPredicatePushdown = DataSourceUtils.supportNestedPredicatePushdown(relation) - // TODO: should be able to push filters containing metadata columns down to skip files + // `dataFilters` should not include any metadata col filters + // because the metadata struct has been flatted in FileSourceStrategy + // and thus metadata col filters are invalid to be pushed down dataFilters.filterNot(_.references.exists { - case MetadataAttribute(_) => true + case FileSourceMetadataAttribute(_) => true case _ => false }).flatMap(DataSourceStrategy.translateFilter(_, supportNestedPredicatePushdown)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala index 02d88e9ffa43c..f9b37fb5d9fcc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala @@ -192,7 +192,8 @@ object FileFormat { .add(StructField(FILE_MODIFICATION_TIME, TimestampType)) // create a file metadata struct col - def createFileMetadataCol: AttributeReference = MetadataAttribute(METADATA_NAME, METADATA_STRUCT) + def createFileMetadataCol: AttributeReference = + FileSourceMetadataAttribute(METADATA_NAME, METADATA_STRUCT) // create an internal row given required metadata fields and file information def createMetadataInternalRow( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala index 5df8057ea92fe..9356e46a69187 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala @@ -214,12 +214,12 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging { logInfo(s"Output Data Schema: ${outputSchema.simpleString(5)}") val metadataStructOpt = l.output.collectFirst { - case MetadataAttribute(attr) => attr + case FileSourceMetadataAttribute(attr) => attr } val metadataColumns = metadataStructOpt.map { metadataStruct => metadataStruct.dataType.asInstanceOf[StructType].fields.map { field => - MetadataAttribute(field.name, field.dataType) + FileSourceMetadataAttribute(field.name, field.dataType) }.toSeq }.getOrElse(Seq.empty) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index 9b56bcf35365a..35cd7c2715869 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -75,7 +75,7 @@ abstract class PartitioningAwareFileIndex( // retrieve the file metadata filters and reduce to a final filter expression val fileMetadataFilterOpt = dataFilters.filter(_.references.forall { - case MetadataAttribute(_) => true + case FileSourceMetadataAttribute(_) => true case _ => false }).reduceOption(expressions.And) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala index 9dd2f40972ad8..a49c10c852b08 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala @@ -77,7 +77,7 @@ object SchemaPruning extends Rule[LogicalPlan] { } val metadataSchema = - relation.output.collect { case MetadataAttribute(attr) => attr }.toStructType + relation.output.collect { case FileSourceMetadataAttribute(attr) => attr }.toStructType val prunedMetadataSchema = if (metadataSchema.nonEmpty) { pruneSchema(metadataSchema, requestedRootFields) } else { From 23cd3190f7453010a0311b8639158e8b586997e2 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 10 Feb 2022 19:17:15 -0800 Subject: [PATCH 213/513] [SPARK-38125][BUILD][SQL] Use static factory methods instead of the deprecated `Byte/Short/Integer/Long` constructors ### What changes were proposed in this pull request? This PR aims to use static factor methods instead of the deprecated `Integer` constructors and add Java/Scala linter rules to enforce new styles. ### Why are the changes needed? `Byte/Short/Integer/Long` constructors are deprecated in Java 9. - https://docs.oracle.com/javase/9/docs/api/java/lang/Byte.html#Byte-byte- - https://docs.oracle.com/javase/9/docs/api/java/lang/Byte.html#Byte-java.lang.String- - https://docs.oracle.com/javase/9/docs/api/java/lang/Short.html#Short-short- - https://docs.oracle.com/javase/9/docs/api/java/lang/Short.html#Short-java.lang.String- - https://docs.oracle.com/javase/9/docs/api/java/lang/Integer.html#Integer-int- - https://docs.oracle.com/javase/9/docs/api/java/lang/Integer.html#Integer-java.lang.String- - https://docs.oracle.com/javase/9/docs/api/java/lang/Long.html#Long-long- - https://docs.oracle.com/javase/9/docs/api/java/lang/Long.html#Long-java.lang.String- ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs with the newly added Scalastyle and Java Checkstyle. Closes #35414 from dongjoon-hyun/SPARK-38125. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/checkstyle.xml | 4 ++++ scalastyle-config.xml | 5 +++++ .../java/test/org/apache/spark/sql/JavaDatasetSuite.java | 4 ++-- .../scala/org/apache/spark/sql/internal/SQLConfSuite.scala | 2 +- .../spark/sql/hive/thriftserver/SparkSQLCLIService.scala | 4 ++-- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/dev/checkstyle.xml b/dev/checkstyle.xml index b6abfb57c2019..6c93ff94fd9f2 100644 --- a/dev/checkstyle.xml +++ b/dev/checkstyle.xml @@ -189,6 +189,10 @@ + + + + diff --git a/scalastyle-config.xml b/scalastyle-config.xml index 32f1f147f3eed..9585785835d62 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -324,6 +324,11 @@ This file is divided into 3 sections: Omit braces in case clauses. + + new (java\.lang\.)?(Byte|Integer|Long|Short)\( + Use static factory 'valueOf' or 'parseXXX' instead of the deprecated constructors. 
+ + diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java index 6ff53a84f328c..22978fb8c286e 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java @@ -279,7 +279,7 @@ public void testMappingFunctionWithTestGroupState() throws Exception { Assert.assertTrue(prevState.isUpdated()); Assert.assertFalse(prevState.isRemoved()); Assert.assertTrue(prevState.exists()); - Assert.assertEquals(new Integer(9), prevState.get()); + Assert.assertEquals(Integer.valueOf(9), prevState.get()); Assert.assertEquals(0L, prevState.getCurrentProcessingTimeMs()); Assert.assertEquals(1000L, prevState.getCurrentWatermarkMs()); Assert.assertEquals(Optional.of(1500L), prevState.getTimeoutTimestampMs()); @@ -289,7 +289,7 @@ public void testMappingFunctionWithTestGroupState() throws Exception { Assert.assertTrue(prevState.isUpdated()); Assert.assertFalse(prevState.isRemoved()); Assert.assertTrue(prevState.exists()); - Assert.assertEquals(new Integer(18), prevState.get()); + Assert.assertEquals(Integer.valueOf(18), prevState.get()); prevState = TestGroupState.create( Optional.of(9), GroupStateTimeout.EventTimeTimeout(), 0L, Optional.of(1000L), true); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala index abde486b2db2b..a589d4ee3e3c4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala @@ -469,7 +469,7 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { if (i == 0) { assert(zone === "Z") } else { - assert(zone === String.format("%+03d:00", new Integer(i))) + assert(zone === String.format("%+03d:00", Integer.valueOf(i))) } } val e2 = intercept[ParseException](sql("set time zone interval 19 hours")) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala index bc4b64c287e6c..d2c0235a23f21 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala @@ -138,7 +138,7 @@ private[thriftserver] trait ReflectedCompositeService { this: AbstractService => serviceStartCount += 1 } // Emulating `AbstractService.start` - val startTime = new java.lang.Long(System.currentTimeMillis()) + val startTime = java.lang.Long.valueOf(System.currentTimeMillis()) setAncestorField(this, 3, "startTime", startTime) invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.INITED) invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.STARTED) @@ -147,7 +147,7 @@ private[thriftserver] trait ReflectedCompositeService { this: AbstractService => case NonFatal(e) => logError(s"Error starting services $getName", e) invoke(classOf[CompositeService], this, "stop", - classOf[Int] -> new Integer(serviceStartCount)) + classOf[Int] -> Integer.valueOf(serviceStartCount)) throw HiveThriftServerErrors.failedToStartServiceError(getName, e) } } From 93a9464b6814b36dd9ff056800143bc700976abf Mon Sep 17 00:00:00 2001 From: Yun Tang Date: Thu, 10 Feb 2022 19:33:40 -0800 
Subject: [PATCH 214/513] [SPARK-38178][SS] Correct the logic to measure the memory usage of RocksDB ### What changes were proposed in this pull request? Correct the logic to measure the memory usage of RocksDB to include the memory used by block cache. As "block-cache-pinned-usage" is included in "block-cache-usage", we don't need to sum the pinned usage separately. ### Why are the changes needed? Current reported metrics of RocksDB memory usage is not correct. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? The information could refer to https://github.com/facebook/rocksdb/wiki/Memory-usage-in-RocksDB Closes #35480 from Myasuka/rocksdb-mem-usage. Authored-by: Yun Tang Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/execution/streaming/state/RocksDB.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala index ea25342cc8a1c..0ad03169d2053 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala @@ -370,6 +370,7 @@ class RocksDB( val totalSSTFilesBytes = getDBProperty("rocksdb.total-sst-files-size") val readerMemUsage = getDBProperty("rocksdb.estimate-table-readers-mem") val memTableMemUsage = getDBProperty("rocksdb.size-all-mem-tables") + val blockCacheUsage = getDBProperty("rocksdb.block-cache-usage") val nativeOpsHistograms = Seq( "get" -> DB_GET, "put" -> DB_WRITE, @@ -403,7 +404,7 @@ class RocksDB( RocksDBMetrics( numKeysOnLoadedVersion, numKeysOnWritingVersion, - readerMemUsage + memTableMemUsage, + readerMemUsage + memTableMemUsage + blockCacheUsage, totalSSTFilesBytes, nativeOpsLatencyMicros.toMap, commitLatencyMs, From a2d7a23be2004cca7346b43785f35a2088ab83ea Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 11 Feb 2022 11:56:50 +0800 Subject: [PATCH 215/513] [SPARK-38176][SQL] ANSI mode: allow implicitly casting String to other simple types ### What changes were proposed in this pull request? Compared to the default behavior, the current ANSI type coercion rules don't allow the following cases: - Comparing String with other simple types, e.g. `str_col > date'2022-01-01'` - Arithmetic operation containing String and other simple types - Union/Intersect/Except containing String and other simple types - SQL function expects non-string types but got string input - other SQL operators.. This PR is to remove the limitation. After changes, the String type can be implicit cast as Long/Double/Date/Timestamp/Boolean/Binary. Note that Byte/Short/Int is not on the precedent list of String: `str_col > 1` will become `cast(str_col as long) > 1L`. So that we can avoid string parsing error if the string is out of the range of Byte/Short/Int in comparison/arithmetic/union operations. The design applies to Float/Decimal (especially Decimal), for SQL operators containing Float/Decimal and String, the type coercion system will convert both as Double. ![image](https://user-images.githubusercontent.com/1097932/153430898-1f4eca1e-d72b-4714-831f-ff697a046f93.png) ### Why are the changes needed? The purpose of the current limitation is to prevent potential String parsing errors under ANSI mode. 
However, after doing research among real-world Spark SQL queries, I find that **many users are actually using String as Date/Timestamp/Numeric/etc in their queries**. For example, the purpose of query `where str_col > date'2022-01-01'` is quite obvious, but users have to rewrite it as `where cast(str_col as date) > date'2022-01-01'` under ANSI mode. To make the migration to ANSI mode easier, I suggest removing this limitation. Let's treat it as an extension in our SQL dialect. ### Does this PR introduce _any_ user-facing change? Yes, allow implicitly casting String to other simple types under ANSI mode ### How was this patch tested? Unit tests Closes #35478 from gengliangwang/allowStringCoercion. Lead-authored-by: Gengliang Wang Co-authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- docs/img/type-precedence-list.png | Bin 133793 -> 93171 bytes docs/sql-ref-ansi-compliance.md | 57 +++++---- .../catalyst/analysis/AnsiTypeCoercion.scala | 101 +++++++-------- .../analysis/AnsiTypeCoercionSuite.scala | 120 ++++++------------ .../catalyst/analysis/TypeCoercionSuite.scala | 1 + .../sql-tests/results/ansi/date.sql.out | 33 ++--- .../sql-tests/results/ansi/interval.sql.out | 10 +- .../sql-tests/results/ansi/timestamp.sql.out | 10 +- .../results/postgreSQL/float4.sql.out | 25 ++-- .../timestampNTZ/timestamp-ansi.sql.out | 10 +- 10 files changed, 164 insertions(+), 203 deletions(-) diff --git a/docs/img/type-precedence-list.png b/docs/img/type-precedence-list.png index 176d3ebbf9f5a79d6a52ea647f06710b35f588ab..5d9a3079bf01b76470ebb51abe5f6209446614da 100644 GIT binary patch literal 93171 zcmeFaWmJ`G)GiE&N(j;*Qi_6rboUa)LO?)3NdajP=`JOtRT_z90MaVbrGzLYN|y*o zcQ>5*pzOVu`#nFtA7_m3jI+mZ@5NX=amT!8UDuohTvxb6L_kY`g@r|QQ?J8)9gC7RY{7MD`s8QbL?ocp+^nefO8FH_6B4{9aB^&uo9yD+{kZQkGC zu-ouC-ge}gkwSI6)mi) ze_f3$FoQ7?)<^eW^Vi_X2=PqFsNc{1V>-XC!47f#W11vP zk8Yw+;e}X!tbdMBKpO45yS>UwK34EPx_l|wBWVa1bIRARdygCzm@QpYa9V5>CY&(n zX2aDN;lKUfi{kw8aLl2;Bq3>jd0p6O76*sc5f5X6b}?RFDOChI59=wLDV|?rxq>mq z5OR`cfhJ|e5Hk3@sw|B$sp}-LF6Ri^lv*g7?QRrlpT&5v53COLPeqttMO+Li#;*ia zQK*(1Itd-8nb2uLjaM+Ass&HAHt#6sr*3{{WvP7(6`0 z$IgT{WzT7vgu%AfjGV$)uQS;&&%!+O{#Zhj?t=X6V2oSS3GhsMY1U!3KXX`&E?}&I zJiJmMQhn4OAJ1e$-uA3klo-HI+#P{!AYQQwkTU}A6OqvnpO85i7^O$-D^{C zxO?On->J-!nPR<^SfdGbM&fL3m}*HRGyXsI5ty=p{f(58{^tTATF#EO$Z-`wli?>^vlfcVv`8dxUnJfZo|PhW&d ziS@e9mQhMG@*7>dY4iECrrHoFlANTcU%U17p#bhyDMzxq(4`GUrzIPL`$;Xi6 z)DGRIsc|@QPR+H9o^-Mq1KgXkXURO z4okTfOZMG;wJ%U=B;K}(2e!$-L&ueKDz8JQYd1>6EqTIUdP{G;gCp~`OpNu%W2r?C zq~Icwq*7;Ch1~VC@0w)>ICR&q-n4jA^Q6}@k9dMIRU?`15{7oOJjsL}S41s-W=--T z67GF{(oGZuw31kS>bkXbbwa|R+V?h%-shVelQ~C&&c408UJD!GR=C`*$wA!ju{oW4 z+X$p1mZfz3`z&WcFIkkfL)StrZ%onD+j<$d?o^E)x5dWXgJIM+Z+mNfuf>VeimG{u zv8ZTgILjIb;?^SS2mBdz-t08c**1_4R+cR`@GHs&4C+_T=cUdlw)ftvBGOBaR7=-r zf+0w_hOp&7I4UKwxw>|v(aLkPXD(@Xi&e{IJb8GrUDtD)$7$}zLXN>hm2O%IDK^_m zd{!Uno%t}mqT#6e_N;&sshX_HoZ^Y}1ySo6SXaxNMzMx*j;?K8xn3y>BB_{Ss07>I zcOb4}Ync0ll=s4TYLegTcUNu+Snq+N(L_RP@#$~0Ja4+^uf*tXPpIpy4@c){Y^ABB z8Y#9{-+tMeT{swM>fp`3J@w%*V}pZHV2(71oF#Jb*^`7#zrCDW&haeUnq&?YIXP9j z*rZz9XZuC63S|1pK@n;lC%v^n>WZ6ETz*3S3@#??9T}N#uNIEPnX`Yat4cVF{(PRY z!5hQ&Xqo_|qDZ7^nWxGb-S^f2 zSAg?{5DA7M{dpdI0!eg0&X(KK*GrT^3l2G|5-jESDX1s2@q#VG*YyUr7jJ%O|Gmabeh&W*j(eR_4vsKTdJ2Z1(SN_jhq9 zXzQv6)&%5`Zm-o!XK%c`n&vcbpIrFpURq6!XPD$lXJB`fj%4@k2SkvR13-I`r9hEELVFrkQcBwAuToIT2}hq?mq??-(V)iIWPT`=^q{#+gPu zBm2icZmt%#DEC^YnFlGhY;AN@%*@-T7A~svwxvx9b+7h&de&4{9;UauUI-$U%c?m@ zEZ3*WiQU*J+)t+>RmkrI^Tps#ZI+zq&#I!={4jPrj(hJQlTUsX z#A9n?%|1rn5jvix?bv5O(7^xY?5@bnHwx;-kSvPwYVF8DTE5&SG4mvB`Mj*MFCZKi zqpvy#S9*Du*v!bADe`Pa 
[GIT binary patch data for docs/img/type-precedence-list.png (base85-encoded image bytes) omitted]
zmT0HGFWm|Gv*!|F&nZWBH6Uxtm=!-pd?y=6l8_ozF5>Ir@|BUdEb4_2yjcKG0pS>J z=Cb{wHI&MmR2Wtvt!OLU1QdX!LOL+)*Nju$XPyW@YaiEc<3|BWs;^ht-qh*Z_u~tI zV-HP_Yi8Z`-L?I1pDSzi9U?VkSwg7nwxhd?%Z)E!l;RX*&O$bO+V)Fs`{0RR57tlM zzfI_?;9C--_R|lPT4N5SJl`WD8E?X{Zgf!T167xUUg=!ZRSzwyeC<>krCdD(_LoDX zf-|md$2l%{LJ@&z@ho*0fkztt_yUn0eP zKlo%=Y?LDN6)0qTh+_rbubNUed<1e7fNASOl<@LI2)MsHz(hYTINo0I4!WasoO%+6 z@)@Kvfv#uS-Gy_Mv+dNjBWm_yl!eXHR5Ij<+x)h$#!m;)>)(7Cz|HHKK96$5V9F)G+;mKfeqO87S zit-<6@udk_9yYtgUozkshyCjo7SU`$j^X044H!Y_P3!gnwHBT{LY5c_Qu3_`3VV1m z#ppr}rXP&T7+UQ#JbOahGj_N**@)}p)g$^qrt-Ynxu=igX?7UQJyLI@!u6c{#Qgju zNbPQ|8;*JutI&O+as2)hA@M-0Gl4~+U11brB-8X2b=n0=1-qTk<;@OkKBp++h48xy zkm$Vi8!0*AaJ_Xo3UbU7?qg0w^sZn5xpg;*Fq^@3aVNK$8!mkpOH0(?PL9E6SCD!o z4VNCGK5{$f)DD!K2o<6;5>Y<1b(}zq2c&ogQjtz6ZRY?9eg}5;>PWX*pb<;^!~|$) zxz+RLAS}DvIbd+4-R6et5&o3vh+IDhuTkjZ5*51Tc87@@2z)60rF`}rYR4cDXd4I3 zalxK*O+tcnC>Z0pO;2qH1=6!am}$Fz4qHD`tm43WoEihUsPnxF>Lso3#XMDKYTkM* z?}uVp9x9?N$F9R`f^g9?BHEl+;csJ?9tjM0DC08daLAf1Z12;y*kf4pVx-?eON1W0 zC!9(TiVogK7b-uJ(U4@pg8qtSy0gtTjsiQK{7ES(TuA_N4p#tcd?7wc!WzwgQH^LQkbcg>|hTq60 zKea4`4-k;2&&8IYx-WU{0hS_qu^0qen(z$9S8%*~K96iJv_?J`$+cw8=BrZhYu6J>S!(PRr7}j(0E|rSgqx+$2Ybb|M#Co<-bF9K{rZ_D<%* zS2nmUgLXq$`a}-pkD@0S7z5y$bSQOfJzzy@XVo;PAOu`{*Jo)*i@GoE!vMai#R`ug zyxW6FDOViUA^*owQy{r8>ivnXYHeOP9!qJi$x=dcbNiQI@RU|gdUOEU_BO~uY;0RJ zx$&RF2hX1FUejLw4aMh*Cn*u!L_Sl``Ktw>)wKt!k{Ux$#Fit1K80-#A{Ikzbcs0p z6K0(N4La0K3QT|~9}RCxhTq00m!;nO@#1dSrz$Bu$4uF|AvLaOaN;K6?{hiKBcsWX zIH*WYcj$kE(CSpR31Fnr8H1gD@e5C^b7_HD|KF~jlN6P3!W57|tmfmlA%(&QWy z7%+iPW#hzQ;Cjyyuo#Xd!KeQ0IW-{6B73C>A)7Qr|7n;5X7I0k&m@P|!rwB3sl`Nbz_{5aFj_q(f%gy; zPhgeuSLpiYeIiZirRVj50JJz>XO;G*hpE0a2d9U|Xre4r*IkHS*#xJSkHK$!^mQt0 zhe*T2wAN+5sR;|lX03g?=p2agvz7z13JDp6EI7SekDM(LUZM+B1Z={}X&oR4Rg3F2 zMxHJHH3T7u7SyF8hR>Ojq2`kiixRy@QsC zK1c5AK*Nw8IfyICS~hY)T-W3!di)Q4CJb1FOdn*skfmZDJtXol-L)9D&R*6x?7Z%5qIo4*sM759XzWQ*1lTy70(2 zbl=-VK3vJ6{hlLCpy(6t1iJSS!lB0kQGo0XUSEyn?INI7NXBMnGQcmGgpDaYKVbz?qyvjzXIaBM^y0!n}&m=mNir$I=punS7tILNuL zQ)(pm;nH}%6a=3k%GyX&4F33kUjr9=2tk8+3ehkOHiG)78(P#%Y^DJ{gm<#rV}ju* z{VzS)iUH|xY1fWw{9BMq3l5o;Q$&97vz~oK4?movVWPekuLAUSHRDtPWozRtoy@WKk;HB^}ef$*{L z;IQhd6kJj@+3jdqojeG*_6QI^gh)L>fhK$LCQqbMUVns*Kjb93SJf^fuaFs~6HkOB znPBG;I%Lo4v>=b&Lt!=xgWMY(o1yOgHW&rz27@u~5g$ku7R`a!d8bC~Nx&0Jqh9Ed zV^%mEzK$ZJ7tL>n=fC`WM`5b406?o+p{Ev#qIwuJLx{{3@fhX`Jy*z0Lu5Q}2-wm-8s)_fXnzR1#0aPzeF*v@yg+d1PIp)hYOJ9Xu-G-6e8Ju2|Eyv?YW)i*4gP zn;%E-S|B>`?F)9f4F|`H($uIT@IJ@~CLdydzkp2ol=Y4Z1~wmORBQqz8m}Kb@JA)AN|MHUMnJ%4MAVI8m$a~6m7a&Ajr{3 zIjQOBzKw8E@^VdNm>sEaP}X~42Qnodwn-*TzQy<;uWbD%tm8NsPhDV%pD>Wuw>|a% zjDPAAsRvZ$1Gdl})8#Zk#W6y!NspM5=uF)3f~@TEgV!J%O+wB7R8b1yy^$j~o9oA` z5h*jjNlBk<2#`xLNZ*|uv|09^zT3|j35jCm;Nz-qYcnsVaN+aYL6>*u+1F_C%kit$ zdlelKkvvE?3qY~)WD+Ea5&KX@-L+!avH*+{5oh}#|NJj_=DaJ00tco$*0b%1`~ciK z4PDQjmBy{Zj3mR2?_c#}rvMY@y-c|K8idC&&{~@>=WF^Qo86-x>>uI~xuP*DNfKpa zuww#DEEcd&l8=$T5J!!Z+xNdu7%F#vOTv2(f@Zzgv!>J~Dh`UD(OVuM-2MEtjU)w3 zDfI3NnCTF2^uE}hg`v1^7O{SYZUy@-5(>~xz zwlhOOwXeAYJHBgMx>+?<29XC_r|uQKX)pn4ou#FRySxI&X#&!eImH;l^>l`uZx#1d z92{$bQf=H2Wnq{_x2CC<&OP&aVBPQoPHa_lTQP3%wc7=6&H1l^{`Q`5cH@IlpXV^V z5>lc7?h&idD<#UKDWuo#xp=-0XNnamavotNy$dl}*Cs{ltgYbfu?5()Kil(F9FnF$ zz|94W8k79&fCHY5^p5o{V2D>WjAI_418ZpAU^xGGEM%DOfy*_HWQyY3RQg(Hjq<9a zsrz~EUaOZQ*ri&bUry7sOGqWtRe1OO+9!6wFvvX^35916a#97){T*UlcT^Kk_UnHWcO7%E50)g%c#L8pkR4zOc^e7 zOoX`b6&#+p3iFzND9d|Jwqn0wM!U=S@pGGe!{e~5kNpBuBN`y$;^fg_j7dClAEfAU z!1HzgWM(IT$zXk(Os%uaS!V2%^sZaflrZj84#y7&Bd%4m?Upytkl`!af)+Q7!Yis~ z6Vub$DO^CXG|r9xK`ZM!q=Q)p&$_qN91q9mh+Qjt4htunD827$81%Dmj^8sER-xG#-MBRx6iE zgfe@Zrmi*w?%Lyj(#cXn?v0fquoAKNz>;t)B`}USmRBraBK9#P%^=m*lh4FcYTy$I 
z`jLi8NUH7Hx^fQJ)7#!x0R3f9qHe~;$nL`V%3~>b)y)1ZkB~0hB2a#jGHnay5+h7LnkYUU_-hpKVM`nI(2{lBY zue}8jZm=XH*|H`ed+fQ1@5UZG>$-!Eg2I|I1vQ+cJYaiA4xYyeEw1aVFmUJ6#VyVq zddN4sj_S0rdp@@${#f#7ZKuF!C~3WHrb8g7`b$`L=}Mw;62HHF5NT8-TBx*z#Mk}7 z-43!BDPC+B!N3!6v>41QeWKQ1!D5`9Fnj3m5)$f5Fl6(>NIQ%)6X(SD}E$>RAR$M zGfCYzUwxV&*e|6($|#;>&$jIqRl6*B66@k@`}^8Z($B|>bv+t=(qUth;3eth;Dynu z;f2~+b%4CeM5Jp?qC;pD?=86XbRpxEXOGX--KOWiLhII5)!u?Wn1N!_-1&BgBEx~i z;tb+imiiK3xrnIu`ap7VZ-)5^F%4R$&z)z97uC6V-u9=Sy(?sC?42-i!{-^}$4MJ& za(EgUx;h)`zduZtaVMMcWXe`6oxcj_-CH|rFH}23Tl+0vY|p~qn@!XWmP1jk3M|VZ z^Ndaml-Ab*?P^nTRPR>5srN|#a2(0KWK*kM8DBX7IlPbd!Ni9GM~Acxuyoi;(KX9! zZ=#P?h-aSK3T#~am1O$>P#8Ms6>2@%a+yhesvzcMEQfp0sK!^fFn_S`s%{Tr-4oBW zyDy})wJg_Gi;v$uW!-vM7d zXW&k1zX-+b!0bbe#Lw_slL_I$g`xsrzzZ zXRE_cg?T;wp><83V7EFw#4kVP*zoJy2O@TvbymS&iL4S~ z$5$UK(Hk-`;7-W-CtDgOyLEU1b3{+-pOv<<5-!YU{+<%cZev`PhDUK-fv;+#vpb2R zT?F6&#v|9sxDPswHftW?vcDn{={wGhF5~+ouHf)KlCrsBglymhW9H`%xV!{smy2nL zRoU<#aOE-$v)vkUk54&Q1mOvLtD^t~t=V{l!>%|q)`r&iCUR22dm~l%Y8;(Hbycy* z6l1^MG)a$*?Jl_(>A@3?0GQN?gIR_cfckk9c#h+UKad1ID9e!SQQNJN0VBDFa<85j zYnn_}3|wtt%!$3tU0Eb*RFq4e8>yC@S3NG>4KY8Z1a+-qP#HO zae@_*FO`}#u5=e_iUDQrQR(#-@n%Xj4b^xF!d>$j!FrK`C#vDZId@a2{OGRokRK+@ zUfszBZ_pY05!RUfhBKx|eT(6k)^$eLYzi*21N<0?DyS{{(y@VL%n!ttLjz8UZxO4U z7#QW4^tgGT@|vzCd4aV%H3>TZ2iYfZ#H(!6%;+Cx3Q94ehpv%KdVvoymCBsUcK2I% ze|dk+u6937sG7#4{_}(%@dqYrZu2NfruF+rmUjk1w2naYoK!Ipzi&Gn6)4-|!Kjza zJefgs>|Mq)g_7s&ru!y&v7ssml7efyw;9LceHW<|+tdj!f51vyK*dhk6Ei+Elj66g zC(U-I^z>{ET^^XuQzaOr@PMgsl-n!Y+23)D=t*iWWZIkTlQ{YY=dVL2P^_Kg_!Wd* z>m9^z4O#J!U@#A8{q;?sXA4DjXupsdWY_uoK0!Lmr_@>Fu*p6Fzh1DBWeje!60SjH z4c#uym{LulRy0@b198U-=fj9wHCEZ0G4}ZQE5sBgf!ntEcGjYH_>q`xIToU%-NXF!*`&_PAP1*6W5BszXnM z+BGu+h*+$zm6E(&WeXWqUF2_e6|kd!yHflaj_Xk(lX+sYk0bNoE~{wCSwN8A8+{+-8|8daDM9c(HHPu%RQx)LBz5bz<*V#nWy&R z?^Px0N`k#6yfEtdQX3XP$0$Y7`nzhevJ?`x6uP4=9nWA`Z8|EH@N8QWJ&x@&KZpgr zK~6|}{pZ{55){?doWI%z*=O!t5(y6G{`LtbD7Pkbct)4!@WgsNcXoQtUWvCm>G7(m zj-EMfnG47GjCH2))7EpEURQE=dzIH!pV73|Q}hGD#OpEYgmiP4KQmyTRQ3O>b$wni;U`fa}-s*+HxbM5%q|1!wt~pUV3R*C@s(>-=!noi$W!|!AuPjq-w4aR_P7nO{(pNl68jpe>gFe$pY3dzK0UO!rPf?E|KzqT znYtER^HY8a3QU8|ju>rb(GWYSg|xa*mvv-ASW^1HW1_c~M7+7uJ~oRK)w0Pl(>sA~ z=EO7?u1Dlo1X7EZoVd`tqK;o*+qP#S7CIY#K)$!1E#%D-ma}$q-Pv3g>XdcmL}Ya^ zg$Q-YXJ3z#{uSjx+#%g_;={2a?3#5HpTklLF6pb*rvVN8PrKFl9s)zMwRth`7zV`1 zHFngZTuYy{_^(U6l&swI&9RC#l`-EZT9VYtCp$p%PKWrL$vR5M!03P`wahfCKDuCmU6q(Y2W~k`@H2r_2`QcUEzg6$6~f+|$ux z%U=u`144%Yo#e38^Ksm?#<N2l-BWoa^RoG))QHLBM(^Vu`Yn!Y%+&ZyOJaf3Ws)s(bfk0F z%L(mBZ7g0dH^zF53Xj0VL4fn;7rnDIuBL81-#Kqaf52Oai=V-dP+F7gJj_=f3o$%3 zx_pH5pkFc%e1P%k?+h_DAbjNM*)MUkT*pPJ)9TnUiUD6L^#)1SC8O$N;havt@YW}T zl2Q6GL@b4<*nXlF!bc$|JM~{P6^(Cl**53x+S~3&zTWzY%I{^9Ahtnm#;Ft#v3v!D zuyK=i`~Kg8@2Mf)f^C&dIN~l*oqpo`>BDP|^n@&r@va?SyuF;iF2ZB(){qbvsgWBQ zK*-{7TW5&5E@43C+j@Es>Brgig8NV+E1iT*+5BwB=~Y8Zn>B4>`yTtqg~#dVcxOn^ zsk=lb%=NaXK3~y%o}$5-()&b!J5wb*T{C1m!Y+=s-q=#OAc0-<@Htj2p|TS^Q0_il z>8255*tuM|@}c5$hwA<16E0E@r#1Us)IW10rFR5(IU&np`mT_Kx085!lnbVQQ^j#) zV92eJjtsNDUvqSt?rMf&W4Zm+Wd%v;UE}AV=V@49X$n7R6tC@NzM13~U+uv8P36t- zz|gz4g>{%bQhZO}T;6Hb*Sz1oG@tWC*N(sp>S%dUhxO}0>MgmnNZGAykEknhljTW4LRQ`I-g#asbdGCz#iALbu;FE zq3oq{Y|YxBBI>R{?Y99B7p|~v(-_0TA)T^blzwAJS*XeS!fh;m)C)W6=V`iSKNnIY zAd+IExOHQBLnBj+H>-2dAKSpqhZM6ONz~JGxKEO~wLG3!`DMMqDv()=P4w;OM2F=i zZ>d5RhqUV?*v@znVzf;;C3QgUg2Kl^?;CY`*K&>X&KaDsq-5Dwi!G-Obj~7p+d)B% zjfwxhX7o7q13|g)4yuIma$kxpPl_VA99FB8$nhS>tX5qs`g zEkpbt$Jp~pmnuBL2;&M)6721fJ(v3OF-NA zB(6tDhiC7URAs*DV6`ygf>GM(i|;Z%ZLe3Xy&rO!jJ;X#VgNMGG}kUIAKZ?fGPn8h zDt4cfC<{%2nU%QR@@RloGyB3V>F4p5mm&ffPOcL{^fw!1swvJA_%4K=+LQbHTYM$69eSP% zQw@r2M-z&WZqr`{Kv|0|6g$!SjbI>aBgN=M8te=p@k8It7{!qTzZRe!x;?mZe 
zZ*4zL(|OhF)IOSbk3SLq+U`Xwwj%8HX*yRlh1T^IrnP}n+_$e(GB(km@gem3d}k!=gl@hICRbImf_R7W4Eqd%oDS5h^_C-T1cQK znC*#f+g;Zy+n}EJ!R%!%u%+_TSh7hw_9f3g7_L5JMl7Ttu`4vRxn%3}g_c7T*Q2ER zkcef{Q%K?xERX3hUY;eIZ6<}T_}uLGFI~CFO^+i(S9X6DN^*;7mKU8F6z2kMw z)XVUyqBf4F3zdL1l;MfV0d|j(Fxw$u`-(ePSf0!5x1u2dbm~}P??;0Emzvq{#pqqd zPxtK&6YiKXdQXUkq;)c`b6P@#YS?6{?XbAsU^`Tm5$z#de~B{yFM>X#v5V1SYb3f@ zaI{SV|4EN%3~S)~V>L*SLdt)>u^$}~uQN?v%QY&A26fHP8ARiik%md?)F+@4y~y@{DqpTI?`Oc0U%{}ene|OA~e|xPy_gl&3l24h;KJF<571IHG6EqZZHg3Pg&}uS)N$^pa6Htt6b-kqfDt{c}n}L z$HFMYf7=C{)f{EN|0e|!?nHVN)&O*Wk@(%R`eZFPAWDk29xHALX?@e1OI;~X4BwI4t2?C<{-Ob88#8bu{@h-?cC4BJ06$*a~KpIh^% zcOA;L+7N3tG1W5W#;+t|X&zGb@2(^1Bq1u#H;ZR`1`>jY#^;Wme=*Mx=4p(sUKS(S z?el^m&>J01|ByQ8{mp6Uw1?J^unrdw(Iko3^0+!@52|kW&k%l16Uh zJ&{eLWuEZ@dp{J3&=87>;cd)!d;e$BE(L9IC1QBbW$)t=W6W?p0%p~+UR9TNSwx;L zJo8D?{r;%)R3R)V`tjqg@pI9p!iX8L=L?1UJ()VCHU(4sDx>rqmAsz12($Hc*ob?B zm9Ns39c$WqIb!4kv6|(~YxUHRy_Nwe7%`Gg95u=lb5T=!L3g@w2Z9)Jb?^N3Uq6np z)nbbbu5D_?TIPHBzmt~V9&R|MNe(1j2Mklm>=F?R;de7Nhk~sj^A_tG$Jo#rrY==p zWY?<^e$!%8AzT}A|Bz<#K^3$+w64p!ad%S!Q&iAO4V0`0!SiJ9nL?ruQc+Zk*(!82v{SwZgXu#^3ic8gkUoLBq#1=Xy7jVBfh$Nz~<4Xx|Tjj8AW|O$*F26c6dR>eVxd z1#h=;KLeSeC8}FjXmzh`k8lV3a(;Y@502P zeg*5-;!wu;8b=Yw^+*Zp-l+?QW?Ha+tm?79O`{kkJk7+tnyl5rEvU6lNEbVf%+Iuw5jW8P(i{!>UwH4;{~bZE5`~R z!ZR2l^Qvns&ds}7ynL;#9Uk2R!c2>3n)8&-b(Pa>vm#^m-E7*!0*1}IsIG!XFvI+$ zWU2PrV9hmpAV0DLO|PJ48N$6{MVlw@6S(k*s3vjyGq}c=l^qvu=&*r9O789WJ_z>^ ztk2=HtsSFv+7eYxu9loQ?6@sb36p2RLW4vDm>vS7t4x|f&2Kt}FOZ2?M5vFp-5QQ# zbZqR5D-;XpS!xL;(g^lkt9nk<^~TTKkdKh=sw+_=QCgJq-pk#cmB%K_Imkr)IL?S-Q9LU#?@2P%urdaEdO+7 zaP?J~Rv|5YG{m+?CRS|P%yKn8^+Um%TCO7+h%yo#G;p}*;cty3#>Ywgo7Z-9 zIw`W#WV{pm?Ozs@42Z@l&1U(-navL-$=2>^`nCDgs)PrqE?&qS=44pObo?KQ8F zSM+G;hrOrE^dB3$qT(w0^ z8J#W?jYW^aaIedo*5l8LHCtv*D7VE~o`=6#P?y5IL$B_8ozist6ewpxXp~52X7~@-~@Yn5r zBnQH;zzps)4UYHmx5I+OsD~u2hk~7KQJm3@iRoOmGT}r*>nUlnbxKvA`&6GNSY@7n zG4`BfB*gQ%xX+Ey0~Q;0)A=Qhvgcf+s!Zan(=-j{DZ>K|QsTEFC5Z*Jm;+_oDDbA) z8`g&o@Y%xmCqSCuh&IpugzWPF!HUMzb#v@)?siacHcu_%T)!4|&C}0tp{%Xuzl{2GmZfHTaynd5ef~#yVq3rGd;_N-Zv2Nf0@ob7HE0jG#D!P$<+az0w%t-dg z$le)ANH%3lGBP5wAr#6US(WUpA|m;nm*{yu-_LRUzQ6zfI6C6#x!?Eu8s~Lg=XIX1 z^YvOjsR5iuRhdp>2p|HH?eyWUYxP_vMP@3*Mvk3DkR>)Ql3ohXrMqvlRIAc^d(fo) zGOsotZ|^vF#{io}Tny>B1r5suD+?lA?^*$e=CKMJT>}f10=ulL_@y?@kTs>Sm9w-j z{NspCdnLz3+%1?@?k4wNWHeQ4zm}DlGA$~kb;DXB?Dr?hoD;GBZ{AoC5YSVyoOqNj z#KysEwEy&xiy^NS0Nr>sS^Pv`Pv+~{O9eMc-_ZdXd1{R~m1N`D<4{xjyx-8a5G%IS z@#4v(od$sIVyspuyk1y34uwVs3w>q@EPjbZwoZuQ-H}gmlzyo~xaFVJ{hBjHd|j;P z(y(~#Ky&g1FSyUq@iE5U-WRCNli*p49iL>-=~jgZ99^RgG28k#dyrzPJGjO+PNQ+g zZ@8w!{dq)UTz`h>C=e$m;4L#8FdPpwJQlQh6)F|JY_pcs=(SyJrlVzE-?;g@B5uj- zF+;=!cU(FGue3?X>dkdxPY|`KgdJ$I6K~2|#wC2^EZljt375|JUBrUAG0`3tfQB#G zaUksy)5DjCjvk7z<+$~cgJMktU+XnJPDAsUlf-8g-lw6=lc@skc(Rs(Db*q=hGX?U zS4-^gWs&+cw{irxJ|>;V8tlt{Cq&;3gARDSj7%L;;1M%2T)aiKpWqey1) zVmu1co>UwsuQ_`!^mcEvV8Lio-Dz@wLX3-u3O?wp1T;kJqe4m1#G!jLo+U6K*SBs> zWjTq7&Ay=VYSh;rb$U}oRO6S2HDkR>eQ5M#n_Tk?c}AQyh;JM?q0n}&T9jt!L3A$* z;F6v4QrPskco=Ax045V(azKGrUA!^GlF?^2ZRwhQBu-?Fg%T>wa1nqY|MUc~EHEr^~^?&%- zh%|E0z9c%B>$Vdw)hs_zqU=z~62Is9k7vcDmb|_v81`PQJ;J6`d-(PI%8NM&FF`ZjmlGw+R}Wtv`#P%IWyq8`bd%P!R^zj*I_oQyed3yJcCE7<+HqG` z<7jb(jRXq&gx0?uh%)3|v-4&T>Il49(&?xMRXW*3M{DPpOL!dUZ7XH=U0k7@D|}3P zUR#frgTYJ;jg{(~qh_54RgPxeCv!J_Js@lHqE0M^o5=KUyeDn=uB~flCHLm`5;fw4Oa21w?n3IeB0*u zY{!BWZ^a3-^^p%0n$|MT*X!m9Jm^#_Cp$>(s8zzIV^PY8$)!_oqzWu_64T42J%~6R zMI9w+*m)*+bR>t%mjlBj{4W~g9evnUO8oWxYv8Gz#~x~=J@D|^CDKn<$!W#w1ILL@ zJ*iS!)=>U3`}Lw&;ai|y;+S{ryO}^TykttD8aZPhG+;J@Pv3ClbA+V$z9m;Ga&hp) z*Noj2KTe$FEv9jA!Ir;DQ1|X^x0d!7U4BhtY{1;~(cQauzl_jn;81GUne$9DVEUMJ 
zClNO>;gZ2P0j&*L7^8i7NY&FN*CRFrJ$M1VsjBUdrmFlt(FbOvO;%78@w*=}sk`@C z(&s%9aC}a+R6L@vv5XTTaTZa$bJS`6l|i#X(^yH|dotPE!nQ*dZ{?;He3}HfJE~L% zlD_eiKWTa)O0XQ>_Pxj8z@*^9d{wjA!j-h~iMk_;k10IborX&@KlONa(0$fRkO^oy z@D}GGp7FEv&$6jc-cnudC8%A(X2^>Z6;pdS!`6y#a@T59?u0?fsiTCn3S!11E(?bV z=+hJx$DV4YciylaC^Z2T6=j>#0quedw=rTv(y0r=1oU~~rM1TqwZEdg1IJT<5wnW>;bqQ6RGgb(M#9rK};Sd#`Oy>;Wjj}PRD z!oF%E)%0aXOj@f;J>i8u1ARoYO>t0sPf+g+Y(P@M^)7%;IyInY6$XGclI4%qeS)^( zjf%Pz5(F6}6w}m~A3rXgb?@}vk1^7)IA#j<2b{KS$qIgLR_{yfW&jJLdZdM7bKnp@ zSxns%!TqPo2a2cjG}D)3eM{y-I>HjPAByqPO)+1%($&7V<^bMj6?uSD4f7taHP65yjs@8tct zzQhRAv$nI12nTKeVd{B>cExyx>>HP%pn!BebSx6M!WBB!hM)*R0{2BB)6j|z{Z+Z# zX5HM=M+lNn-_Bg~f!9hn_Z_i^A$uuNU%hfL>D^<2xpbc;iu%c98*eKf0(#?;vMzrB zP*E=7xIFr)F&Kk;F=wOokpb};1e&wLt-C#tb|D2Y8#WXg@#_U)0b?VFCcO#Kz|83XXF?McwinvW;4`08_{? zg+m4-J7c8?^?tWf;y~$@Aeor7l+bXv^lHS87|3LM%2k_^!WqQ0z;fUh23BO(wGnAx zli_^GHYHudGQj-&5@0iY+B?ki3gEt~@tNP^Zy`^^eL&4VPslBS@hiEVUAcv(&7qIR z7mc4(ljtgi%?6+Lr51C;{MAH3D1ih#88P-{V;)s}EvQ^wvwbmE>i#@=ZbN|DefrOD~v;T(2M zhPR_FfEl2Su$He}c7?B2NQf~J6F?wgHt!fQ4`2sDfY;J7VELc{^z6Z7cu?KJprtM< zM^3MPy6i47dN|{Sg|0e06A~MUuZ3{Qt`_35vd~@3QL4z49^lu#bZ#D2Lm2#KJ^W!M z8FGLU_L@==xT-zZ_H8cy(FnQfA&uUvl=b@QEMTJ}PqMvmGlzOvRGa1c4Q`MVKXMU( zcRw{xSbK_aXz4cCvdH728<5?Z3)7QsL_p>N|FRkz6ropJs#S-iU?wl@$-fqf!R33;OHid>LJyL4=X_}a&k}7 zGAS9XsZK`?jBOVbX0lcazCd`Pz9Pc{Q{FTy*tV~oRVbv|kI`gs`q6I(V>ry&_RWpHFoRmP*R3zv zz)Dr|iP8-3z+}C8gZ(d)2~mBdm2(Rb;n@Q9wGZdpc)tICtr1^G;7j08i}z7Hnhf{c z;AA|4WDW9Sn>2tM(d*M3)r20giY|S93OL9R%7Fhem-ghIN(W}XBbkZ5%96`VM1P@4FhRGjzNi`URh|dw5 zs@vBA7Le}YM%*umEY+G3%toTN$n_3xq>ZJXqZ~XLKK;CG2(fml;s5tUS$6@M5RxJ? zk3bxT%YZkv>=C_@4CnH(BGXfGxT*t+iCf>%aw-lllS~6M?6krR#HS9v8rUHGPqsT39)sDIWta`*?g;jD9~5DNb}ZwF z7W9PdldvqO+b_h9g`vT>EE(wevyHK{E`i*^PWAckpRoB-u5TZQP4xKf((gVH_Y~KT zKZ5KpQ{uPgB2>#ge82C9)G_#nu8bz*>|qG*ugU21L&6ZX zgroU;Ijl>Cf=?Zp{HOo}K<}ltt)OKX77`ORA8+-Hs9eB2J3k>W5RhFNK+7v+Rr)IbHaf#UVi^!f@CGKO0zWZoQ|BM zk;C0F+~DKLA(-gAHr9Zo?Ioc2R4`lb24AW?$}IVsdND)!-@?+$3h^B;Z4S zmjyAe6dVO2U3Y+{*FL`Qy2~E2$NL}cPYaEAmmV}A65nU!F+u%vF5+ah(lG4OzL+4; zB>RC6Wv`rQTY#_!+fX$KOgI7YruCS;PzM7pK)<6dWXp*=SX=NR(ty8zJ|fGz!&U); zrpaIyZ`n|~V?RbHaXH%Za3V5%sK5T|4TWWmzy+N}5?J??JsRkR2Nq{Y-zv-e>zgPX z(L^7?{-JJ_X72#u&&0|>1YYh0a(HI9AUn5kFw2DJrT@~@V8)R`ODISbQTmm=OFx9_ zXb?0Q$w1j|0IM#60C9&B7VYoP{L&m*k~>rcMkbVBFofoUR8){cx^A)2mEkI<(m|-7J`8x-1NyWB zMt?|8Pe<(WOQCZ7C1jANtdR1G&>SgG01$PALzU=xp$CI0fugq-o?a%wtdv|&IHU3( zPeF+jxD+Wo8AM2*>smB1Xu3)x^$j6V{dkMpPa28e1`TyogVz216C4IlNfE~W5z~n# zzI}vW24qwRb8FK;q|Xsh%Y6f#po2@mPH?m@LE5~|}GSWuRU zwi=L_d^fvG>bIlpVSwt_jz}U2F1U(F=Nu~R**s(Q+nCCe};(} zrxH5k6k&UD0=Pjzw~7=u0GO{LphU1G_o>5}Q7!N&N?QsIO;bWIB?GQi{IhJZxc0D% zjT82o!>vkLAR z*fO{$;c(-4JFg0r52h5!?Lr%fQAPcu9^b$F%z*kVEvT0Sk^e)V$Fh`6pl~V^GQOpA zNJR>j22t^syyu42#p0e|Jom2!nl6SW>o&#CN>dvl`D;+lzzLiQn8Iq|r1|fAC&#~6 z%vrQzVlngafTcLe%}C3E3xzh6P*@ag*d&w(b=3;_n_q3{p=sGTk$L|%sOfX?BoiC> zI7qAG8ZOk%#LrZi0y!)d2ru%pCzD(F0 zM1>RVy3vwUB@Ar;w>3MWb;ET$SDz3{i^G@KPgA1JXn52GFuMVmAz(|jN~~$E7eQB} zpwoI(|8DRYGRXGFyAHzT)6uQSLkN9E`Nt!@L z!ossZ$Lvd`zuS=wFy2rB%#74bc#w-0*(wfC1pD49hn6oPhhD1Xj}TN(+`gxREQM}R zeb`uJSR)6=fU6zPP!1goM=}|4itN9}D4GQRF{wjk%>ary?v8tyvXz*=BGfF@7aS}U zTd0yfpCrIxYiRCg0RA0N#Ggq1SA(D1K!a;J?`%Makp_yA2~<>G`S?7b)CyqUv{0+r z`y|OWHWy8+=xJM;e~l7W6#g+=map3xYQ)b)PlevZA{rd$Uxq*TBNPN)~ zCPp-~IOZ4sJ+mLFq18q)LVdgDB)n%%$WwzN+P=bY+o(zOUP-L&zx3Fmu{`L#GOhV$ z+#P4-?@r(+{WT|AR1geew)dHDgxanBKWq{`kyxZ53f1H0Wyk?Pckg({Wy(RucI2je z`wxi!Yl8{Ufk{)o8L!<8Zr&~SRuY~7Bs(>)3N(m2jK!Dgy*)H)Ub<}}lZsl>6a3d; z`FBI3&#LyQdHe|sm~#F#s@FgidJBrbLZ8)qP5DDF7XKPK6MKKw(LT=9^GB3oR_-tH zxAKnbrjvhHt=;T0E?h-bv4pqU|7}twsGR%QIW;C2W2?*)r!4WATM#kwc2u2MKxO{y 
z_SxT*k>=-O?)$OEncGKm-&5i~r^%x&pR7T5iy|&FfRjGjl9^6MUM}PuM{sbk1-CXg zwfc6y!|I1Ak+rJ+x~Wu&)Vd$n>WtnOgx0LlkIoH`-(NKJ;uD!K?>2|qrsJGvZTV_u-(w0!XQu(@CyeA+`+E$ZJ1;2uY+)K>38wNT;KMEd74md}4}@?93*K z-Iu_d=yLY+}lO%ECR8RelrK9v0Pq=8^j|P zJ(d|Cj?a0hEHw4TL-gpC}$_-?c1s zJmtW}$Hbp!fA#k@6dF6@NAzd>qH&>aJPv%)l#?#=A1*+}B8~YC%b~+Kxb!NVcTNA8 zDw-{5*eX4A?A(2~pu?!9=_-&}<^BXU-)5l$2Omk5+k}EWAN$^Ne9u;K@Ne(q1ruWV zV?xsVQ=xvY6j-ynZ$jkrmTyTeb952RRCVBR)ZbMREe-9JK2M0FN-`mY&_oh)I=Ytq0?@iv&U>H!wyu)~T6X34?o8oxs-+$g)7pf)|2^Oc$M2>7P;uVh zzp0eu>e;<@fh`Ba3QY6^o9(eWb36d)+M>$V?q8_V56Nt67-kr@!lax?(Fh6JYoFX@ zn6|K=Ti-dPxG*kH;iNOM&aPA<` z0<}U}_v{T?%lELl=v%oGKe6oX8l8vHOxN$sv}ywZ+@^S;+PuN)2Pik(1xfZdizhqo zPvELyvtIpzE9nU)zE`q;S7r%XbYUc%JF~Y%1C>?vuuVP3)Lfr9hj!jE!*q#Ou!|E9 zOhQ8cPPxj?lurwS~<>SQ`W!TqF_#U^12V&fv1+4=t~RyVA!EDl=eHv zRCs~{3C z%-sO20Q1x2rh*?2`%mJKtzo`6*C3@DAn;sttLXVVM9lvF3^1AY$D8{3O`h+22BhhR zh4G4V1{2u=O!#ogz9#b;$|+lb6JTC80%IEMUzDD2R98JG`=Q=PjQ5bCZjQ{JS?t3} zfJJ8zc>6*b-1oVm=(?cy+ht>2k^E%;{j7UffHt!Lv@e2Ug_VA%q6TCorko;l9J2g} z3Jwib*l;1$wMd=YBzubq`}`nS(y7TO=vo2y#{Nf>mGwoMS~29fNMdgA$R!no6@cSM zviuz7Q#kP&;LqpWK{cV48iHi2x)&*#O4p zZ?%j<&ESHq4~VJ`vVNSjZe*3e)dG06Gyzd4@^o5^RfIo7!J$@=LDeV!=i~uj1P;1o>p+)=Adpr5 zrx6tH5Dl6Dmao%%>Of%uBTkJyW!VMdpiIuQ5a^pecXa>WY61JFgNXS{cY z@>)oB-QkYrlokFwh(x@B6Rp-d#FLFZ`kl2LqraLnn-XBAo$mj_`&4ho^SDa72<-DPkNVu#XB3dDOm%*WEY%_dI1~f%dOr zDuD}hhWyd5zWzDA4Bu1uqdsefB0%bWz86knTia9gr#@u9vn0u`riTl4Bqw{`-?<=W zroaW`EatMB?a9ABlas*NOA7OODt`FR#f@23?82(&!f z@PX~K?Uyd-+QPUR@*5lX|ErSylh$+? z%jlK)A>mh^N;hmNTISHSl$t)J!`pj1g$N8sL#~1Fqv2W)=c4xI%VP%NRm^w<=wgF( z?q&udG@1?LNH?HTP30C)TcmIlxJjlfsQzgNz6?9eOTYPy5iy7fOPM&s%oGo%AFCvQ z`!a`Y2q}NB{m@fYB+KGP*%RvjWtBeaWv2u1#*8Jy6Jso;qpF9MrWex;IqxUQWJ zRqEe>gQgW29{GR>=SG?JtPFDm)*cyGI4$YkIQV0c_xmfg3I~s{1*kYu>|&u4?~~H` zgb5(=&H-+o(1n8xLg#_2W7cA|=O&gWVE7{2H+RZYNB#~(Ks0PL>mp9T4|W&A8WuzB z4XdlpoR+PsI6m`m+w0W%`FnFwrBJ_O`0@Q}iUD=zyxuTyJYykdgTw##YjE&if`2@| z_87cnRiKT1Vq6AoSxicosNCU1f=eK#cYdTs_uJI_#f!Z?LD;9bA`+7rkWmF%V*=~F zoeP$2uqIgOqF%Qmw))26{Tw|;hi_gqmhV*qfG5Wg@k!)sW`zI+aZmfB`~v~D1f`cF zF9FJF@QE4Gm4AQD1@tvZtpK!*685u-n;r}%| zEjunGcBHZECv)rG^Cs&{pE5TCEcKpU&jG28H-(S#i30HUHx?_fPfx!7XWKFfK<-`a z!%cQ7-ivMjdVjJ_Q4Cx&l<%DVOgvcURe8BriyYVEup^^$SJsHDJlVf@U<%X=K@ZQa z7NCn5>=AfKl5VJruO|7^T(7~mZraQX97g9HH6Cf^V%&GujSxZ(4-xzi>pT%{fZY)c z04Sx5_-NbA25?OP^FD_F(DSs-=(KogK{g~hirHYKKkE~qOYKER%C4zxbz=`(Jq9UenuH@ChSg8I`bo(Vu>J1^EE(n2luOLQafVp^3j4aiv zl3DuxT2_2Nlo8N_4+KJHBEV*(U6WKJPTTP@)xXKvf_`s%7K7dBYoelpG{ms6{MuQKEp7a4NhbC=!3vDTQ=Uht8y}C zraMn1Un@tB$*;VEqGLd`>~Bgv z(`B~wHkck7#2l9c#oW1?DZ<2k%ZH#LA2J1N(AXub zE9~8V?jx7wt;$3_+;7$u?C7lrOLLIu8pMlOfL?L6(&+(eDqZU2WAM_SSc}r_ZDsDf z(8R)16`LIJGhwA+{vI%a1AWo)NCx3};BfvqRQ0|P-b$+Gd=Xuq1k*?fcSp1q0v^kK z2_LrLE0`1X9os z&~i zBfWd*_)9@2exL2W)sH;91KY*F0YT9M`%w;nAOt{v@c;eM%iECa$N`-Gk_GKp)dDn`ge@;4i*fe+LPWp#f>^cLu-8oV!MuDKM1@0R;kS8exsw6*pvMOk?KbQZc zEFT19W@oa#@6Ay$%uT~+`D7gY)GggESOa0@c-ey@jlW*QW&Q+G3s{S^z#hQI(48i6 zVJSziP7@Wqv)k(5(1~>iwFo0v&F&e=74M=XHNbve3g)xVJAIhIOOhc*`(L7OzB&j2(li46u%ZId96*1oB5CY=CchKcyq z6#8(b>`*&)Y%j*NjaIF2p8P4aiZ$Bxuc3ex1bkzI4vH#4EN99mh^`A4M>XU;{s?gn zoHNJsBGq(_K4QUm3?f|R0Wu(AUaL$kV#`c{?%Ul5z?x6#2`xLjcq zK7mW`kY^Cp*T?p>VtPi^^Om;GejHUDi)1HHghi)(|$)weXMbWUR(j8Oyv)%(f!f*tZKfwk({@30IuP0*R3$VdNrQoz&b@tbZ(tnJzzlYmrl5hbUNG)QtbcGa z745{W+~)Ni7+Om{kXQ=X%1o<0gm=jZ8oJQz_B0U1W&0lVjleeLq$05Cc}H~i_#l*2 zms|IqgG~5GJ2SeA@a7Oz;t>)sIn{@`5$|^Uj4eTL-srBrjNYugVEi6=%5ZPzH+F{$ zz?Y($g>OYqNFOKpbK-H`Jb4p#8%~V2yy9-?kl8)pBOd+8`_HggTH;tyKxixi;xFR; zHm|O~K)5Bup)E@;p#ynRc0hKKs`P*Fd5`YTa7yQN8 z#bN7^5fwTG@eBU%0%A7Z$tzA~|;EG9) zoPLg30_M3^$Rp1gn$cY6Pqw)pfCN5>% 
z;Cib=V?C~XX5I64u>^tSmLSojj)GTT;5!pQX3(<1iW`#n$fv`n1J>Sq06@4V=1Sj% zu2#vQuTcGgvpb^bC_oM_#@o;G#KK;}-d*GS{5QDnUIK_+7tXB{-q-EHQl@&0JZEwQ zTxWH-^-8mU0UYDgQ91g$AFSz}z2?Hhjv{H*&I%{I8YralK6<^CFSnoTzI!H# zHpW~MjvX^L4-ic@2L;O~Qa6AP7TkWu&9ac%4M|8wwhUNTz=vf(b(k3fhy11$WipE~ zii|N(mEiS|s{Rp(jXlr^{`o1k{eu~9a>;wqO~rs^}*mg+Z1HD6yD zsIW=6%_L<6&i|7!L9~hY6dR8zwth+304j4ET+%H&4`5uqTg?1wQOVmok{UxNX*%fQ z0O3$AkVE+%2b)?eND;ge4#RkTmB(hi_zv+L`yai@fLuN5v(mY_Dp2X)^vo3E_}8KQ zETu$LGORON2QL-wg4|RAuF2i^dgdY47Y|h4Z@`V^09sY$k=EC5;D}08@$jWNp+-Ke$6cdpKS?@qZR1vXl+vcITJmU z@_Ki#)~eJH9RU z({y}%q?#gR(Hlefga;~(ShHzbh=? zTF{?yVTJ=(lwa9JMSa@?2E2Xw2k^Fg9lr6wIUl+Vo>BWPcAxC;=|Kn2iKTZobHFj~ zD*}Q#6QI5ElxS=sX4=LBNZg<3lUN!MzRMNGrC*G+!rATb<3lB2LRMBFP8uI+mx z!iTxp41iflHv7&JmB2PUQ2PyB0Xi=N>oKX#uVB??>hTK%Et1Uj}lV8?D2>*Kc%y45p z1vIE4XmM1Hj>dX`)a*v;8rjGtL-p~u`M)4oR|+oM_qxKdas8E1q=g2fe)ad`s0g^P zJ2mjQjJwrAs<1vL>yGF-*rnp=r1;Q7=5+6z6Lg%^22XqhO3+C z+BUo0kO%3BdM4n<)%toU56Nu9g|5t4c-7}SFirE1ce}aVh9u(Yiyj`qeGZpLeq9s8p6=yANTP1X^azdA<0RVlMsynXOX&bEg~A>%YXZn6u(Gp5}MU}ZNk#-4#F!3;DY zUIazC%7W?N(b58x4>bP<7tdNwy=qC94l3Qk9yb07&F2PU$+qQn2cNGW?}#l#v)wt% zs`6k3uN&ZK?SvT{ugc@U`lb}*O|_+pbc^rL`1BsM$*ndXr`(1dTGr-#O_r4798nF# zv5Z!TTz&4zsJn&xt*8uj?NjzkNgbKY-f*>BdqKp6X})UHxs(SEyWY!Fc6pF85%FyR zzjd=vPAXlDWkOYW#QewE@GB&ju~}nl^fYr`!M@vbYrnypN}7;6o~D>)Ji@L!7L=;! z>k?}!L0}Dt6aip(*c$C$F+c8O>~c|O=E>+Xm#RoHd7;@ri7653vu=;=-gN1;rOs{Z zPCze&km+sArCR0~s~d>Kj?7(Z{;~9mytzI*&zy`&a`>@J7rF>hj@lfVNM#(zkR@_1 zy$JQ`r8;!Mh5FMxUBIdM`nO?+)La8^bv4iO*uMEC05@8wk)J1B9r-26mF}b+TfLjZ zd43&d6E_)B>81_i=3Xn)roOS$R9m0)q`WQt)1_RD;jT0(9A3SD5|kd_eG7UVLPcxn z(&<}_Hv&;cg8q+k5x@5tfCegM<1`W5*3!p&P*)J<8!hs+ zuMKP((#V;*M>soOIS5rSvCnEIS~HZnjqf&Y37OQ`uXxmRowKQ8;F z_iI}qfM8jsQiQK6pKC=7&DK6=IIt{+@FA-_CW)d(W&6)ZFj^Hi=PEu4=1zaZU8PED zxl+BkP~*@*K2I-HyY*w}NypimpMB#mR)e;{#rJ+bx3XLX8oka9JY7E9z0}OWSbo~4 zKkJpCNyh~~{o;^i`ZmkXeV3Vjc4bQ6EGqAeyY=oC^Oo1tL+7pS4aTXX@I4Ws#c%(j%ecSLrZeoqR1lBk~Qr>LUM6 zQOp;m;c0EDTF6~7yG50Kkn`i+gv@+0$5c44!)HADI<=u*a!NH~sxch0NS8AjTc7k3 zJ~nR)=@mmIh}3h8-3J{^4-(-zUzN#Z@mVT;`p|^yTwlAuQ}Dc+0*`I^XEC{aZUq6p z^JjB%AY3|G**yr?D947I({G8Vf-~T%)AIOb*cB3OY!>g>3V$j%l`XIx(4`bL`YVi0 zVf<47W+`$@?8)?I!u`>l#Yh0q#X<4ywP%eUex3cH^@`J3^7Nu}oQungX&aY@;k-|p z7bfd%;&VFx6zB8srH)D{ueR8_N%ZTcK+)GN%fF%~q zUcZqy?I70X1N-Jq{?LBq{nCYQ9#=DykG3boF$RJWO~*S|FVWFI$h ztDW!^cZX1bnV-*)UFCcwuC!l}gz3xR0YMSFOZh&xw==|Qgj{n}KhN+DZgp&d{U<6O zNzH8F=p#2vPb?@$N%(7RA?GAhWqeRL#0{(3Z& zJ3X8!GlUZEE`>h}zA`#>HK=<_bv)M_+x)!ttZQz?=iFFvYG%JxRly?pGRJbA^s9U8 zsu3gZ+wT*U~F-f-eb!#BCL^l(2?OK^HDUT15& z$WQ8BON-0lYp|3@#8)V@$z;~Dvza~*L@j1+HHSbofuPpGunm(Aq8 z68ZsDIFh!G9FF)+@woL}#mPD~;jA7XgU=DZ&pjY%W#$*BjiuZUV9z7h*H`u~x&BUM z>PywNusiJ@jk5qg7LGD_O*`^RaqU}YCPTz5`TFuzh*nz2n&nN8;|b9EYEP(x%xFzGRx0QUT zau?&|^=69e;5k;@DH{v;ZE;z5-CXx`&`}?8X}r5%Zxu~;nkcLMnq{5R--^y*Qd-fx z2^ub^TiNzoKQxUkV^v=4BtNqp6)v)hjG`4zn7_2uzb5{Vd3rau2%$aX!E z&^bu;b@q*e+8g_v2flp#w$r@@2?oyXf`gBm`tzblD(pu-7)&*77Wc(gRm(G->W$-H z=CxXJVv>l)-39NM+%RgW_Twiuj_3D1g}=!dFFn+IFWPNI$^uFnIHQH=sMUJq~X%RP^K8M zK<4;{BO!67=K|lGulY{zCDszzF1d2ogh=SGgbuDZC*Ji*t2)-vNOX#5zh(LRcem1q zWjL>$he&mW$RPzL*RU}&nFcj%?ANwyzgMf1D(+v(l4nZlh;7N#C${{Z_H(`D6BbAH z*QUd-a}2_xeicmgcg)u=YA7 zga?V`aEa>&RnGLzwUu?UCD-1VGV7LhwDbR`0h~(jWXg2aZe0KDofu}krIoM6;Vi0I zDj&qS#bOvCxu3#szf!5|vz<=Z6JRzhBYtSlmrsXM_~@ueO`pB#&pDX%{@0wb*SugS z+vkdDYaYX!G`%v|?~85CM5pp0lu)}ZS}bko@o-UPklN5RYf*c7v*>Jp&f$(}Gc0|Z z*tXGh!>MW>B|N9RY=(|(o4FM>2r(@47vgQD#JpRR#8DOX9FEu>|3j1|r_z;&Q+2sq zfC|ncyjn7M88M|#URpDK1G2SR&P5E%qKd|I4ot35H+3S0a8w~e+kUA6zIr-TYvhQY z`rWQfrap)28BXWaLdk`kD0S3_UR~_uXkLSIziN5Kv_&3~eq8jkEXTZS_?T_cTn5&v 
z<=+faS>X({&3>x7gtKov!c!8sC_DJizRusn0(>2*>Hj)DA+Gg^fpzY;f#;PApU3r0 zNi7dg-J`}V)Ea9X*h{EIhxq&8L^}Vf2)Yh}St*Ln<&6%%9{Rnravs79XmY?l=2Zre zS8}OeE+MAc;f_7-S6k-%sLIC0N-Q?^+r7|9g%Ruu+|GnHJ*Veb-a~nPnvS#hI-RpR8e{V0CfkTIJ_HF53Uk@1I_s7RqVbJd2FSWhgKVNp;#-qS$w52 z8pPk4PI^FfQt7Yglo9;t+`EXAUWAOR-uwzSD^GpKL973}!H-fJ$~Y3nlB*e{iO z#HXZyS5RJ9zn82sjj~SWEU9kwdF;3vb{rP+54otSKC+=zKebNEpBSw2FoS|_Ln9O2 z{eFMW-=bvPuCgQRin+GLmWS0RyZoX+?Qj0=+Iepdd`wZS7R&Ak5AdQ392fZvH$8ps zS^2g7M?|ihe|^6pt-`eBbjzMf-0O!e3iI62ek>I>$1SuG7TTA?ASW8-JFU8`=su>P8=?UmlOlZ(B%&J32Id*J zerK_zJAp?#zNPDxS#ZR)?1s-Urhd8sXCrY6E(UN?gNBItATQ@BoOTG1$}`1WzjZ$| zFX~1UEd#`~=ro!qskWQ}XKb+jgEQSJ?cV;~;M#K|bWdL)vhk67!|f?v;ryW^VA2FG z|C5p<+D+W_4=od~4?8tqeRknh{Q4ZGr&E>V`_%geW6J7exrSVZQ%FD-NTx`gi~9~4 zi@Hpke0XoKeNN(?zHn@teT5pDtXT6KM{gKK4fZLv*d&N5=ceLO)zWbN|Ltj(_J zZ+>|X)uE{3$M^BXdB=L^TBz1}(rxHspBDT6rAYFM*D)PzR@B0D%n!9r6PYo^Z*T+; z76J8CeD&wKmN{kFnFZ-AyWGv+1+LRps-D3=K;@vC=_X@TVU*d)%TL?9RhfDj1k9Db zR)g;r)g}3B=~Y=z8IV_;h;nIqAU9#~aC>rkc}T63`3(Q*uHv6o1@jzb%8tJpc5gEvUNLUr{DuJc z87{x+!yd;{%KQSHqwKGUK$+2{6p<~$G5}V6gG*cD+=}JWy4UCCLw<(8M#aJjm!;Jp z9C8)l+)q>v<0S)5sMTj}gm!ZSoY4tD)KN){yy1pCb&fBbdXa{e0A#V1Jt16Ax9UVx zk(YfcebWt~3jI3SV?^6)e%lGYIV?pi5B)iPyB#LFB`&P+->;?d5%2WS;m=#sBEvko zSV&)>rJkeW?bmaP()#aoGCEN`8e%W0bjjuFB1&V3euyL}R8v^?KT^a?H^WgZRJ zZxsdhMEa!(czpXZ;Pef}Ij0|;GrURHFcrTbm@A#+s~SPLwsHTgRaf>UiONyWg6T~M z@8zM3Luzov@e--=zhF%Jj@!;dPdT~2-@vW^P)BxFbKPE~iGRm}*>A5idAbUG_Ve#! zx-A&iS;y~E5m~e_00p-3BHgiZE3z-3I5lnDKve1@mr=U(E#H=Y$JcsSv;6W{TERc3 zt1*x&fg%UnhNo!6;#x2p3=Um=c;cc+HZno|eU>^KvXW0A;ek9NBehiLpdRzd10 zAKYp#Uf@X)m|A7H>(5!N_00IjYiixZMOld_D#mk~r=;;Q8=s7Xb6tgI`$RexeYW=9 zAF*y|6zSS@l{z%Fr70DB%Ba@8!HaT@<;@KH1Bd#b3MXrqUS2u_alc!~#Ey;q0M!Ie z5U{ zK5gfe$c8{n_ioO6>te5@nQ)5@Nt*57MYN9``il(Py}q+WiL>UMi@OWAzveuj?#QpJ zKBL^C7Yz+3ZcF>U$k@K`aRG8RcXNz(W6dQR%3)T-6-_1%2FU$K8 z_e&f}jkbXHbIQ@3bu@{e^ZfDE>c?W|Tse}x@NQ@qN@M8J(`A?mU2i1kerI-p-qA{I zSuuHTYl8)w&^h-u_bVe&tKI0q6<3Jutdx!xu;loLb@wMaw^e&pgcfr);L?*_=rI(o z?u|?uf2FFZ+R*HBA*IN=t9d`v`bQJ!%ymcvgWo4eCupq`Up(_R5kNJ~FKv97y4K>8 z&RlCM0{()r&@`-GkTKb{)ZXCmMcIw}6GyKWzIayjJc4|%Lei(9YmIEG8m^Gzc#J*` z{cBV{_mtFKy4t-3HTv%+D8_znb{@Q%kYrZATBoay>C(r@)1fL1KQIhDC;vzMK zZ(sEyH70wf1_m+8n0-rDEg5rjM;CJ{qxm<{04EIUTY+YW(ODM~h$g@e)7GHTn zUagI>zps=Fh43 zEjr!hN4w%>4s2?ZJ|8?FG}7TBct`Re^*uW{^H`q!xC;BS zUal(MnUNa`+gmHu5Dn%#zqPw(nZ#h1a8x@I=^br{t2&Qn6`I~YNzhL%7T%B~sUfvm z*+*|Zz3rlXWhYUMKhyXm`Ijr=oKTpO3&w^V&Q#yQfRrl>Ougis*@ zPd9lo=+ZdleWvLDtPAAOnfGe$zeT~%X*1Og#==iw3v^ilQT60=o5l9bU-9`wT+ie2 z__S%yJp^SbTUYhZtJPESAz1qAxknt=sH2_J`m*4zXS3Ywn@4(gGIG%F@ME9uxHHu+ zA3|ldMQ!+IM$X_@90#uSWvztjJ$K>{bRARriY49Y+U#nK

    `aNRxRtG=Cqq;xGVM zNw7_Q=J1fYn$x$|aQ~)LRv-4@3I?m1Ry3Z;?B5es70CIH|s*$PZPJ6-`^>x(122gSfN@I2G7$@$=}#;L=x!~-C+cIn znRh+BU|ncKt+V2Lxa)e6t$ZES27OFx9`LmDrhTdJ`9T8Sor5{L8mwp5ZFmnt5CZ_F z@Zo!xMNKYtWD7q*MGFX#15!X&p+b4)g2mtn43z5eSIfv`b?xbQk^2FT2r%OHd%BJd z?qQ=g8ScxUOMN!ote}qjeBx=Ay0<54QvQ^8)FVL1TUE0?BeaKsbbQZL$Ca+UEm-0- zuE4EZl$vh|xS=kn+g#zTcnFIof-^Q_9ifoLM9B71^6t=umtr?o#EqcBCcl9ZX+A=Y zIEK0{Sx|*u*gn|T3)p}kA*S=gA%Y~{mBX=bl^`_?(Nqnp{+HOXR5QH zt-cF_?q?y1)E)_$sTjyCocei=)sYBC6`wPS#S`It>G-I3w=RDl)YNt>jG`Pi_E#-L zi!TF4;EKKX)@uJmY3oUq`n$x#Q2G|2Y!P-Jp_dd?Uhg&b0W{XfnZEtdp$>MbMIATsO(>?587lzO`Hvb6I*@k#-7Q30W)M3o`^q~=+F5x+zw=GK+Z}N0M49n z=TfN;3`6gW3AaeRk047g&*kjBFAv_A_MN^?6TNSWE6wGu_s@xh>kyiJ137g_m9yS} zEyg^PE|&{Bb-zT5d+)jrfGxABS_KCke9W8npODJVR*GSr%mvc&7Lc&KTfi)%OW7f3 zSxWb44xapvKmk0tIILh}j-Fft^lA>|yC!oHky%mo?4vI{VI95rm_y07A+xZH{AVB# zpqFBmo%D1#s@K0uxJ+xz43_0RvqGiNQDT~5CHf+ku_MV{p z%!ZQuz1I9u|3Wu|)qz^MGRbwF-|APBM{yCJ&0P2@4`I~2c}d{$zBRvIsN28f*A z0@V+loUrAYOx(3$=cr)_S6Qq`{aOkF-BF&*!=(R|SE5s;)g@;ENcxJw)4|&bJ&zPfiBzR6n`k-vTyTVChPLYlH#v?@e}x zXcwS~ABaGRR}TI;ovAMYuo7Xd2+hEHy0R3>|L-4nhf;|zfR^Sz0x0KLLNfY&dqCB_ zfv80T;ryo%^AQ&O3v&Qx6PiKc&h^GaivP>U0c10Qu;AcIya7m|^qw2GYalJ8J3R$Y z2|%wsn2ZwYv=SZe%_jtaImQ}d>4x7m?-vaf7i2ULj}55PNLa~a!Y}n zMh(@8OC8L6b)t3@3VS%rKP@tkM_>LOeYt1(P^y$)J;rvRgBihbit0CJErU*b{>K}b z%XYd+j=GsOG6{hhE@Yh2$PNHtEBS%>Hsh|VvTI!@cE&si#(dSX(gR_Spb+HC!>l_f z{3-I`$p8Gj_|E5p|9oEWp#}>A)El^T$_oJXkfquGY|r>*wtO%7z$vlK)C`E%xlniA` zlFUhd);eY>l)7Uym(K5 z<3-R{)n^aX(X1V+8Q~8|C=%l4_RI<%;}bi9r!k+Y=-9oZ56&EA=|ZW$Ilal%=HCpX zs8r65&)i>aGnsV*P%re!TJR1@?n{GN*^kB;nf_+Jq9}LPozhz8VNOWr=a(D{A#j!I zt@PvRnHFB5P5C@3QcCdf_h;n8g~tIER>)b4+mI%FgHM9GmrFUZ?J8lfNXM( z*Oj>bNC*1zGcNxF4*j!^p8nn|^T-!?APu0yQliGj`xRWjgC-ioE9jo@PdsBK^?8%; zYn_SBLOR%mc@ZHwV#<}-z2?z=;!Xg1Sq?@{t_i*H<6|F#VMCf&??HY7ViK_y4D0sN zelx?#83m*VGiv38ZCjIWvPH5NKUE5re)NpT7PO1*2%H5b3FFBrN~!=TTm`)O@~FQo zuB>SbSJO@rJQz&TrO7>z?S^&v0*88e6of}sNma1 z?UIw7T%$6RYqYip^wm2kyP7>dGmHHipm*ENKl^gg?0IH*Con319$x?*n#0e0dCeZ9 z7g+%CtXuzw8^m|TTtLTZHPDjQ;%oT5e9ZgTd--h|IgBRf=knzIWaWb!nxK~w#%lxg z&D(aD4!>uQd4*^v)Nb;IE5C2}hHl}{(Qw;rX6vIT$xn8> z;*aZLI&-7>%f*Bt7%g!``EQ8yd#=v@W|=R?X`#Spdkw9@!NEB=^9NwTrC8RB{k$Yq zX;)F(AupR{7McNU2MSpJf4W$}zNRE% zHrGmmP`wIgwe`OpjeR&`{6HPe4aKaVpUznugbZfOMgwJ9H?8USSui<#`|!aZm8=4r z4DA5CJwEHjbJ~G#oZK?{lLNPvJozgWM0_>O%fJ%x<^LhKlh^!!tPRUSBf^f)&DAjf z6$;|F5_mQipQyaNG^_Xk=nvSnYS^{+ZrGkgqT(*MB)U3ZgAPl}25sfPAmTrBO~?~P zQFJO-T2o*N7SiW|A)(g4o_h*%j%VAMcj4QDav;?%?3~Zz1W4wQhX_W z9r?kX0^)fP(Dj@>}>tLy#HKDY-2vE zuoaJiEL#fT+EtG;rdw&v5@m96m`~2J_0V6k*WH7~<%2ETe7Nq~`};D}4EqA)TVR*e z$pr4VwQJ7U1}9O3I7wMh(?x7y+Bl8(pNiPz>B+Hr(sG2uayZ{_Au-mW?N)FGYwzC9 z8Q>GTP3vJUL$ZUjNSBkWZ~bu6x73$kp@p>{QMNx>Td54|`Dl&66hVd2tbFj1Hw+eW z9)@$g3&^}Z=?A!5ZHA%1KYxN)bC@T>?>a{WT9C$92$r{ zbKiO3oC?69KV9!WVxgwUrTgz4y-J9%Eq(x&s#ZsDb9!)j&Ig{}_WQZ=z`UA8-E&vg zucr5kH)C1786EyZDeXL#O|m+Se2u#c_jLYcr8K)JkKLR~$t6>H)(8or0nx?MHX%np zmAx_0_=^R2)n0G)M`*!0TXWT%xW|9IxY(h4-&M7aMAP}he;(A9Wf>T>dS64mRf#X} za?pTXe&38binxON@VXl}QS!leT^}+1RwK;}PGcl4f>8lK!LNg_7b&)zM|9U#5NkVC z_v_9-=>(x7~8=4?%XDmAZbcOUHlaKV~8brJ;`s2 zVW}5{DYNay(5~r?$iE6&aMM{P*S1v;-`@P$oj933QO*C$O)~sWU4z1DW+Ep>J~n4= z*OVXJI_Z+44wV59;WOK5ep-Ksxtq|i>#Ym@N+O1LyhZlet$tfS+ttS~y?4O;i!GTx zlhvOHKgEw7<)+zTR6!mrwzbB~+$lL97X_$_>csl?UTx=M(tSPJYJa_i|9JUS&b9U13mFA^f;oD&VfvJjQ3Mk@|pX&VF8F~>j;Ml82raPrGPY`p!Wl72}N=Hj5c z9PH1k-*NzuBM2*RSp@L#@`OE^vAj1U+hRmychY^tKey?)m0v6gFNh32Qm+UoVLoz1 z8gU|Ets@)2+BmJ3);06|!;-o4V?-D1UbX|y%ME5TuBwXPxjb~EODcH5>e4n%2PV|$ z-%l0qcco9h^DL(+cDI|I70%-5F>Q{j#?1fdpm7u!bX$OAC%2#7?8SN|)?d$XpJjLA2745}5MVyy1;>;3gf?-E26RQF6@ 
zf}tL%->qK?trn|w!^6S~#TWlWZ=N)G2SBm%Gu8=fEQxR}C{5!n9Y1#R=ZOBKS;7(d z$1ExJxP|Hh1{Z=CG`^ftjICOOqB9Du6UFbNlSH`cQ;1J7iB!CM|`!)-oWZKi?P2x_*ms@Hlthf4Nkw z$RQW2Ul@XLCp30sP?*hjC9^E!yU63I<>4|~@S+ZA+RNc&dP2Xfe>(zxr^p^U9pY7P z>jZX~<1=3Kkhw<3nh$?LQQX|lypPtA@$~%7niJO18b73YZ&x%zqSCgP*$d{NtFSSN zMIt^N9^5KPp*<^;aC&Q}T>ut{EUk(@4KAj|UPKHZQ{UZt`-=1x8Sb@4ix<<}HI3oN zi~@FxlM~;}3AOKMkjmvF=+BG<-+WNRwE|pPPJM&-yo&far=nPv*EJTQcx*qB19MEU z|IZO0&^%BunsI`Oq6h~2Ru6W0oBc~W7ss(y{bITdou(Z#DNq~@Myjois(wRmcbJU* z%NLA)*TQe7R*(;ePj$$WPkR?`}}JriVdssgzsa&_w%F-kZ5@AOSv@9&1K3#%W&a$a9ot@!nEgwv*L@=%EnT zzcwv=J184Bum2_Q{5e^vM`358B^x_=E$b$;pdpZA`8tyMQtpKfzXshWzYv76#Kdk~ zamH^ibqJ{h6);)LKd_?sgdFLjfY9i0x?R2o;cp(3iqx9fTUSTrcL%7MLU>2{2u@cZ zp166W5@?-Esvr|)XlY2yb7u)v2EWLLqu@e?ZLn+0yNg1OnB!1|Crzxzz;xErn@6dK zWvgfuFQki6+NxE53TV41k@xS>U{?6cp#L*RpUD=nHtE+g5>#c-fb^p&oF7Nb&;xc=}afpERUJCA5Q5qh9Ui+4a0LdHXR83F91xp$ljPDTQr$z5=7lDtj#a) zsl8V`69uW;TvmShrq7pu=hdtpC0Mf4Een!ATz`nacUu7!zX8Ye1>CY2cF1ge%~92cw&Cj! z7BMfUKw8|uAaEIP?muPZ-(PPso^2a4H~(ad-ScBWhEym|ka7>Ptq>EQ z?ImDM!j21-VHl19NYR3b z6VC0K+q%$}&4S?CMTwlnI2VNxSVcl z?;s`O{YoEQj2}Pqj73oX1>~hg$!HU>BS|l%?fh+J+BM~sj`RDhbC1o(Tt5blKkA22 zbmSd!tu;Bn#B7Lby)@k&q~H}W)Mcf}WHZNtz{Z<=LML_nT)S8l7yhHk@n^mS<+02= zLz8-k>vBAm1f}%F-p&%BKCKeMpmgoYmX&7%pSJW=MUX0{-XP;;jF73(Gv z8K8U+@*NNcu@~3ZUWZ)Db3$}O93oS)QfC)Wx=_?N!d}&@B$DtDJrl(g27rtgGq>;t z-N((D-6T*oj;11xSe^Ifym0gc69^pXc|vk#U@e#QKLQi@4o;WU9G>hl8y1UU*#WV% zf*N&!@afwJc9*--*hH@wrVs2EL5Gm^$v_32q!2Zt0t^!M)7D_5xB`$0PcUHgW<&E&2&yM~T75Td)ZZk!5SFQBGyb#Cf?w&mG zW@C`M%zx=KllR_}_g%%8-j<5o41xz5ewwoTp5P$DqU=+Pp;stsrOTBGS)p2~|C-c2{`$oF=>g2p&M7{l^@B}^$Vb>OBS=PotcI8Xcp(`pN>8mdx z`GdOTJ(5R%pXR3%a4M9-B{>+*Z~oL;_5*|8CVrO|%eYda2m(@A^mbdQ5z2E~^!=lr z^?S!s&1P=}!C-qB~T8qMANEPm z)%u>eFE{l*oAx5wRqttw?FVi|tk%snNbDcZ31^AptFq~M*e-7r^P{uZQkxW>i7pI$ z@N4gQLO(7Y!qkT>e{>2`KSZ`HX7jn(j%A{8&+)$_Y!!%NMqH%(suzvJTNBCtc_g+o z`s`w-atXJOXLxwLzs-vSEo}#iTdbEh%^X9T&sAwF{cbr1ccNP;3jE&`Fn_m0I5e+g zSY%NZLGC!>fV$)@*38YO2iM$UbX{q}I0Dc0&|a+GZkg3PAkB!KnP|APuYbVvX$7HU>(JRql1ay4uvqV2`=h<}u&uBIn1Jh$J>2^UCF!QZGe!-5azgB~SPxJ^XCf0F|D1{}(da!6WY*FY&Xbnq1aF zhm>ngzt%!y5mjy3Dt&u{>0wxb`><_H^^}9x>~C2plDTY^_EElniqXG!4l|BHNWeFJT1=Sq{mf9=fXU)x__u)gap&#_R6dKC?`uPOtpT zKlN*35^R0bnLP}`7?e?SwrhxNd43GV@4VfeocXe>h6^NXbA|l(YruTYoUeK*^8y|rouE?0B z4%E}uUoS=PncV$oa(mkk1@o5?;?8}CG|eL%ls|kTY9)lc&SD`yu>drXeBCso_N?ev zkOCjg+m=Z88Pa!ndiGF{SABq-!{Y5JSGe!+RgJ^A!rd8YvQ`%G>VvhvoVJwh-0<+t zuk#}QUZ6H{=17)wmz%8qQL3rAh3)cqFhf>?X0}2wTn(P&YO|uHjTzm1M4-wgG1YwN9SbI>+ zZulum7YZeZGDqHHXaQ(idf}s2fU8^`q%rPy@NZC!dqGw2hxesBASmjg8Ye4c*S`@df5?>dF&OJ!WGDj)J7Azx^ipm4YS_Z6Jy3XJ zr_ii#D3M05o_+0 z9J8)|poP=JT$;0?HtJ{xUmR2*1_%7R?jTWcUFMZ*VcSoXbi=vz>0@@L08|o7)u>K_ zRc(x65cl0~_vq)S0XG!so_~4wp}(OdILlbGdR#SJ^2ne)rRja(y@&20Dbp4F1F2V9NI~l0=4tuc(>=B(OY; zNrDlUXayu&)-kk@00yAR31?4w&);SqIaNhg^_8k>_Eq0LpJm%B{g$O-V4g@y9uC)@ zc)Pe2A9Vto@R`QtS-)jHSw%aRJB{p&(m4xFgAYhy z?H8mU_NoSgt41ZN$V0tBmn9xrG}R#K%DJL<_7w!?q4o0j5=%r|cog{e3e;CWH0N1= zhc)P_p9!-CMSLTe=;gh2zfy8vshY&J(eFghdFhH#NTZzEdZpAT*X~B|U_BH?n(aSI zF;xdkp)?o_bj-WH>_J>o2DgaRFU!6>4|19Gyksi;)#u;uXbL#_S@BzUVCK(zn}kOYD4DzYs+ch9gfOv0_~3c2{qdtj-FE=e^!KZUrOvSbVcNnf5GK7He!~)FI)>5D3={FvtFS& zT(j8rcNq|Wi*(bb#Y}j5j*zj1%ANbZBOvwhHxN&@>VHELP)T#cTQ1R^(OV<$B_Hnq z-n2QnrkeBY+zs9S;$wX*Tc8Bx_pJr7*frZ5?tb2LzK85a=rWeHTx-eT>u7a$i)WXY zENV^OBXgob<2rqImN|yFI5YWk-B4m~{o;W*xVv}I))&R!P1yv-qNPHYgALQ}( zKD#1tv-?6GEvOG|C#EAf|IAn_@1;J+VsDO(C{(|swLzoGp;*?e7^>=}4{<}_6g`zc zo-?Wt$QXu$(-p?Ct{hkb*u?faJq|2GmNi1*hYEH{XFsn526riQXxPpG-NzVV5{ zZTX`u)}gYru|ES-8(^6PgI~k*Jjk<$XpG>5D1-Qq-9*xM>wj73&s5~;rt2oE>j>&F zhM_xKDKuruE%@EK=aJHI?PA_sy26#9KT`B;YkkW(pP%(KgV@$C(o2RE3gH(gM{WQR 
zSd#q`|I)wa^!I0JLN4G?p?Hh5w>q;$BpQ$9Lr9y=_75N_%P(}0&Uu|g&JywWKFHW<2k$zO5s` z`WF%WcXtT--^cd&sBGAoORf6P-fQ{;oBXOlJx;f#h|2!Oh7;!xWhu?1!026v;pB@a z!mg;G&znner^TxXa+u-st*eYo@nByc)rV*uN>tCPG=Hmq9~P5*Lzgw3go&1i&T;Qz zX5y|&HhR#qIOT!MC!-DTlR-ZU5K{lwZaRtGL_||dS#ZPf$M${v?!8NEn~8E5iDJ6t zUj9|q`saJZ0FfOSw#dazlw&J36Nv*%D8jJoz(hP*oU$LnPHWGrw9~ygDivT^ELg84 zc)_y=-byeu1Pc!!Gp(@x$m_@DXKrn$yCDZV14^(a{Q4XWeyNdr?jAh1*r8}%&m+<; zL2{`Foy*P)B)Udkj~L&Bk&RKj;1R%EX?P9I{yoZ<7xS`NDAUcXzbM8Nx}J0R%_+u{ zvnWI_l{9)ddf-o1Gxo3Y>_4mIx1?)qN&(AP-YUXYSd;gfvG(qZHTTttlwN^zK+-?g z{@GMVi1w#~nw6v4<4RAY^d<#}OOJX0^IQ|XZBDMYk^Yq{}h54Zf-q9#O1s^hq|TYMg0!u+t_T~@JoeQg6RSg)#X z-m~n^MIYre%4a!zLN>RaEz;hmQog0)bbxeVk$QSebZke|M~jY4DeRG`#E=r zMUkpwAvA5~dm`^~j*y_guk*zjgS-CT-g3s%GghJ8G3CJ_K86Y;f3_6t{Q;M5F0yfM zq}5I#pLOz1Yu#sRzklt|eIrW+qY|#TC8y=?SRRGgMtz~g!L5ugFks%bnj2Fday($0 z`YX(J0o1qC!$=lkh^YZI%~_x(o|dze530ofc`N3uwG0dz3x<9o%XIG0 z6$RIi#f0;wj|1Q?aC7he4xA-^99~VxI zbb~h<$Y7ulSRvmFAv7<+L2$J)wAmr2!%`Y5d6!(bnC`kTZgL%b#~OX(9Gg8E z?mci1wd8W$g@10ST0ZrO%*nd20mUUCZy5g^AF5;*i8!*k=5D=Y-;eiKX?}Yz7kQ=^ zp1Io?T}$xHo4>}*Colc}-jDWD{Ct+Gt`|D!a8qq>l_Hm#HW(|OWL}fqxQOQ0xmE~9 ziC#!3XJ@e}V{O^&Byi%hQii)3N`|F6dX+KeY2Z`*v-p@fRvJ*LI_q}x3UE~g&C3jq zCH@qAvY>8JJlm;}llIv{c(zZHI{O@pGNxu#NjOp}dX;D&A>&rlA9}QHWHZf@**1sS z!beza%M1n$UXD2y2O+U#f<$F$pk5`{w4Ql1<9M!$Njq2I8T+LlNBl927`-S|%=IdD zUm3Db9k8Hw1~!c8J!xI8sw{@I$B9@tbBVOYm#lip8PRUOH1n1*A5*+^ZB#-sQWTH) zK{Dors}gk7bVU8|gKSNj;o2_19EYZ#oykNIisIh+)>rZ+GZLh#Rt5?>OcC2@R;HjB zS>9hz=SnD96n+fZgU^Utg>T@}4c4nU|C$D}n5oTJ#D{N-M=xd2m}jtUlbq>u|8-bu zF%9qro}%CBC7UO2Xx@-2=!~cLcQ1v|*x@<>=&`25lCJP`>8}%&hg3r6#*S~AhvjNl z(R>_^l{YalC$62#T@fYPmKY+!F4r5i4(!tyRrxb!0Q48@jseR)^_Tvc^zr^v-(DC$ z1Os9w^6+mH5RGFef{vR;8@mG%m?iOn?NGGVl5(2|eklkBy&4CWlif zN;K{J3?4%c@btnce3elooGfegYM8S8$ow3Jy$l|)edx@f@vW}I^^ITm-T~+}3HfQ> zkduNELhgwr7SJo7`V+cFSha3%xQ}yG1!?_6=8_FvsgKnhU^N5Lcd7;PoaV*Hi6tOm z!Wi}?p5Wd}`$G}G39&s((O_$|xgIwwh3Z$FErag1P&1x7=(;FkmQ*;avi5^^;KGjd zg?B_Zg{WT%IpEIL=Eyj&w4PQqHQN)J@3H9>EzS!dA2>ysJWk{zE!Ue#n(k zq8c8=fb@sL4-m0D0LS_kvK)7$Vi(Z7AxAtvxxr~ExdD7fKYh!R0|+VTA3L2~6^O?g zDZFapvbVEZ>0qtX))hndCFF>0mbl0m)>ZG6R!@owg;-A7Cze~$UD!_ysl-}bSO-k* zQ;sd62FTj-eMDY4{mINlT#XmvXm+nBb!2kB7BOga_NO7~&;;4#LeL=sRRe`JW^Whp zINHhP8bJH`bRUsv+c3Y1E0tgWWf3>!?uYxI9(H2euPMk7p?kh9Gg%lS3>qf=`Lm{{ zWt6QdZHT>btR*46EOOZ&zv0hIWP^BFdN7rVaxUQ=KdU?vw?}X|bL&;>Ox;sB40Fg~ zSbp6z)_J7uj8|^6S-9y)dE>*;>;^IS)3jr)i<}%QmRjOC9UOQ^1$QTw(E-gTaB(g6YM6YYb zl)gaP0mR^>P*K}U)d@NJ@yTNlUs%u(Wo>eF9NoGdnEVbK#Vo?~D-{;zp@T3M8R^3( zvF-Z`+Ih0Xr8RYSy=v6VX!qJPUiNUo(-EwiN+|s6QXD)r3$FZRvCz}SW50$Ry3BIj z&{1W9&TYspPaT5)IZV_XSe}RO80R9pJd=!U%Rmk3(t}pRDm149QwCX$x7I|Jl99?r zZjp5T{8~JA+fAD1QU5s@ev!+*mMBg*6Hm;+@+@~ff4y;Pq$~YBqw*`ny7DzBk0U>7A^2Zew_ro>^yK%W)e&2XJ8LzAR`3# zFr!4`+zZgm(+(h)aqK~VFEeB={a}*GV9Ke-*)7QNXmdDhql?|sQ>Z9` zm-h{;+h7Q9A+5AW{mdr19h5z}gD#HpRS^kklaeJatblr`Gy$x`2b@lLF;_3$n$ry= zi@w<+jiCZT)RHy#KV+Oe0V~uAJbd|SW4aF&)iSxAb{uE5m>7;zJV#10ov*BtQl2hx zX`3hAcBD}J>j>QvNYo%lpl?>Lrx{RUw*WBcge$C?XOHCT5h5O{lt%yNev@y0=fMmL z$d|LpBHX;>7in|8C;dJB_>;)^LoKx!Dxeg)coc<(vu>AG=P>8CAKTXiz)R+VM*7d7 zM?QmOtr24wc7nJ~W`#8P+eSM=kd*EJoO$!7_#8+2Pi+M^$5dkDfKU`KK3K>)lg*nK z+aCc@c-Mk9`ejtV1`gjCE9yt9KyAY3IXpP{v5DBpI0wx=df$)?!yN6;>KrqJFzmS{aEW~ z=!^$tH)l5*Z7jd3sCX15_~d~Ftp-~@*P-({X(F1la3P|-)#Z^28~eJzXr2SugLuxy zN*af2n%C$uGHi9NNcjzrXpc&+cvdfYw_W@NXsm^3Zq+1vcHx3zf8o?b%e}R!wZJhDf{VA0N}ta)Zz}?j>d-DSxGaCT8pGmfRs&NcXIqIccc%42I~Pn zy)i;^C0n`FDcm+^Pl&Yb-d>=G!nV`{+A-Bo#7p7B37Ls|F95)1rCP1SaUE8Vj5Vum z+kMR66!{C=dGw>(u`4!sV5bqdE+pD{;okdQgw`gK?~_o(8tfFVBV}Xn^R5Ft$?V;U zOO7=yl}E0q)CsgVgOK%yRg;|MY42oWhI1*j>9nJrQ`iP4vyAmZFLv3y&VmNFP_FdI 
zhi1taChx;(ZF-##{L5XE-0UR}`V*5{>P~>p70?=oK>)!3x@o(;pj zJLf8Ef9)`Md?v6TtKFDa?=RW!czYF{Mg0zEU)OO{Yeq%F-W-{f5AFj8+rx}X_S<;S zFAzEG0bNH&Txki%*aXcpf)^Vp+2$U7a7>A^8f!orCij1Hk>BP3Ng)(-A7sQV~(@btZVXw|r-ZEukCMVTEp9G>wC zu`f5`3U2v`JLeF=q7o(yhb#Bs^U{rw(XYsj0Xu#@3X;N z4NH!o^RTCR1;9`V-3>MnjeLQ^ou`@g*!#w{8*=aPsorWlU9wukq2Bk5XOc;SuNTd> zvBIhe?CH)vn)3qk-CT2FBPr)=ZH&(ur40zaE{`jlMq_fKPU2@QD-W}pAnSRx(bn+d z{K9anl_^Wi7MJj)z2*m_K#H|-AhKg?*r7v_L>ks)8Z0= z%9?+@N(Kvzn)03lTPkcPCuz0q6Gx6CUYs8!XcmZ|b^&(0Yi3U4^Y)%Pbj0OId!6cC z(>BuLpr&IerNj2_jqaJN5_EfaHpa5bv&AYfUulO_iu%QOWgufebQp*Gp=oE?k0evl zr*7Jn214SZaKR zV#A|Tzt*8Vm)QJ@D|e6cK#PY^N@yoiVR#K>BX?uI%PNap|T}g<&(@Sq#Bd&Iq;&N_S6xqJVX69Jq#7V zhpca`1T+f1_a}9RcZ{3uF0hgEyR*SR@24tNhx?|F63qlrzroddD^q_^2p>9FyIz>) zp}H6GQmVpF*X@Pj__}rK`%#;|)~nrpYv*r#udjC{shN#J31DAJcU zcu|FA3@VD<)emo^uGD!4&5eCn4KJJ1q9KgLTTWBg7x?i_Gp787 znIrxXKH3s3yZsNsJY!R1YhTd5=tCrUd3G+sPqu`J+@>)Je>PJ>&^>HEXU8QphMek- zTZWTI>K5Sf`-ba;rrIK}RXnh$-VrNef*PxKBS#I4U8jZ+QLG&n4&& z1*O4lb>5g^>7LS+H2!AP3jCIe&8j;%P*rXio~ON$L{kdZ3fbGJ)Y=6K&a`)yVPbY~ zQu5BlOuf;eF-KnHoT(e^Zm3UUvFNvwdjS20DNpW(tvWF;n`CnR+}5j3{S(JZ70mnm zaMc7{vrD9LDb0;BA{b@mUM*WJlB{i7`)Tfs$HZs^Yx#Gs#!P+X`u}wa)eBUeKt;9_ z2(OhFOtb6t=2)j>>%ocLqEy+~vIVJ2763^Oz^~f48EtsCKxk3**62olN z&e)M@v&%dQVDjN6s45jMJ->Lq9yC6={8}68yGsKLA=X|9FXjv1DxoOSIytaj#(`T? z2MLST9u=S_r>>f+nO^%cnYU3v%HrgV)ttg>PO)>s!4c5Kqw?f-wEOF=f`^xTCjn_pBlP$shx8R@^5AH ztBRCQj@|D#hp925>PQ-UdeK^zC3EWZsGRJjKcBFl^p_DJ88`#`2P1nUZs#5B4${$N zahRcL_ie!b)S|hNu?Y+5$g6n2UGe+QX2vi;O#}_W2x1Hi3-LHZYt7`yhkqRwKgheYYHZpo?^#{h9RNGqo|wQ&WFw6u z_2LCu?cG;@En^IGeNj)^D@^?Sbm~0d&SJcc4g2hubeTK{8qA zADc>$1>*HL8_Hoq=A9PUYUwiO)!Qn%pwUhw{8*$Bb+zPR8nJIPefXRHLKC! zC9v&6R*O}V)F#f#GNR6^-=xW1S()D(eTnbpB1k$r4vYYFGt{C;YHFSg)4xnKK%!`R zfNPU|!QDj5X2?|qcIf4-j)xT0$|>eUrb;?uqfHtfu}kw<8C}fK`xYO?cj$n?*l@q zdoMN`kqnfZ)1ubr3TN~3Pj<}?ab4nS*j%eUo5kYfm%y86G2wTM?Gw<4q7!IRc89l` zwYw~+oc1q$a`zXX8ttnU`PPw`l*)&F^^k+z!fvI5PzBzChwN0J6CI3UKIv8Sz!tK7#nfBH4}#4?&Hh76orBD|*w&s+&AP+54!EPhwPqF@;|A#YiL?|#f~ z^>8eV>R}8sIEDO@3HZWmRz+F5%3c;ry$1M?)>9CUBbY?;4_{FjWd>u;PKBXjmcR*) zV`zh2)cH>FmzyjuThvNGJ5SM^F_39D8|f6P;=lXivZ#c!iPE&^p(<00IhFIwxi(uh z2}7tZZ0CE|4NSot1iCvWHX7OfJg+!@tN9y=Acjs}+85WA6W!*B^4>Wx6GTq!-9xP% zK9;yyuL_7*Ig>BlrEcaeMVq?5$>k?Gbaj9aIGYW^k9*tX~vTI;!LzQs)j4a0eX#}}wa_@k1)Y`pT{sFxA)OS}dI;Wcskb=WWTe}UrO+2~!?|G(V6b);;|h_DgeN0C;c^FiX(Qf%^$qNk8W61C8@z3WKS;_J8Mm7?7ne?(%Nt6i} zYX*(forX$Wxw_oDa^wh2Yk&pGL5P*MVmXNSqYf)d6y^v|r>V-hi69x^O*M&$qM#w8d zow|Ewk~oxR)I2PV-19c58xe%TpMJ ztMZvp0QFXc(eBRA2G5+bAor9I@N|)?X#M%OA zE`7s|DvADgN08YSeq-@5@pA*-yUCt4?%>5n|B``SgEv!xA0b(1f-^`hwP%g_ys97_ zL9a!?dP)2#unlCrn=IVBBkQwy(l$ULVBX#xr+%-|6VsX!=4v-pkkI4!nJU{j< zMJE3(5|Xpur{o%4N=C;~m5db9^4_gdipP-@u8VfBBZd+#kiCIzc752u?Y`iKQ@?(6 z4C!6zo+)%UWjnu{>A7R*1QkrGd)W)Uxj3uD4fEE0z;Ul~d~o!RYt^9s!!N-FC-=>< z06^##H)Cj?vzT3{RVpF4ck(>?x~&OmUCxrO2cUU(<)G>(*KKZw4xIX&+!;d2 zLBFXmz?#*?+&@n+v01`rBncxv$4sY3Z#V>oy3Qc zkrvReE(#Wgr?bjRZ9}a6I3Vnh6`_?fOG^i*}9n*p*Y` zhvt_R&1aXt{6xixv~XU^f4NON%&2h5FMjAa^ zlnf!=Y}z`&?q%9P87g3Vxky7zQz3PEfmlobc}w4TWf@O=xjw`g9Q#;;dQr@<%TFyS zIYTvRVo)#E)oEwjQ!X{uosjemNWOQm)w`?X%_j%>L7UKpPl2~|J9q5*>U|65qG->3 zI@){Ip?wZQFMtsE@=V~zr6)Rzox}+*=fbTr6lAKJwv*iJYGXKW8k-YNjlU5n{dMr- zp-UqhLA95O{c0%Bv_XdA!(Oyb?`2Eu0~$feBGX zC=$W*eYy4?OdlLcJEg#~v#TKvNTf~qyH|QI)F||6Xz{(Zqb&fuoxI>peC#j%o+#G_ zi|2E9Nyp|5taK?>9al&_O$f+^*v@Z1s6P2-TdI-9pUsn06r>psJ) zT)7HTjmFjwi^s9+m0v*1Z;CUgeTWf3pYHTtph5V$I-fWQX|alSjmE6bXvbkVmny$Y>eSoO3eQ^SMtBPlK)M&3HtxPmu}OZ@a#dDq;CJ!-AaZL z0Wh<5YQEWhH{-RcG#Q}*)u;jnjr-9<88q7){;$^;BxcLr#B)!v36sW)T8eCEKp%a{ zgo!z5J1hqOb~<Fz9ib3je%chza@~9|zJn4(96@4k`4`NP^np<9>_$VNl6V 
zh%-$wqhzoF$Y~1ICeY@Ian;HzNS?}H-b>@rlW_!zqv9H>x$R$X=f)J^X_odS>(0Mk fe45uTrU{-=PVYi?-uOAw@Q=zi&8?}M51jr#S@3q6 literal 133793 zcmeFZWmJ?~A2+Os(ybsMosuE~0@A4n1`IHCcOxN4N(v|-odQY?IWTlfNh1z0bSRzD z{q8yHxzD-ZwVvl)>-~CvbFo-*&A#^j|8h_0QxydQTq@j4mo5=JRFr*w=@ORSrAt>U zu&;riaJFCibm zhZXqW!~Sn&|F;dmS^VEN_}@18-!{Oc|NjRHACsH(ea=j~;<%GHh6cnj=_7+N=jwfI zrW+>LMoT}(+*V~Jx_yWFzd$dM0iYITU&__rVC;9DM6<-eFOlE;EwsuZ16uEaupBGT zvqa{{TB6{e@}OneV%^&I^`=wq^91}hqb1i<1KVI#fYYXfB6t8D>4gcoT z0#~QA4~-Y+9J!lu+`3Ad^DkaPe}Ge<-#De?+$&&qxHU_HHIyNPHB@1%&2Q9-UFN#3mBp3* z*nA_`heF4}Z$u%~jMTtcEcFPsk)a7DD!lI`Hpu&-ErJ_mczVtR)!t#pNLyh zxaOvxSDkFs79KwOZ;~asjZI%i{*GEf>Nop}l)~tGU^SQ-c(hTwxdwF`vt)B_yP=VA zb}&6{W~h=bE+$*xtUELvlN$2{TfC%k{;g<%K?)HApEjf4DChhrUDEpuncyUqRFT9n zgf+!Dd8OCMzS%-gqTUd{SDNLM>mevWZjjKHUotJB9~8d(eKt}fl}!AyA8%p@!Cf% zB$}s{icLB9ixnPLf|UvmpXugpy!RT5ND#cEJ#PuO4V@DMQLWRX#q`?> zha&us9(yZ4h}TDTQw434gnxE2$+)6-TcLmhe7lwGNy1K_Xulv+sqO=%c%IZj;m5A{ zdH201Yh2P?D#n=R1#YNWUcm?@JKXfSu$JpGU@zMZ=kE)FNudsDeNMNhyw<~b8$wpA z<~GW0#|53gJyS>|5wtF`FVgYn-Tfwzhjf<%G>S0)9Bg=!Z-7#x zI|rVeDPABR@|);Ff^l8=LsSK==UORAM-J8s>2VBSa@W)LIR!c|6O)HfgcVXvF!hG* z+38NdS>>cF-g39-W}R`(i{xix!3V-xPFx-jm<#&?Jdr<)z!ZMwo zny$83*O&+GGyIbR`@LcVqiB=A=kT{W5LPkx-Wjz}P}cWTo*`^JTRB<&7LvE|)QC`I ziQbwlj|+NHdj6W6SI=fVsw&=UR-hxQW39nz@!BeNJEvr;%>zH0pI(GS5~mKFGn~YF zWL0-YZ(!VJC4T50yVG)dv=es!PJpP_=OBW@GkeBF_!oQ`@-N`iCy&T7&$L8) z5bM3q=qvp%`rqmPgX)iPq3_1gaYdOf_+im*d7#%MnO4k0c74z5VA6bbaXDkhWkgrQ zWO~}?V)B;9QbvW!?H21f3Q0li;=nt>*u|mx$+s^1y&w)@u3OPL#z!CcFfMtV{m;l^ z?{&n;gYnrHyGTUnR+36hIl)&o~n6o zk|~E;E^+B}pUATgLTW)SlScG$K@X(I33r;$X=eqtmw7E`5lm)LmGfk)2GQSXHy>ED_9rRSeB)n&h7UjMJ5q8NUu#oDNDl==^Z;FGzZ zZg=#8vqR@JY&Thx7_aB?WvzTjg~W3Q?ELX1!=tMdIy388TwlPvLe-t^k56q<-Lo)A z^V|^LoZeeO^1G~j!?AR0_P^>%XFA=buK9dthT(tXaW zkA8OZ_Et`NTT=5(YrU704FVx>4PK8ds4_k*7NH%qw!m~e{q zUu_50(_PK79k1A@{q)oeimJxS0vNhb@|h41@A9kw|Hw6Szkh=mF;Jgf0+)a95lMBj z!xdQZ_9_AN7%qdJkKm- z@1-dV_p_5!x2e*_=)jy-bBfTO44{XaLf7Zqb@}OCH)F4w-|=s;u5(_J zwKC1mA1-dSH%(aaKr{~iIVUrsT)}*?Cq-!izwRQPw;GSBit`!l{Z~;9Xxv-q z;w-*LsyjLT?!iw*PS)C&s_g~dIZw6ed`?RN;jyrR zYJ<;y{aA9AH1bZ);{gYw+}tI@%9Yhr*D*^+sWG36v-&m0&}{pLBePD&Cl9s*pruBE z<`=6MSpj3teZn1P?A5{3r28>fwQF8j?kr`!3(};(3e-ShJxRGgsN;R~Lv9805m$@@ z+n@^3P^=uxOII-kGHiC|)F{nrcxC$2qu8IWt3x?Yutg)P0YdL@dW>$aan1Y3Uz>$2 zBJZ=PSe>316n)rQIlgI4IO$EAVPCf`?-|jPz{k{Dp@}LWO+9057n*)$yg%ut&(v*( zll`9U38Cj?AlZ1%lXN?7MF4)Qh@(!XcbcUmbnseQezWj1WaIfBuQPzcK)H8YRGT-x zcoQq~`Z=_~3R+$02x{0Z3IG#RW z!iH@#`|IPLuAQWm;O#6#GGMJIv0y$T=Cb=JGLM{o_u()yeCrsn#$>UBi6KF{hvf}u zUWC0N?R?wX&__{CgGP@cf0?sXZ$GW;gZKH-qB0N$5dk-F_PcUjNbJOm8jh`aMYZBcB# z1rW~4*8c_6(JX*aa7n%Wvvl-Xm@$alu_RK-&w;IjXi#Ej;(LA=sDYCP4y53J|)>OD|3W#^+c zF0ee!(#!)+Bp&`SEt2lcSY5pU&L2j-d|i*l;nC6y67bqB31j(Afhri!jbft2;Ke(~XaO+Kq{xs2pryCPlp`$>Z|3bk%j za0iUc>1Eu)^7}@LW(-`-o*|;nTlMd@lE#EH1pam8E@7a+yb{nh5Ayy)7m1^MTa`^Y zIIi4#&b|W?teIuhn|0}mxh;>1;E74h#c#79xy}OM2QVVM(hme+3qI($0$|*EQS;aA$aS1n&0{Pzzqq zLs4J1gwUzG1n4U2>}d;QjicGLt>A7^?S_>J=i2l^yuNqoj=0CJg=Yde?&v5EQR1SC*6C6dri_^dHQtV?w|es>s|ZVFoJiiTqdRehX^T# zG)1#0^)&fi3Hvtc9Krq`f{JY3!Yj_tIUchv4iww;+^jbjaobosQ%+Ro)~*O2>rlfh z2HL6|A%9~wKqE9j!0Ox9@wZBQeNGI_dGJ(1;ONw!A*_h4Y}z7h$n-w<&H~Y@3>acP z!!dunafG&jH%n4YBJ>S()zPKdW|OS&@yu)IhUee4)~Kd@;%T#yir zMDPegA75*Izw2}UO52!f7GbjRwoLlpaMO7Y9Pr0vi?9A{>W?wLa3VUu;Um|cx%pos zpt^|^f^e(&e1E@{?+D1EUK*nCyxy#riYcRs4s7ncmjLY7i%pzY`)3}%kam)Ck4@mQ zHj^tC$=?pIuWDHZq|{c=YY(Cw?*`PQU@c^>X9WrC&3Y$p^(118svp0E2=h^#;mr1_6Ta3?{89%b>0^+3fTkRX*tEzO> z8ZguQGCZ0-n7Gtxn!H72NBCOc1&e03%=|T!otL^}m_aQ$`>e zl`uuw0)PLUo)$wx-wTn*XB;#+7aGN+K%TeC;i-XRaNa323z1&3%tcvT%OB->J-gVR9T31}*HMSpelc7ERl 
zIF}1i3bcG0I*xLqrMbg}Yqkedo|_*R*&r=2u6K|fHH3#!EQjgIC#_r>QP?#EG0Htut5~+-!7OIG=$a|xP zUChJzSa5)}O82^gJAt{tDPXu-BvXOy7weVNNmK$>vaR$=3$E*9i5!spfZf|9vyi!s z`h%&}RGA%86EJD>IipsvS0|s zo2>MA>Dd$Fwy9pP7_U1kzjIp$2#qglg<8lH)w(dXIsYb=Kd1Zm2gi*BTI#Zjh)mG0 zxBDIUvd?fPT$s0JDKkOWeze3;(W@Q+aQhk{kqjMQu6;#L*$ibz9DRTPq@^O6ld?Ej z$i8CF*1RvpQcYV;;Mx$S$ACW2DUqs*Ted(&>Ey&Za5h8E>H)jxiPg|srlBYXXZ;q( zJY*y?jKBZE&;#&)W$RQs$sOlWm4Mlw_VRc+r}wuZFt`3bWQ&8^oSW2vMCE$*?pLK= zfMZMRWDkkA4;lemH~aeWq2snG8*uAfFi-K6TaftgUa(H8mfu_e9u^UbC3=hMv@S}f z87RP~A74TjSH%~kTPy+n30n)^N~*E;jr+5Ye`{=JX@FxST5EWJ=7>B_0ko|BTaoU# z=pvA_rhVckYaw2dhl><6+T|984Tp1Kj3?b)>KN(Ls1QKNcJ2o4o}kfIyff{t=^NqW zrL6jSh_yOp)SwZ-Ifk?LG{Tc%^&qq1Qkr@Z9yoyR+6v?xt0r0U{gu4I6(U+%D{}PpJ#q_%WcAOsu4nm zFz-)mOZp5gGYjlVry4xXJGZX)*sBvXbt!m^1S%@g0mJ-`GxzhLL@InU9i>j1`wk!6 zcyP|w$15vQ&OFR18u@B@oj1?soEH01BW&Vvz`~Yc0&eA5EWQhPy!r9oDuo-?aIwBU zA@I(2z#<1v%)b@eA7!*NX=F0x#i1P zw}{F0?@%j??kx{>HzH{lMz0_{?i43`|gbpA*U!`~lp4=u$|!m4L_7#PHgbeFmP2c$_tKicsHh$fRi zPjPIg+}B%oGYJ_^7MgPZd<*!puJMX3u=DJzv|39fw_jYuAA0`$!WO)i=@~6v`z?pg z`>?t0XBVeD-kVbfNA)n{@%SYgO2^d`z*Y9g-@7zQc>M~L&PpZxiE-OzbIr<+luKh2 zVQQc(PJHN=C*a~hEhZbgL^J|BrHz^1@A+n#MMj{o_lUn{P7QGWdcnk^gSbPe(Q+S= zalosELfi5d`6q9<2A_y5Go#a&tQsVE^9SS@mZkFdM7A&hB6B$UB|hHbm#u&&cEjtehVkX=n>Ke@RUw0T&YgFygXZqX$QI+?#44k3CF}P0^Vo}vJm4kCl>d7+9Gcimnv&D%D%2U zcxHKB^4cRsD?EZWqj!)wIe_8NcVl<*Xi^qxHNgSt3kCI(AEwzty~r<^~6#S$fR8? z5B-1RvJ${$sQ)dZzkTUPAx&RDDWMUta4BEJ>tNc)XF@DW*$Lze;cG#^pHs4kEON+K zR5}u#PDHIO)SF1Do$-h4kaS0%})x>}QdC9k=TO;?yE>3sQ zZa4XeBARUiHAcMyNKlg0>uYT;_@ci>uTxZ09JhA(O0$U6a5@X;>VkXb%bkTZdRWqT zc)mhpMDVSD{jm>Q=y+@@h2QAUqW%nMk~rTc+G)Y#9UVk_*b886-UFNQ-F*?HJMtWz z0Y1|m2gBB=FoA6ZX$i8%wd$h_6#BC58xDTHk=fp01$SL{$<31 ztoP1?n-)%(;Gs?X9_&0GSA2bqpeK|q%|dPz_)Kgg4!0DnN#0{ZQRFAwrDX}N?si~fspzRzumzA})7g&*i8ID!$o+Efw;4;?sx^3 zjb8O7+Qm*s4D(c%I7(qBlhJ1{mIj(}OM!K85BMEDql&QuvT zm}n!ceuS2GMazM`M*FY5Mt=hKn%Q*90MnnnM*q|X1L&1XXsv5uZvhHWv9L)(EVM5W z65NE&>H>$snVNo_E(R%wYI5m^k$dCht4SBYq?k`?1{ORD7tTTVp-0mkS};xa z&qei>+$WWE@13g7F#p$&FJZ#pG=0lc?M1sOWl)L3nb!$k?-R!AJ>8rEI3}Y(F;{$X zwA484TUXuwm~}6ZSF~kfuD-tMzD2ijO3Bz%%8SPuqZaT@C1u2Ryu_(m_tQW7~ zaZ;=A1-XcA0{alk&lAzi+qq({AtU(Q)xKoMWp!8}|6rrw&7?IlA5vzP^5V}a`D@%9 z$%0v)ZuY+PACHdyXUO{|KqU)wx56p;3nT^HHYRcvI9@|qVk&@ap1Nl@F={hWHHce? zu4$mdOD3)QM+UJ2`8~tHN$Nn!xW1NL!S0M3#VPH26JfGGp8Jf6vlSFU2lZCLV<_pe zJ^%J~p*J}U8xH5zYk2i?!aTBRO;*z<&L)-vGH3Ui=HCf%CK?Dh{`Jx?V_(1I2ad5; z2GX`GC5#;CgfigMr@s|mxS8d{i3Z=c;fL>86T#RIw9N#ik!+w+)h%LU?_Bn?VftXL zkvK_eIPHD19x^Ek^87-AiYY8~YoWq0U{?XU+KdlIkkX5{Lc8Jz<{kNQe}OJKezYfB z!}=Tfz-aVm_1)4M80w$-_anbzp*Q@(Ti#Bp9}~M0pl*8dCG%V0U9FCHboOOhfLnHy zbXtuR+19vi+VN|b>3--M5jNLpW1?5muh(F1?Mgxin?&Q0aH&{=Rjvw_rwh`jKxN8d zq5|&6pk{*-3HAZQIuqqtk=ak%>&7z@ZuOp%ZOxv4%x0wGEM zbB+Jsb|F0*4p<912Jq@a&(BXbLyV*7zhkP`R{)*Z*#OM=x4rwxA};w+hon5rqplkh zsNIdp@iFXp&}w2k8nnb_mToeAR~tSG9Nhphor+fA+oO96WWvhBYqd^49zBUfjy4|7 z;qm*NpH>~`@ST(yP4?Hg5&v_lv10-R3B1!C{%`f$X^8>Uo>Itwi0A%V?!4t-rktXn zHJD0aYmIi3BB17SGkK7sbPw3nxZ?}JH?wUUM#t>KHO%x+J26wZOUXsiB@tIV3SjvO z03x;>Fe*bR%$!qd3(8rzC1WW!Vd*aRZ`hIqZM9@03&C$a2!<2=b7VNZ0dqt}nIiR% zDfw%};QBSaxxv{3PUnMC$lgsRIkXIiLt_Uy2HA8#6N}9Z)MeCdBA9P3-w%}~)dnG? 
zwc1InucQd}W005)U+w$h0M;rb*2TVR)NR^J25xAonnJWa={Eh+zktlF5kOy6Pxr$7N*qi$n}MZWm6 ztU*`~eDFg%G(CpFH0pKo@HKXGB`$`2;LGuY(@Zcf6HzY{Oqgxl@pb{Y8l7DSI*x-~ zxRaqJ=Oe)77BhRVrAXSBui41<&)E@)2gZrBVcZhaKUbPQQV7$e_qHRFx4f(2-t%%B z)K^f(8Sb{41SpXSKU_$*e{f zpbcD-4t9o8P$Qcf2A!WO_fr6bXwQRWYGCpmg3NnYON9gAIS+D&dBnXk`vxx#+B;YiqFI#=!D8*hs@IMIe3 zTwxB4#L0MJx1S5V1jP|m5P`rHyig&FSDFPhw_{fll|-l>p{x z;fx3HcD|1}EZOq`&#t(2goH^xpxmK{uS4^)IyowkG>n3mki_V-sb?Gqr0Y!yIx=V- z)qz?><-eRLHC7t6-OC*{mk8H?JRxoj77DKIo&k_O%*}fHvvnA4FMfaVmRhK9 z3>PL}7zPkctJ!7XgOso^t4Qhtzc26E#_>mS$XEmJW%p>{lxPh+Rm||n$)8-iYXqSa z)LBF4YL__q1f#-_U(FwZxp*UwhLcZJJ%(FbkePoNO1=r+UMb;^&3{h7{}QZ!Nhg1{ z5)f)JGFlgS_yhS?E)C;y*aQ8lVDhjmtEA($VMS;;sL7TY!N&7}3sJCR`dJFy%b_eH z)zekH=j5W}HuZARW{K5{-4G*7;avq~2`SB4##k|_0p1t05eaW-ic)}!(Ha2r&wbwj z(WPZsY6r>>S58}DfUL%ZM`WgSR+Vuaf5B(bP*KhfusU>zH z7!%OP*F3hSigZPJVGx8`7&W>;bwfJ`lwS1#vTc?q-?r|>DHL%l3N8p>sp`@il>=Ri z9?;mbkQ)IG_2Mz{vJVxu;w#9t0*NcHfa6{|w~B84I=v*3=5RPFGu$g9kH(aN{yZNm z(OfyFW57Yd+4k+DT5TjofGKdjl*10dE=P8Z;6I+)DFN&e!Y8!4zXSdlWcL8<3u$M)OqKcX>(83zG zNA3g^fS2NZv6{4RJbUBSXCtx&oN+{4%h)|S9?(!WxH=+eJ7%KlwyZ=>Iv~Kj4M~=u z_L}xOKwzwaKqE4+zVB5$y7&+|Rsz%uWK^Zx3dtcG5*w*}O6LbyCbx}iflYdr9B`#0 z%GmbJJ6EPu{xQR^Ujr^cOMUe$F?afc1jNGpe3gFHc`s-t#3oJ;wu_HBiYV}z#O*YHxPB(NU84o z-kBJ)Ts&4{7go-0dU19@L}CP_Pb?D?SEAh;>w$dDi?=tr?t-R9zy=tZ<0C*{wuywJ zUCKWv*+dw;46bzgn%_L1{Wb=`1gSz~B|n=2CQi}ungau$ls~Kj9NNY8 zQA*7P9JFa;)})efhvEBobgk}&L0&cGOO=JbRBFc7BJIke&b}Hqa9+kMY?F^3_6_m} zkr^AKCRR@nn3|0@Vf}ah!4v4+(5`Zy&Y$u&zN)1LNNIfM} zl$3yuLIl;12vtg53f&h`g^f_~rc$Ub0Ig95@7X!OKX3vC5{GeoO(61sg#z7%khgM? zhIzXH-N+{!0FigB8eqh&=S{g;3p`JXpQiC!qozu06ef#6xHW+L0lDjFVkjE|ape+h z!^v8_^f#_W@&PQSl<5uqJrPh3C5gs#dWV65IaC1pIEo5j1G{rp0J0+>iX~MHG+GLJ z1`G0rAx+C3xkuc9TYi(Vu(cg4=gZ6OrP@d#)i^VPwT?Js$lWf|gXwk0a#C1h3tt?d zeO~+#EL!&_^koX_en4;R`E{K6Y7ao*UpZe50>@r}47M?Q@$a@lA2Jaw#fpPiw;_}z z@H5v8zDK?^cTE(h`b_iL(59ezt7}E8M>>`a-p`avp5LPs#?nJuah7Tifr7BIMNO`P z0!yZ#TA5idqY)RQ``nuFe_rax*I?#}(c!lJ)vu#e1`R2s)x= zl#k3)9OVhagLr`MtCp7}Z}#psfMR#xB57VQ!aRXSvMKod=FAOsO|%r^+A=HM@><^l z(um6f7M%Bq!x>Ow^@ix19O@3e$T0+fp?fqUFYe?UHh`{tGTL|qGx`b;BT)Sk$;Zykzk`OVQN97<21_ zR-C661M6s2kzpe-EIpoEn;db2`;G&`XmS-4ffHvEQ8a&_MM-)nc-vn{zq|k5%7i45 zowKd#VlN1S;L*(GD}FBl6?C>Rd2s`Hd1e@0I8698ppCjPc~gKM2SRG_Ykb;=qcdB|61FsVBfYnxldrYF zY>mg^E}HCw8>=ss8@7t-!*k z3M2OUjcecUUxv0lwDFpc5DJIuydL}@btMa{1Ww8$yG=i~dk*}7g=Q?edjQO|-|Am| zAwl&QXdLYisg$ME9+l4j%?Z27hrKw2D5fL8|2RNluJi8VTB%_Nc9m0O>}H24*oX`VrL&;Pj5n- zh7;tssnri~z^gy*m;H&ktBd4q=XmV}9_1Nz+?EIEE(L4e*aY$C7q=kte}Fz0xa`35 z`?7;1E$P4&3bu+BhH5LeV;R`A=kAhW;XVk$2Ocm-o<_#R=3v3${dEgT);%86MUo1Tx-+>i3jNSc`Gth#GV#x2yXlHwNH+>#lH@M5B@U~pW=Ih7M8~UCH z8@6g}i2J4P+jESJur-pbK-$A=a>I{D5@P7N@C>=-2Vt!_4ugQ!KC!VKDbjT${?W_l z^f+?n7&L6%jlaJ3z$tQi*onod$KCRjwM=>>k1Ghx4NE?2?W<5lDFki49jx`$*@$NLQS?V zZ?NLep3o~9g_ibZYD#oZ`u)QdS4;u(Vy@@@?+FqaiwOcJgI&pI(yOb*jduQmXWn2VY6_B(@BV%xC7Jf=UoHfl2AG>b>Z%tm#&yYcjUuBSGxo7pf0qQZm@#s z*1Cqy#5Uz$y-ljUM=zqo-aTy3PzT6StfGF|^2aWNxKAZ(TDsuETKk(99-Ez{B6M@0 zO2D6gvAp89;;}qY?JSPZJ$I8i-{%{y2VH)Vm52Mlv?1nG&$IzMUv->M z>pzXKmx@4W6S2iI{FNt3&JvF{c-6iH+PrWyUztex>{-6rZO)4Q6=XO1T2bJY>;$G* zUF~#nuQeIzz}BmwC0zm6yQRZlQRoTekj9u+t`mjc5nV4=)l;d)R72%h4GC<%V|shP z1s*w*dlIT|aRy#W=FiAW6xX3jKQ9-OXWa*s0cJ%S6bAT_hmmzWkM2)KXZ{-$8|aLd zU&pj^qu*#_q&WSC4ruajsim-5j}2guO>SP#SA`XHN{3L}RW+9ljr3PeI4LpUk5gy0 zYV;x+)l1>22t_#z8}V`8_*U8eWFcO_^vrGVwCXb!OkDA{l>gHOVBgL6ycb1_(d8d| zbEA@jFg{~VqL!0jrvI(70?L0kJVE_$zf;l-9cTpc|GAm~3X3b|)jjIaPuFCiBBVN4 zgtsj2LfH5|UE%U;Z&j;+ZTP*1>w@N-@fe7oFzUagsgnC|af|`nyDP;BMX`XASp$ln z^7h6T-~;pkV6|O^2Gzck6`cgMiY~g1qgjADz{+&*k%`MVNDN3_Z^AX>JM-7n#YWkr 
[GIT binary patch data omitted — base85-encoded binary file contents, not reproducible as readable text.]
diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 8e03cafeb3043..3c87263c7de18 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -188,34 +188,35 @@ java.lang.ArithmeticException: Casting 2147483648 to int causes overflow
 When `spark.sql.ansi.enabled` is set to `true`, Spark SQL uses several rules that govern how conflicts between data types are resolved.
 At the heart of this conflict resolution is the Type Precedence List which defines whether values of a given data type can be promoted to another data type implicitly.
-| Data type | precedence list(from narrowest to widest) |
-|-----------|------------------------------------------------------------------|
-| Byte | Byte -> Short -> Int -> Long -> Decimal -> Float* -> Double |
-| Short | Short -> Int -> Long -> Decimal-> Float* -> Double |
-| Int | Int -> Long -> Decimal -> Float* -> Double |
-| Long | Long -> Decimal -> Float* -> Double |
-| Decimal | Decimal -> Float* -> Double |
-| Float | Float -> Double |
-| Double | Double |
-| Date | Date -> Timestamp |
-| Timestamp | Timestamp |
-| String | String |
-| Binary | Binary |
-| Boolean | Boolean |
-| Interval | Interval |
-| Map | Map** |
-| Array | Array** |
-| Struct | Struct** |
+| Data type | precedence list(from narrowest to widest) |
+|-----------|---------------------------------------------------------------|
+| Byte | Byte -> Short -> Int -> Long -> Decimal -> Float* -> Double |
+| Short | Short -> Int -> Long -> Decimal-> Float* -> Double |
+| Int | Int -> Long -> Decimal -> Float* -> Double |
+| Long | Long -> Decimal -> Float* -> Double |
+| Decimal | Decimal -> Float* -> Double |
+| Float | Float -> Double |
+| Double | Double |
+| Date | Date -> Timestamp |
+| Timestamp | Timestamp |
+| String | String, Long -> Double, Date -> Timestamp, Boolean, Binary ** |
+| Binary | Binary |
+| Boolean | Boolean |
+| Interval | Interval |
+| Map | Map*** |
+| Array | Array*** |
+| Struct | Struct*** |
 \* For least common type resolution float is skipped to avoid loss of precision.
-\*\* For a complex type, the precedence rule applies recursively to its component elements.
+\*\* String can be promoted to multiple kinds of data types. Note that Byte/Short/Int/Decimal/Float is not on this precedence list. The least common type between Byte/Short/Int and String is Long, while the least common type between Decimal/Float and String is Double.
-Special rules apply for string literals and untyped NULL.
-A NULL can be promoted to any other type, while a string literal can be promoted to any simple data type.
+\*\*\* For a complex type, the precedence rule applies recursively to its component elements.
+
+Special rules apply for untyped NULL. A NULL can be promoted to any other type.
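As a quick, hedged illustration of the new String row above (an editorial sketch, not part of the patch): with `spark.sql.ansi.enabled=true`, the promotion rules documented here imply result types like the following. The outputs are inferred from these rules rather than copied from a recorded Spark run.

```sql
-- Sketch only: expected result types under the string-promotion rules above,
-- assuming spark.sql.ansi.enabled=true.

-- Int vs. String: both sides are promoted to Long
> SELECT typeof(1 + '2');
BIGINT

-- Decimal/Float vs. String: both sides are promoted to Double
> SELECT typeof(1.5 + '2');
DOUBLE

-- Date vs. String: the String is promoted to Date for the comparison
> SELECT DATE'2021-01-01' < '2021-02-01';
true
```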
This is a graphical depiction of the precedence list as a directed tree: -Type Precedence List +Type Precedence List #### Least Common Type Resolution The least common type from a set of types is the narrowest type reachable from the precedence list by all elements of the set of types. @@ -245,13 +246,19 @@ DOUBLE > SELECT (typeof(coalesce(1BD, 1F))); DOUBLE +> SELECT typeof(coalesce(1, '2147483648')) +BIGINT +> SELECT typeof(coalesce(1.0, '2147483648')) +DOUBLE +> SELECT typeof(coalesce(DATE'2021-01-01', '2022-01-01')) +DATE ``` ### SQL Functions #### Function invocation Under ANSI mode(spark.sql.ansi.enabled=true), the function invocation of Spark SQL: - In general, it follows the `Store assignment` rules as storing the input values as the declared parameter type of the SQL functions -- Special rules apply for string literals and untyped NULL. A NULL can be promoted to any other type, while a string literal can be promoted to any simple data type. +- Special rules apply for untyped NULL. A NULL can be promoted to any other type. ```sql > SET spark.sql.ansi.enabled=true; @@ -262,10 +269,10 @@ total number: 1 > select datediff(now(), current_date); 0 --- specialrule: implicitly cast String literal to Double type +-- implicitly cast String to Double type > SELECT ceil('0.1'); 1 --- specialrule: implicitly cast NULL to Date type +-- special rule: implicitly cast NULL to Date type > SELECT year(null); NULL diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala index 4ff2fbf3b3a9d..61142fcb035ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala @@ -68,7 +68,7 @@ import org.apache.spark.sql.types._ * * CreateMap * * For complex types (struct, array, map), Spark recursively looks into the element type and * applies the rules above. - * Note: this new type coercion system will allow implicit converting String type literals as other + * Note: this new type coercion system will allow implicit converting String type as other * primitive types, in case of breaking too many existing Spark SQL queries. This is a special * rule and it is not from the ANSI SQL standard. */ @@ -77,7 +77,7 @@ object AnsiTypeCoercion extends TypeCoercionBase { WidenSetOperationTypes :: new AnsiCombinedTypeCoercionRule( InConversion :: - PromoteStringLiterals :: + PromoteStrings :: DecimalPrecision :: FunctionArgumentConversion :: ConcatCoercion :: @@ -130,9 +130,26 @@ object AnsiTypeCoercion extends TypeCoercionBase { override def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = { findTightestCommonType(t1, t2) .orElse(findWiderTypeForDecimal(t1, t2)) + .orElse(findWiderTypeForString(t1, t2)) .orElse(findTypeForComplex(t1, t2, findWiderTypeForTwo)) } + /** Promotes StringType to other data types. */ + private def findWiderTypeForString(dt1: DataType, dt2: DataType): Option[DataType] = { + (dt1, dt2) match { + case (StringType, _: IntegralType) => Some(LongType) + case (StringType, _: FractionalType) => Some(DoubleType) + case (StringType, NullType) => Some(StringType) + // If a binary operation contains interval type and string, we can't decide which + // interval type the string should be promoted as. There are many possible interval + // types, such as year interval, month interval, day interval, hour interval, etc. 
+ case (StringType, _: AnsiIntervalType) => None + case (StringType, a: AtomicType) => Some(a) + case (other, StringType) if other != StringType => findWiderTypeForString(StringType, other) + case _ => None + } + } + override def findWiderCommonType(types: Seq[DataType]): Option[DataType] = { types.foldLeft[Option[DataType]](Some(NullType))((r, c) => r match { @@ -142,7 +159,7 @@ object AnsiTypeCoercion extends TypeCoercionBase { } override def implicitCast(e: Expression, expectedType: AbstractDataType): Option[Expression] = { - implicitCast(e.dataType, expectedType, e.foldable).map { dt => + implicitCast(e.dataType, expectedType).map { dt => if (dt == e.dataType) e else Cast(e, dt) } } @@ -153,8 +170,7 @@ object AnsiTypeCoercion extends TypeCoercionBase { */ private def implicitCast( inType: DataType, - expectedType: AbstractDataType, - isInputFoldable: Boolean): Option[DataType] = { + expectedType: AbstractDataType): Option[DataType] = { (inType, expectedType) match { // If the expected type equals the input type, no need to cast. case _ if expectedType.acceptsType(inType) => Some(inType) @@ -169,19 +185,25 @@ object AnsiTypeCoercion extends TypeCoercionBase { case (NullType, target) if !target.isInstanceOf[TypeCollection] => Some(target.defaultConcreteType) - // This type coercion system will allow implicit converting String type literals as other + // This type coercion system will allow implicit converting String type as other // primitive types, in case of breaking too many existing Spark SQL queries. - case (StringType, a: AtomicType) if isInputFoldable => + case (StringType, a: AtomicType) => Some(a) - // If the target type is any Numeric type, convert the String type literal as Double type. - case (StringType, NumericType) if isInputFoldable => + // If the target type is any Numeric type, convert the String type as Double type. + case (StringType, NumericType) => Some(DoubleType) - // If the target type is any Decimal type, convert the String type literal as Double type. - case (StringType, DecimalType) if isInputFoldable => + // If the target type is any Decimal type, convert the String type as the default + // Decimal type. + case (StringType, DecimalType) => Some(DecimalType.SYSTEM_DEFAULT) + // If the target type is any timestamp type, convert the String type as the default + // Timestamp type. + case (StringType, AnyTimestampType) => + Some(AnyTimestampType.defaultConcreteType) + case (_, target: DataType) => if (Cast.canANSIStoreAssign(inType, target)) { Some(target) @@ -192,7 +214,7 @@ object AnsiTypeCoercion extends TypeCoercionBase { // When we reach here, input type is not acceptable for any types in this type collection, // try to find the first one we can implicitly cast. case (_, TypeCollection(types)) => - types.flatMap(implicitCast(inType, _, isInputFoldable)).headOption + types.flatMap(implicitCast(inType, _)).headOption case _ => None } @@ -200,10 +222,7 @@ object AnsiTypeCoercion extends TypeCoercionBase { override def canCast(from: DataType, to: DataType): Boolean = AnsiCast.canCast(from, to) - /** - * Promotes string literals that appear in arithmetic, comparison, and datetime expressions. 
- */ - object PromoteStringLiterals extends TypeCoercionRule { + object PromoteStrings extends TypeCoercionRule { private def castExpr(expr: Expression, targetType: DataType): Expression = { expr.dataType match { case NullType => Literal.create(null, targetType) @@ -212,55 +231,37 @@ object AnsiTypeCoercion extends TypeCoercionBase { } } - // Return whether a string literal can be promoted as the give data type in a binary operation. - private def canPromoteAsInBinaryOperation(dt: DataType) = dt match { - // If a binary operation contains interval type and string literal, we can't decide which - // interval type the string literal should be promoted as. There are many possible interval - // types, such as year interval, month interval, day interval, hour interval, etc. - case _: AnsiIntervalType => false - case _: AtomicType => true - case _ => false - } - override def transform: PartialFunction[Expression, Expression] = { // Skip nodes who's children have not been resolved yet. case e if !e.childrenResolved => e - case b @ BinaryOperator(left @ StringType(), right) - if left.foldable && canPromoteAsInBinaryOperation(right.dataType) => - b.makeCopy(Array(castExpr(left, right.dataType), right)) - - case b @ BinaryOperator(left, right @ StringType()) - if right.foldable && canPromoteAsInBinaryOperation(left.dataType) => - b.makeCopy(Array(left, castExpr(right, left.dataType))) + case b @ BinaryOperator(left, right) + if findWiderTypeForString(left.dataType, right.dataType).isDefined => + val promoteType = findWiderTypeForString(left.dataType, right.dataType).get + b.withNewChildren(Seq(castExpr(left, promoteType), castExpr(right, promoteType))) - // Promotes string literals in `In predicate`. - case p @ In(a, b) - if a.dataType != StringType && b.exists( e => e.foldable && e.dataType == StringType) => - val newList = b.map { - case e @ StringType() if e.foldable => Cast(e, a.dataType) - case other => other - } - p.makeCopy(Array(a, newList)) + case Abs(e @ StringType(), failOnError) => Abs(Cast(e, DoubleType), failOnError) + case m @ UnaryMinus(e @ StringType(), _) => m.withNewChildren(Seq(Cast(e, DoubleType))) + case UnaryPositive(e @ StringType()) => UnaryPositive(Cast(e, DoubleType)) - case d @ DateAdd(left @ StringType(), _) if left.foldable => + case d @ DateAdd(left @ StringType(), _) => d.copy(startDate = Cast(d.startDate, DateType)) - case d @ DateAdd(_, right @ StringType()) if right.foldable => + case d @ DateAdd(_, right @ StringType()) => d.copy(days = Cast(right, IntegerType)) - case d @ DateSub(left @ StringType(), _) if left.foldable => + case d @ DateSub(left @ StringType(), _) => d.copy(startDate = Cast(d.startDate, DateType)) - case d @ DateSub(_, right @ StringType()) if right.foldable => + case d @ DateSub(_, right @ StringType()) => d.copy(days = Cast(right, IntegerType)) - case s @ SubtractDates(left @ StringType(), _, _) if left.foldable => + case s @ SubtractDates(left @ StringType(), _, _) => s.copy(left = Cast(s.left, DateType)) - case s @ SubtractDates(_, right @ StringType(), _) if right.foldable => + case s @ SubtractDates(_, right @ StringType(), _) => s.copy(right = Cast(s.right, DateType)) - case t @ TimeAdd(left @ StringType(), _, _) if left.foldable => + case t @ TimeAdd(left @ StringType(), _, _) => t.copy(start = Cast(t.start, TimestampType)) - case t @ SubtractTimestamps(left @ StringType(), _, _, _) if left.foldable => + case t @ SubtractTimestamps(left @ StringType(), _, _, _) => t.copy(left = Cast(t.left, t.right.dataType)) - case t @ 
SubtractTimestamps(_, right @ StringType(), _, _) if right.foldable => + case t @ SubtractTimestamps(_, right @ StringType(), _, _) => t.copy(right = Cast(right, t.left.dataType)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala index 809cbb2cebdbf..19ee6d855043f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala @@ -99,24 +99,15 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { } } - test("implicit type cast - unfoldable StringType") { - val nonCastableTypes = allTypes.filterNot(_ == StringType) - nonCastableTypes.foreach { dt => - shouldNotCastStringInput(dt) - } - shouldNotCastStringInput(DecimalType) - shouldNotCastStringInput(NumericType) - } - - test("implicit type cast - foldable StringType") { - atomicTypes.foreach { dt => - shouldCastStringLiteral(dt, dt) - } - allTypes.filterNot(atomicTypes.contains).foreach { dt => - shouldNotCastStringLiteral(dt) - } - shouldCastStringLiteral(DecimalType, DecimalType.defaultConcreteType) - shouldCastStringLiteral(NumericType, DoubleType) + test("implicit type cast - StringType") { + val checkedType = StringType + val nonCastableTypes = + complexTypes ++ Seq(NullType, CalendarIntervalType) + checkTypeCasting(checkedType, castableTypes = allTypes.filterNot(nonCastableTypes.contains)) + shouldCast(checkedType, DecimalType, DecimalType.SYSTEM_DEFAULT) + shouldCast(checkedType, NumericType, NumericType.defaultConcreteType) + shouldCast(checkedType, AnyTimestampType, AnyTimestampType.defaultConcreteType) + shouldNotCast(checkedType, IntegralType) } test("implicit type cast - unfoldable ArrayType(StringType)") { @@ -153,6 +144,26 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { shouldNotCast(checkedType, IntegralType) } + test("wider data type of two for string") { + def widenTest(t1: DataType, t2: DataType, expected: Option[DataType]): Unit = { + checkWidenType(AnsiTypeCoercion.findWiderTypeForTwo, t1, t2, expected) + checkWidenType(AnsiTypeCoercion.findWiderTypeForTwo, t2, t1, expected) + } + + widenTest(NullType, StringType, Some(StringType)) + widenTest(StringType, StringType, Some(StringType)) + Seq(ByteType, ShortType, IntegerType, LongType).foreach { dt => + widenTest(dt, StringType, Some(LongType)) + } + Seq(FloatType, DecimalType(20, 10), DoubleType).foreach { dt => + widenTest(dt, StringType, Some(DoubleType)) + } + + Seq(DateType, TimestampType, BinaryType, BooleanType).foreach { dt => + widenTest(dt, StringType, Some(dt)) + } + } + test("tightest common bound for types") { def widenTest(t1: DataType, t2: DataType, expected: Option[DataType]): Unit = checkWidenType(AnsiTypeCoercion.findTightestCommonType, t1, t2, expected) @@ -408,7 +419,7 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { ruleTest(rule, Coalesce(Seq(timestampLit, stringLit)), - Coalesce(Seq(timestampLit, stringLit))) + Coalesce(Seq(timestampLit, Cast(stringLit, TimestampType)))) ruleTest(rule, Coalesce(Seq(nullLit, floatNullLit, intLit)), @@ -422,7 +433,8 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { // There is no a common type among Float/Double/String ruleTest(rule, Coalesce(Seq(nullLit, floatNullLit, doubleLit, stringLit)), - Coalesce(Seq(nullLit, floatNullLit, doubleLit, stringLit))) + 
Coalesce(Seq(Cast(nullLit, DoubleType), Cast(floatNullLit, DoubleType), + doubleLit, Cast(stringLit, DoubleType)))) // There is no a common type among Timestamp/Int/String ruleTest(rule, @@ -451,8 +463,8 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { :: Literal("a") :: Nil), CreateArray(Literal(1.0) - :: Literal(1) - :: Literal("a") + :: Cast(Literal(1), DoubleType) + :: Cast(Literal("a"), DoubleType) :: Nil)) ruleTest(AnsiTypeCoercion.FunctionArgumentConversion, @@ -506,7 +518,7 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { :: Literal(3.0) :: Nil), CreateMap(Literal(1) - :: Literal("a") + :: Cast(Literal("a"), DoubleType) :: Literal(2) :: Literal(3.0) :: Nil)) @@ -523,13 +535,13 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { :: Nil)) // type coercion for both map keys and values ruleTest(AnsiTypeCoercion.FunctionArgumentConversion, - CreateMap(Literal(1) - :: Literal("a") + CreateMap(Cast(Literal(1), DoubleType) + :: Cast(Literal("a"), DoubleType) :: Literal(2.0) :: Literal(3.0) :: Nil), CreateMap(Cast(Literal(1), DoubleType) - :: Literal("a") + :: Cast(Literal("a"), DoubleType) :: Literal(2.0) :: Literal(3.0) :: Nil)) @@ -644,11 +656,11 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { ruleTest(rule, If(falseLit, stringLit, doubleLit), - If(falseLit, stringLit, doubleLit)) + If(falseLit, Cast(stringLit, DoubleType), doubleLit)) ruleTest(rule, If(trueLit, timestampLit, stringLit), - If(trueLit, timestampLit, stringLit)) + If(trueLit, timestampLit, Cast(stringLit, TimestampType))) } test("type coercion for CaseKeyWhen") { @@ -901,7 +913,8 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { ) ruleTest(inConversion, In(Literal("a"), Seq(Literal(1), Literal("b"))), - In(Literal("a"), Seq(Literal(1), Literal("b"))) + In(Cast(Literal("a"), LongType), + Seq(Cast(Literal(1), LongType), Cast(Literal("b"), LongType))) ) } @@ -1024,55 +1037,6 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { IntegralDivide(Cast(2, LongType), 1L)) } - test("Promote string literals") { - val rule = AnsiTypeCoercion.PromoteStringLiterals - val stringLiteral = Literal("123") - val castStringLiteralAsInt = Cast(stringLiteral, IntegerType) - val castStringLiteralAsDouble = Cast(stringLiteral, DoubleType) - val castStringLiteralAsDate = Cast(stringLiteral, DateType) - val castStringLiteralAsTimestamp = Cast(stringLiteral, TimestampType) - ruleTest(rule, - GreaterThan(stringLiteral, Literal(1)), - GreaterThan(castStringLiteralAsInt, Literal(1))) - ruleTest(rule, - LessThan(Literal(true), stringLiteral), - LessThan(Literal(true), Cast(stringLiteral, BooleanType))) - ruleTest(rule, - EqualTo(Literal(Array(1, 2)), stringLiteral), - EqualTo(Literal(Array(1, 2)), stringLiteral)) - ruleTest(rule, - GreaterThan(stringLiteral, Literal(0.5)), - GreaterThan(castStringLiteralAsDouble, Literal(0.5))) - - val dateLiteral = Literal(java.sql.Date.valueOf("2021-01-01")) - ruleTest(rule, - EqualTo(stringLiteral, dateLiteral), - EqualTo(castStringLiteralAsDate, dateLiteral)) - - val timestampLiteral = Literal(Timestamp.valueOf("2021-01-01 00:00:00")) - ruleTest(rule, - EqualTo(stringLiteral, timestampLiteral), - EqualTo(castStringLiteralAsTimestamp, timestampLiteral)) - - ruleTest(rule, Add(stringLiteral, Literal(1)), - Add(castStringLiteralAsInt, Literal(1))) - ruleTest(rule, Divide(stringLiteral, Literal(1)), - Divide(castStringLiteralAsInt, Literal(1))) - - ruleTest(rule, - In(Literal(1), Seq(stringLiteral, Literal(2))), - In(Literal(1), 
Seq(castStringLiteralAsInt, Literal(2)))) - ruleTest(rule, - In(Literal(1.0), Seq(stringLiteral, Literal(2.2))), - In(Literal(1.0), Seq(castStringLiteralAsDouble, Literal(2.2)))) - ruleTest(rule, - In(dateLiteral, Seq(stringLiteral)), - In(dateLiteral, Seq(castStringLiteralAsDate))) - ruleTest(rule, - In(timestampLiteral, Seq(stringLiteral)), - In(timestampLiteral, Seq(castStringLiteralAsTimestamp))) - } - test("SPARK-35937: GetDateFieldOperations") { val ts = Literal(Timestamp.valueOf("2021-01-01 01:30:00")) Seq( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 1f3d1c4516778..8ea5886c62eca 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -476,6 +476,7 @@ class TypeCoercionSuite extends TypeCoercionSuiteBase { checkTypeCasting(checkedType, castableTypes = allTypes.filterNot(nonCastableTypes.contains)) shouldCast(checkedType, DecimalType, DecimalType.SYSTEM_DEFAULT) shouldCast(checkedType, NumericType, NumericType.defaultConcreteType) + shouldCast(checkedType, AnyTimestampType, AnyTimestampType.defaultConcreteType) shouldNotCast(checkedType, IntegralType) } diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 75cc31856d56c..151dc3340610f 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -470,39 +470,33 @@ struct -- !query select date_add('2011-11-11', int_str) from date_view -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve 'date_add(CAST('2011-11-11' AS DATE), date_view.int_str)' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'date_view.int_str' is of string type.; line 1 pos 7 +2011-11-12 -- !query select date_sub('2011-11-11', int_str) from date_view -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve 'date_sub(CAST('2011-11-11' AS DATE), date_view.int_str)' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'date_view.int_str' is of string type.; line 1 pos 7 +2011-11-10 -- !query select date_add(date_str, 1) from date_view -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve 'date_add(date_view.date_str, 1)' due to data type mismatch: argument 1 requires date type, however, 'date_view.date_str' is of string type. -To fix the error, you might need to add explicit type casts. If necessary set spark.sql.ansi.enabled to false to bypass this error.; line 1 pos 7 +2011-11-12 -- !query select date_sub(date_str, 1) from date_view -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve 'date_sub(date_view.date_str, 1)' due to data type mismatch: argument 1 requires date type, however, 'date_view.date_str' is of string type. -To fix the error, you might need to add explicit type casts. 
If necessary set spark.sql.ansi.enabled to false to bypass this error.; line 1 pos 7 +2011-11-10 -- !query @@ -581,20 +575,17 @@ NULL -- !query select date_str - date '2001-09-28' from date_view -- !query schema -struct<> +struct<(date_str - DATE '2001-09-28'):interval day> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve '(date_view.date_str - DATE '2001-09-28')' due to data type mismatch: argument 1 requires date type, however, 'date_view.date_str' is of string type. -To fix the error, you might need to add explicit type casts. If necessary set spark.sql.ansi.enabled to false to bypass this error.; line 1 pos 7 +3696 00:00:00.000000000 -- !query select date '2001-09-28' - date_str from date_view -- !query schema -struct<> +struct<(DATE '2001-09-28' - date_str):interval day> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve '(DATE '2001-09-28' - date_view.date_str)' due to data type mismatch: differing types in '(DATE '2001-09-28' - date_view.date_str)' (date and string).; line 1 pos 7 +-3696 00:00:00.000000000 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 12450fa6679dc..cfc77aa45fdeb 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -1532,9 +1532,8 @@ select str - interval '4 22:12' day to minute from interval_view -- !query schema struct<> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve 'interval_view.str + (- INTERVAL '4 22:12' DAY TO MINUTE)' due to data type mismatch: argument 1 requires (timestamp or timestamp without time zone) type, however, 'interval_view.str' is of string type. -To fix the error, you might need to add explicit type casts. If necessary set spark.sql.ansi.enabled to false to bypass this error.; line 1 pos 7 +java.time.DateTimeException +Cannot cast 1 to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -1542,9 +1541,8 @@ select str + interval '4 22:12' day to minute from interval_view -- !query schema struct<> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve 'interval_view.str + INTERVAL '4 22:12' DAY TO MINUTE' due to data type mismatch: argument 1 requires (timestamp or timestamp without time zone) type, however, 'interval_view.str' is of string type. -To fix the error, you might need to add explicit type casts. If necessary set spark.sql.ansi.enabled to false to bypass this error.; line 1 pos 7 +java.time.DateTimeException +Cannot cast 1 to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. 
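(Editorial note, not part of the regenerated golden files: the new outputs above reflect that the string operand is now implicitly cast to a timestamp during analysis, so an un-castable value such as `'1'` surfaces as a runtime `java.time.DateTimeException` instead of an `AnalysisException`. Below is a hedged sketch of the escape hatches the error message itself suggests, assuming a `SparkSession` named `spark` and a one-row view shaped like the test's `interval_view`:)

```scala
// Illustration only: reproduce the runtime failure and the workarounds hinted at by the error.
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("CREATE OR REPLACE TEMPORARY VIEW interval_view AS SELECT '1' AS str")

// Throws java.time.DateTimeException at runtime, because '1' is not a valid timestamp string.
// spark.sql("SELECT str + INTERVAL '4 22:12' DAY TO MINUTE FROM interval_view").show()

// Option 1: try_cast returns NULL for values that cannot be cast, instead of throwing.
spark.sql(
  "SELECT try_cast(str AS TIMESTAMP) + INTERVAL '4 22:12' DAY TO MINUTE FROM interval_view"
).show()

// Option 2: per the error message, setting spark.sql.ansi.enabled=false bypasses this error.
spark.conf.set("spark.sql.ansi.enabled", "false")
```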
-- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out index 6aa70bd599b95..96ea94869d7e6 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out @@ -647,19 +647,17 @@ struct<> -- !query select str - timestamp'2011-11-11 11:11:11' from ts_view -- !query schema -struct<> +struct<(str - TIMESTAMP '2011-11-11 11:11:11'):interval day to second> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve '(ts_view.str - TIMESTAMP '2011-11-11 11:11:11')' due to data type mismatch: argument 1 requires (timestamp or timestamp without time zone) type, however, 'ts_view.str' is of string type.; line 1 pos 7 +0 00:00:00.000000000 -- !query select timestamp'2011-11-11 11:11:11' - str from ts_view -- !query schema -struct<> +struct<(TIMESTAMP '2011-11-11 11:11:11' - str):interval day to second> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve '(TIMESTAMP '2011-11-11 11:11:11' - ts_view.str)' due to data type mismatch: argument 2 requires (timestamp or timestamp without time zone) type, however, 'ts_view.str' is of string type.; line 1 pos 7 +0 00:00:00.000000000 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out index 214776391f6f7..690fd7cd2cbbc 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out @@ -171,6 +171,7 @@ struct 0.0 1.2345679E-20 1.2345679E20 + 1004.3 -- !query @@ -178,7 +179,7 @@ SELECT '' AS one, f.* FROM FLOAT4_TBL f WHERE f.f1 = '1004.3' -- !query schema struct -- !query output - 1004.3 + -- !query @@ -189,6 +190,7 @@ struct -34.84 0.0 1.2345679E-20 + 1004.3 -- !query @@ -199,6 +201,7 @@ struct -34.84 0.0 1.2345679E-20 + 1004.3 -- !query @@ -227,22 +230,22 @@ struct SELECT '' AS three, f.f1, f.f1 * '-10' AS x FROM FLOAT4_TBL f WHERE f.f1 > '0.0' -- !query schema -struct +struct -- !query output - 1.2345679E-20 -1.2345678E-19 - 1.2345679E20 -1.2345678E21 - 1004.3 -10043.0 + 1.2345679E-20 -1.2345678720289608E-19 + 1.2345679E20 -1.2345678955701443E21 + 1004.3 -10042.999877929688 -- !query SELECT '' AS three, f.f1, f.f1 + '-10' AS x FROM FLOAT4_TBL f WHERE f.f1 > '0.0' -- !query schema -struct +struct -- !query output 1.2345679E-20 -10.0 - 1.2345679E20 1.2345679E20 - 1004.3 994.3 + 1.2345679E20 1.2345678955701443E20 + 1004.3 994.2999877929688 -- !query @@ -260,11 +263,11 @@ struct SELECT '' AS three, f.f1, f.f1 - '-10' AS x FROM FLOAT4_TBL f WHERE f.f1 > '0.0' -- !query schema -struct +struct -- !query output 1.2345679E-20 10.0 - 1.2345679E20 1.2345679E20 - 1004.3 1014.3 + 1.2345679E20 1.2345678955701443E20 + 1004.3 1014.2999877929688 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index 371b0e00f532e..6b93f7688fb73 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -647,19 +647,17 @@ struct<> -- !query select str - timestamp'2011-11-11 11:11:11' from ts_view -- !query schema -struct<> +struct<(str - TIMESTAMP_NTZ '2011-11-11 11:11:11'):interval day to 
second> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve '(ts_view.str - TIMESTAMP_NTZ '2011-11-11 11:11:11')' due to data type mismatch: argument 1 requires (timestamp or timestamp without time zone) type, however, 'ts_view.str' is of string type.; line 1 pos 7 +0 00:00:00.000000000 -- !query select timestamp'2011-11-11 11:11:11' - str from ts_view -- !query schema -struct<> +struct<(TIMESTAMP_NTZ '2011-11-11 11:11:11' - str):interval day to second> -- !query output -org.apache.spark.sql.AnalysisException -cannot resolve '(TIMESTAMP_NTZ '2011-11-11 11:11:11' - ts_view.str)' due to data type mismatch: argument 2 requires (timestamp or timestamp without time zone) type, however, 'ts_view.str' is of string type.; line 1 pos 7 +0 00:00:00.000000000 -- !query From a62ae9f64fbc5f9f472bc01dbd97caa65927baef Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Fri, 11 Feb 2022 12:53:30 +0800 Subject: [PATCH 216/513] [SPARK-38177][SQL] Fix wrong transformExpressions in Optimizer ### What changes were proposed in this pull request? - `EliminateDistinct`: change `transformExpressions` to `transformAllExpressionsWithPruning ` - `EliminateAggregateFilter `: change `transformExpressionsWithPruning` to `transformAllExpressionsWithPruning` ### Why are the changes needed? `transformExpressions` can only traverse all expressions in this current query plan, so the rule `EliminateDistinct` and `EliminateAggregateFilter` can not optimize the non-root node. We should use `transformAllExpressions` rather than `transformExpressions`. ### Does this PR introduce _any_ user-facing change? no, only change plan ### How was this patch tested? add new test for `EliminateDistinct` and `EliminateAggregateFilter` Closes #35479 from ulysses-you/SPARK-38177. Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 7 ++++--- .../optimizer/EliminateAggregateFilterSuite.scala | 11 +++++++++++ .../catalyst/optimizer/EliminateDistinctSuite.scala | 13 +++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 61d6e3901cda5..058e30dca1d20 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -414,7 +414,8 @@ abstract class Optimizer(catalogManager: CatalogManager) * This rule should be applied before RewriteDistinctAggregates. */ object EliminateDistinct extends Rule[LogicalPlan] { - override def apply(plan: LogicalPlan): LogicalPlan = plan transformExpressions { + override def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning( + _.containsPattern(AGGREGATE_EXPRESSION)) { case ae: AggregateExpression if ae.isDistinct && isDuplicateAgnostic(ae.aggregateFunction) => ae.copy(isDistinct = false) } @@ -436,8 +437,8 @@ object EliminateDistinct extends Rule[LogicalPlan] { * This rule should be applied before RewriteDistinctAggregates. 
*/ object EliminateAggregateFilter extends Rule[LogicalPlan] { - override def apply(plan: LogicalPlan): LogicalPlan = plan.transformExpressionsWithPruning( - _.containsAllPatterns(TRUE_OR_FALSE_LITERAL), ruleId) { + override def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning( + _.containsAllPatterns(AGGREGATE_EXPRESSION, TRUE_OR_FALSE_LITERAL), ruleId) { case ae @ AggregateExpression(_, _, _, Some(Literal.TrueLiteral), _) => ae.copy(filter = None) case AggregateExpression(af: DeclarativeAggregate, _, _, Some(Literal.FalseLiteral), _) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateAggregateFilterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateAggregateFilterSuite.scala index ec9b876f78e1d..1bd4550e2c077 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateAggregateFilterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateAggregateFilterSuite.scala @@ -72,4 +72,15 @@ class EliminateAggregateFilterSuite extends PlanTest { comparePlans(Optimize.execute(query), answer) } + test("SPARK-38177: Eliminate Filter in non-root node") { + val query = testRelation + .select(countDistinctWithFilter(GreaterThan(Literal(1), Literal(2)), 'a).as('result)) + .limit(1) + .analyze + val answer = testRelation + .groupBy()(Literal.create(0L, LongType).as('result)) + .limit(1) + .analyze + comparePlans(Optimize.execute(query), answer) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala index 08773720d717b..cf4761d561162 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala @@ -57,5 +57,18 @@ class EliminateDistinctSuite extends PlanTest { assert(query != answer) comparePlans(Optimize.execute(query), answer) } + + test(s"SPARK-38177: Eliminate Distinct in non-root $agg") { + val query = testRelation + .select(agg.toAggregateExpression(isDistinct = true).as('result)) + .limit(1) + .analyze + val answer = testRelation + .select(agg.toAggregateExpression(isDistinct = false).as('result)) + .limit(1) + .analyze + assert(query != answer) + comparePlans(Optimize.execute(query), answer) + } } } From e0bc977dd0dd378665da431d4a2497e05ae90cf7 Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Thu, 10 Feb 2022 23:06:13 -0800 Subject: [PATCH 217/513] [SPARK-37970][SS] Introduce AcceptsLatestSeenOffset to indicate latest seen offset to streaming source ### What changes were proposed in this pull request? This PR introduces a new interface on streaming data source `AcceptsLatestSeenOffset`, which notifies Spark to provide latest seen offset to the sources implementing the interface at every restart of the query. Spark will provide the latest seen offset before fetching the offset or data. Worth noting that the interface only support DSv2 streaming sources; the usage of DSv1 streaming source is limited to internal and it has different method call flow, so we would like to focus on DSv2. Spark will throw error if the DSv1 streaming source implements the interface. ### Why are the changes needed? This could be useful for the source if source needs to prepare based on the latest seen offset before fetching anything. 
More specifically, we found this very useful and handy for the data source which needs to track the offset by itself, since the external storage does not provide the offset for the latest available data. ### Does this PR introduce _any_ user-facing change? No, the change is limited to the data source developers. ### How was this patch tested? New unit tests. Closes #35259 from HeartSaVioR/SPARK-37970. Authored-by: Jungtaek Lim Signed-off-by: Yuanjian Li --- .../streaming/AcceptsLatestSeenOffset.java | 37 +++ .../AcceptsLatestSeenOffsetHandler.scala | 55 ++++ .../streaming/MicroBatchExecution.scala | 2 + .../continuous/ContinuousExecution.scala | 4 + .../AcceptsLatestSeenOffsetSuite.scala | 270 ++++++++++++++++++ 5 files changed, 368 insertions(+) create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/AcceptsLatestSeenOffset.java create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AcceptsLatestSeenOffsetHandler.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/streaming/AcceptsLatestSeenOffsetSuite.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/AcceptsLatestSeenOffset.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/AcceptsLatestSeenOffset.java new file mode 100644 index 0000000000000..e8515c063cffd --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/AcceptsLatestSeenOffset.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.read.streaming; + +/** + * Indicates that the source accepts the latest seen offset, which requires streaming execution + * to provide the latest seen offset when restarting the streaming query from checkpoint. + * + * Note that this interface aims to only support DSv2 streaming sources. Spark may throw error + * if the interface is implemented along with DSv1 streaming sources. + * + * The callback method will be called once per run. + */ +public interface AcceptsLatestSeenOffset extends SparkDataStream { + /** + * Callback method to receive the latest seen offset information from streaming execution. + * The method will be called only when the streaming query is restarted from checkpoint. + * + * @param offset The offset which was latest seen in the previous run. 
+ */ + void setLatestSeenOffset(Offset offset); +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AcceptsLatestSeenOffsetHandler.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AcceptsLatestSeenOffsetHandler.scala new file mode 100644 index 0000000000000..69795cc82c477 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AcceptsLatestSeenOffsetHandler.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming + +import org.apache.spark.sql.connector.read.streaming.{AcceptsLatestSeenOffset, SparkDataStream} + +/** + * This feeds "latest seen offset" to the sources that implement AcceptsLatestSeenOffset. + */ +object AcceptsLatestSeenOffsetHandler { + def setLatestSeenOffsetOnSources( + offsets: Option[OffsetSeq], + sources: Seq[SparkDataStream]): Unit = { + assertNoAcceptsLatestSeenOffsetWithDataSourceV1(sources) + + offsets.map(_.toStreamProgress(sources)) match { + case Some(streamProgress) => + streamProgress.foreach { + case (src: AcceptsLatestSeenOffset, offset) => + src.setLatestSeenOffset(offset) + + case _ => // no-op + } + case _ => // no-op + } + } + + private def assertNoAcceptsLatestSeenOffsetWithDataSourceV1( + sources: Seq[SparkDataStream]): Unit = { + val unsupportedSources = sources + .filter(_.isInstanceOf[AcceptsLatestSeenOffset]) + .filter(_.isInstanceOf[Source]) + + if (unsupportedSources.nonEmpty) { + throw new UnsupportedOperationException( + "AcceptsLatestSeenOffset is not supported with DSv1 streaming source: " + + unsupportedSources) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 8725b701225ff..b5667ee398d65 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -217,6 +217,8 @@ class MicroBatchExecution( reportTimeTaken("triggerExecution") { // We'll do this initialization only once every start / restart if (currentBatchId < 0) { + AcceptsLatestSeenOffsetHandler.setLatestSeenOffsetOnSources( + offsetLog.getLatest().map(_._2), sources) populateStartOffsets(sparkSessionForStream) logInfo(s"Stream started from $committedOffsets") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index a0b407469ab36..0ed29d430bbdb 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -190,6 +190,10 @@ class ContinuousExecution( private def runContinuous(sparkSessionForQuery: SparkSession): Unit = { val offsets = getStartOffsets(sparkSessionForQuery) + if (currentBatchId > 0) { + AcceptsLatestSeenOffsetHandler.setLatestSeenOffsetOnSources(Some(offsets), sources) + } + val withNewSources: LogicalPlan = logicalPlan transform { case relation: StreamingDataSourceV2Relation => val loggedOffset = offsets.offsets(0) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/AcceptsLatestSeenOffsetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/AcceptsLatestSeenOffsetSuite.scala new file mode 100644 index 0000000000000..d3e9a08509b0b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/AcceptsLatestSeenOffsetSuite.scala @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.streaming + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.plans.logical.Range +import org.apache.spark.sql.connector.read.streaming +import org.apache.spark.sql.connector.read.streaming.{AcceptsLatestSeenOffset, SparkDataStream} +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.sources.{ContinuousMemoryStream, ContinuousMemoryStreamOffset} +import org.apache.spark.sql.types.{LongType, StructType} + +class AcceptsLatestSeenOffsetSuite extends StreamTest with BeforeAndAfter { + + import testImplicits._ + + after { + sqlContext.streams.active.foreach(_.stop()) + } + + test("DataSource V1 source with micro-batch is not supported") { + val testSource = new TestSource(spark) + val df = testSource.toDF() + + /** Add data to this test source by incrementing its available offset */ + def addData(numNewRows: Int): StreamAction = new AddData { + override def addData( + query: Option[StreamExecution]): (SparkDataStream, streaming.Offset) = { + testSource.incrementAvailableOffset(numNewRows) + (testSource, testSource.getOffset.get) + } + } + + addData(10) + val query = df.writeStream.format("console").start() + val exc = intercept[StreamingQueryException] { + query.processAllAvailable() + } + assert(exc.getMessage.contains( + "AcceptsLatestSeenOffset is not supported with DSv1 streaming source")) + } + + test("DataSource V2 source with micro-batch") { + val inputData = new TestMemoryStream[Long](0, spark.sqlContext) + val df = inputData.toDF().select("value") + + /** Add data to this test source by incrementing its available offset */ + def addData(values: Array[Long]): StreamAction = new AddData { + override def addData( + query: Option[StreamExecution]): (SparkDataStream, streaming.Offset) = { + (inputData, inputData.addData(values)) + } + } + + testStream(df)( + StartStream(), + addData((1L to 10L).toArray), + ProcessAllAvailable(), + Execute("latest seen offset should be null") { _ => + // this verifies that the callback method is not called for the new query + assert(inputData.latestSeenOffset === null) + }, + StopStream, + + StartStream(), + addData((11L to 20L).toArray), + ProcessAllAvailable(), + Execute("latest seen offset should be 0") { _ => + assert(inputData.latestSeenOffset === LongOffset(0)) + }, + StopStream, + + Execute("mark last batch as incomplete") { q => + // Delete the last committed batch from the commit log to signify that the last batch + // (a no-data batch) did not complete and has to be re-executed on restart. + val commit = q.commitLog.getLatest().map(_._1).getOrElse(-1L) + q.commitLog.purgeAfter(commit - 1) + }, + StartStream(), + addData((21L to 30L).toArray), + ProcessAllAvailable(), + Execute("latest seen offset should be 1") { _ => + assert(inputData.latestSeenOffset === LongOffset(1)) + } + ) + } + + test("DataSource V2 source with micro-batch - rollback of microbatch 0") { + // Test case: when the query is restarted, we expect the execution to call `latestSeenOffset` + // first. Later as part of the execution, execution may call `initialOffset` if the previous + // run of the query had no committed batches. 
+ val inputData = new TestMemoryStream[Long](0, spark.sqlContext) + val df = inputData.toDF().select("value") + + /** Add data to this test source by incrementing its available offset */ + def addData(values: Array[Long]): StreamAction = new AddData { + override def addData( + query: Option[StreamExecution]): (SparkDataStream, streaming.Offset) = { + (inputData, inputData.addData(values)) + } + } + + testStream(df)( + StartStream(), + addData((1L to 10L).toArray), + ProcessAllAvailable(), + Execute("latest seen offset should be null") { _ => + // this verifies that the callback method is not called for the new query + assert(inputData.latestSeenOffset === null) + }, + StopStream, + + Execute("mark last batch as incomplete") { q => + // Delete the last committed batch from the commit log to signify that the last batch + // (a no-data batch) did not complete and has to be re-executed on restart. + val commit = q.commitLog.getLatest().map(_._1).getOrElse(-1L) + q.commitLog.purgeAfter(commit - 1) + }, + + Execute("reset flag initial offset called flag") { q => + inputData.assertInitialOffsetIsCalledAfterLatestOffsetSeen = true + }, + StartStream(), + addData((11L to 20L).toArray), + ProcessAllAvailable(), + Execute("latest seen offset should be 0") { _ => + assert(inputData.latestSeenOffset === LongOffset(0)) + }, + StopStream + ) + } + + test("DataSource V2 source with continuous mode") { + val inputData = new TestContinuousMemoryStream[Long](0, spark.sqlContext, 1) + val df = inputData.toDF().select("value") + + /** Add data to this test source by incrementing its available offset */ + def addData(values: Array[Long]): StreamAction = new AddData { + override def addData( + query: Option[StreamExecution]): (SparkDataStream, streaming.Offset) = { + (inputData, inputData.addData(values)) + } + } + + testStream(df)( + StartStream(trigger = Trigger.Continuous("1 hour")), + addData((1L to 10L).toArray), + AwaitEpoch(0), + Execute { _ => + assert(inputData.latestSeenOffset === null) + }, + IncrementEpoch(), + StopStream, + + StartStream(trigger = Trigger.Continuous("1 hour")), + addData((11L to 20L).toArray), + AwaitEpoch(2), + Execute { _ => + assert(inputData.latestSeenOffset === ContinuousMemoryStreamOffset(Map(0 -> 10))) + }, + IncrementEpoch(), + StopStream, + + StartStream(trigger = Trigger.Continuous("1 hour")), + addData((21L to 30L).toArray), + AwaitEpoch(3), + Execute { _ => + assert(inputData.latestSeenOffset === ContinuousMemoryStreamOffset(Map(0 -> 20))) + } + ) + } + + class TestSource(spark: SparkSession) extends Source with AcceptsLatestSeenOffset { + + @volatile var currentOffset = 0L + + override def getOffset: Option[Offset] = { + if (currentOffset <= 0) None else Some(LongOffset(currentOffset)) + } + + override def getBatch(start: Option[Offset], end: Offset): DataFrame = { + if (currentOffset == 0) currentOffset = getOffsetValue(end) + val plan = Range( + start.map(getOffsetValue).getOrElse(0L) + 1L, getOffsetValue(end) + 1L, 1, None, + isStreaming = true) + Dataset.ofRows(spark, plan) + } + + def incrementAvailableOffset(numNewRows: Int): Unit = { + currentOffset = currentOffset + numNewRows + } + + override def setLatestSeenOffset(offset: streaming.Offset): Unit = { + assert(false, "This method should not be called!") + } + + def reset(): Unit = { + currentOffset = 0L + } + + def toDF(): DataFrame = Dataset.ofRows(spark, StreamingExecutionRelation(this, spark)) + override def schema: StructType = new StructType().add("value", LongType) + override def stop(): Unit = {} + 
private def getOffsetValue(offset: Offset): Long = { + offset match { + case s: SerializedOffset => LongOffset(s).offset + case l: LongOffset => l.offset + case _ => throw new IllegalArgumentException("incorrect offset type: " + offset) + } + } + } + + class TestMemoryStream[A : Encoder]( + _id: Int, + _sqlContext: SQLContext, + _numPartitions: Option[Int] = None) + extends MemoryStream[A](_id, _sqlContext, _numPartitions) + with AcceptsLatestSeenOffset { + + @volatile var latestSeenOffset: streaming.Offset = null + + // Flag to assert the sequence of calls in following scenario: + // When the query is restarted, we expect the execution to call `latestSeenOffset` first. + // Later as part of the execution, execution may call `initialOffset` if the previous + // run of the query had no committed batches. + @volatile var assertInitialOffsetIsCalledAfterLatestOffsetSeen: Boolean = false + + override def setLatestSeenOffset(offset: streaming.Offset): Unit = { + latestSeenOffset = offset + } + + override def initialOffset: streaming.Offset = { + if (assertInitialOffsetIsCalledAfterLatestOffsetSeen && latestSeenOffset == null) { + fail("Expected the latest seen offset to be set.") + } + super.initialOffset + } + } + + class TestContinuousMemoryStream[A : Encoder]( + _id: Int, + _sqlContext: SQLContext, + _numPartitions: Int = 2) + extends ContinuousMemoryStream[A](_id, _sqlContext, _numPartitions) + with AcceptsLatestSeenOffset { + + @volatile var latestSeenOffset: streaming.Offset = _ + + override def setLatestSeenOffset(offset: streaming.Offset): Unit = { + latestSeenOffset = offset + } + } +} From e81f6118693b9f624c54b52ca92fd80d6c9d4432 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 11 Feb 2022 16:02:55 +0800 Subject: [PATCH 218/513] [SPARK-38036][SQL][TESTS] Refactor `VersionsSuite` to `HiveClientSuite` and make it a subclass of `HiveVersionSuite` ### What changes were proposed in this pull request? There is a TODO in `VersionsSuite`: - TODO: Refactor this to `HiveClientSuite` and make it a subclass of `HiveVersionSuite` this pr completed this TODO, the main change as follows: - copy all test cases in `versions.foreach` scope of `VersionsSuite` to `HiveClientSuite` - override `nestedSuites` function in `HiveClientSuites` to use each hive version to test the cases in `HiveClientSuite` similar as `HiveClientUserNameSuites` and `HivePartitionFilteringSuites` - move other cases to `HiveClientSuites` ### Why are the changes needed? Make `VersionsSuite` as a subclass of `HiveVersionSuite` to unify the test mode of multi version hive ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Pass GA - Manual test: **Before** ``` mvn clean install -DskipTests -pl sql/hive -am mvn test -pl sql/hive -Dtest=none -DwildcardSuites=org.apache.spark.sql.hive.client.HiveClientSuites Run completed in 13 minutes, 10 seconds. Total number of tests run: 867 Suites: completed 2, aborted 0 Tests: succeeded 867, failed 0, canceled 0, ignored 1, pending 0 All tests passed. ``` **After** ``` mvn clean install -DskipTests -pl sql/hive -am mvn test -pl sql/hive -Dtest=none -DwildcardSuites=org.apache.spark.sql.hive.client.HiveClientSuites Run completed in 3 minutes, 8 seconds. Total number of tests run: 867 Suites: completed 14, aborted 0 Tests: succeeded 867, failed 0, canceled 0, ignored 1, pending 0 All tests passed ``` The number of test cases is the same, and Suites changed from 2 to 14 Closes #35335 from LuciferYang/SPARK-38036. 
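As an editorial aside for readers unfamiliar with the `nestedSuites` pattern mentioned above: the sketch below is a rough, hypothetical illustration (the class name and version list are made up, and it is not the exact code added by this patch) of how a wrapper suite can instantiate one `HiveClientSuite` per supported Hive version so that each version is reported as its own ScalaTest `Suite`.

```scala
// Rough sketch of the nestedSuites pattern: the wrapper suite owns no tests itself and simply
// returns one per-version HiveClientSuite instance for ScalaTest to run.
import scala.collection.immutable
import org.scalatest.Suite

class ExampleHiveClientSuites extends Suite {
  // Illustrative list; the real set of versions comes from the Hive client test support code.
  private val allVersions = Seq("2.0", "2.1", "2.2", "2.3", "3.0", "3.1")

  override def nestedSuites: immutable.IndexedSeq[Suite] =
    allVersions.map(v => new HiveClientSuite(v, allVersions)).toIndexedSeq
}
```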
Authored-by: yangjie01 Signed-off-by: Wenchen Fan --- .../sql/hive/client/HiveClientSuite.scala | 1072 +++++++++++++++ .../sql/hive/client/HiveClientSuites.scala | 96 ++ .../sql/hive/client/HiveVersionSuite.scala | 1 + .../spark/sql/hive/client/VersionsSuite.scala | 1159 ----------------- 4 files changed, 1169 insertions(+), 1159 deletions(-) create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala delete mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala new file mode 100644 index 0000000000000..a23efd8ffd34d --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala @@ -0,0 +1,1072 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.client + +import java.io.{ByteArrayOutputStream, File, PrintStream, PrintWriter} +import java.net.URI + +import org.apache.commons.lang3.{JavaVersion, SystemUtils} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.common.StatsSetupConst +import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe +import org.apache.hadoop.mapred.TextInputFormat +import org.apache.hadoop.security.UserGroupInformation + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.analysis.{DatabaseAlreadyExistsException, NoSuchDatabaseException, NoSuchPermanentFunctionException, PartitionsAlreadyExistException} +import org.apache.spark.sql.catalyst.catalog._ +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} +import org.apache.spark.sql.hive.HiveExternalCatalog +import org.apache.spark.sql.hive.test.TestHiveVersion +import org.apache.spark.sql.types.{IntegerType, StructType} +import org.apache.spark.util.{MutableURLClassLoader, Utils} + +class HiveClientSuite(version: String, allVersions: Seq[String]) + extends HiveVersionSuite(version) { + + private var versionSpark: TestHiveVersion = null + + private val emptyDir = Utils.createTempDir().getCanonicalPath + + /** + * Drops table `tableName` after calling `f`. 
+ */ + protected def withTable(tableNames: String*)(f: => Unit): Unit = { + try f finally { + tableNames.foreach { name => + versionSpark.sql(s"DROP TABLE IF EXISTS $name") + } + } + } + + test("create client") { + client = null + System.gc() // Hack to avoid SEGV on some JVM versions. + val hadoopConf = new Configuration() + hadoopConf.set("test", "success") + client = buildClient(hadoopConf) + if (versionSpark != null) versionSpark.reset() + versionSpark = TestHiveVersion(client) + assert(versionSpark.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog] + .client.version.fullVersion.startsWith(version)) + } + + def table(database: String, tableName: String, + tableType: CatalogTableType = CatalogTableType.MANAGED): CatalogTable = { + CatalogTable( + identifier = TableIdentifier(tableName, Some(database)), + tableType = tableType, + schema = new StructType().add("key", "int"), + storage = CatalogStorageFormat( + locationUri = None, + inputFormat = Some(classOf[TextInputFormat].getName), + outputFormat = Some(classOf[HiveIgnoreKeyTextOutputFormat[_, _]].getName), + serde = Some(classOf[LazySimpleSerDe].getName), + compressed = false, + properties = Map.empty + )) + } + + /////////////////////////////////////////////////////////////////////////// + // Database related API + /////////////////////////////////////////////////////////////////////////// + + private val tempDatabasePath = Utils.createTempDir().toURI + + test("createDatabase") { + val defaultDB = CatalogDatabase("default", "desc", new URI("loc"), Map()) + client.createDatabase(defaultDB, ignoreIfExists = true) + val tempDB = CatalogDatabase( + "temporary", description = "test create", tempDatabasePath, Map()) + client.createDatabase(tempDB, ignoreIfExists = true) + + intercept[DatabaseAlreadyExistsException] { + client.createDatabase(tempDB, ignoreIfExists = false) + } + } + + test("create/get/alter database should pick right user name as owner") { + if (version != "0.12") { + val currentUser = UserGroupInformation.getCurrentUser.getUserName + val ownerName = "SPARK_29425" + val db1 = "SPARK_29425_1" + val db2 = "SPARK_29425_2" + val ownerProps = Map("owner" -> ownerName) + + // create database with owner + val dbWithOwner = CatalogDatabase(db1, "desc", Utils.createTempDir().toURI, ownerProps) + client.createDatabase(dbWithOwner, ignoreIfExists = true) + val getDbWithOwner = client.getDatabase(db1) + assert(getDbWithOwner.properties("owner") === ownerName) + // alter database without owner + client.alterDatabase(getDbWithOwner.copy(properties = Map())) + assert(client.getDatabase(db1).properties("owner") === "") + + // create database without owner + val dbWithoutOwner = CatalogDatabase(db2, "desc", Utils.createTempDir().toURI, Map()) + client.createDatabase(dbWithoutOwner, ignoreIfExists = true) + val getDbWithoutOwner = client.getDatabase(db2) + assert(getDbWithoutOwner.properties("owner") === currentUser) + // alter database with owner + client.alterDatabase(getDbWithoutOwner.copy(properties = ownerProps)) + assert(client.getDatabase(db2).properties("owner") === ownerName) + } + } + + test("createDatabase with null description") { + withTempDir { tmpDir => + val dbWithNullDesc = + CatalogDatabase("dbWithNullDesc", description = null, tmpDir.toURI, Map()) + client.createDatabase(dbWithNullDesc, ignoreIfExists = true) + assert(client.getDatabase("dbWithNullDesc").description == "") + } + } + + test("setCurrentDatabase") { + client.setCurrentDatabase("default") + } + + test("getDatabase") { + // No exception 
should be thrown + client.getDatabase("default") + intercept[NoSuchDatabaseException](client.getDatabase("nonexist")) + } + + test("databaseExists") { + assert(client.databaseExists("default")) + assert(!client.databaseExists("nonexist")) + } + + test("listDatabases") { + assert(client.listDatabases("defau.*") == Seq("default")) + } + + test("alterDatabase") { + val database = client.getDatabase("temporary").copy(properties = Map("flag" -> "true")) + client.alterDatabase(database) + assert(client.getDatabase("temporary").properties.contains("flag")) + + // test alter database location + val tempDatabasePath2 = Utils.createTempDir().toURI + // Hive support altering database location since HIVE-8472. + if (version == "3.0" || version == "3.1") { + client.alterDatabase(database.copy(locationUri = tempDatabasePath2)) + val uriInCatalog = client.getDatabase("temporary").locationUri + assert("file" === uriInCatalog.getScheme) + assert(new Path(tempDatabasePath2.getPath).toUri.getPath === uriInCatalog.getPath, + "Failed to alter database location") + } else { + val e = intercept[AnalysisException] { + client.alterDatabase(database.copy(locationUri = tempDatabasePath2)) + } + assert(e.getMessage.contains("does not support altering database location")) + } + } + + test("dropDatabase") { + assert(client.databaseExists("temporary")) + + client.createTable(table("temporary", tableName = "tbl"), ignoreIfExists = false) + val ex = intercept[AnalysisException] { + client.dropDatabase("temporary", ignoreIfNotExists = false, cascade = false) + assert(false, "dropDatabase should throw HiveException") + } + assert(ex.message.contains("Cannot drop a non-empty database: temporary.")) + + client.dropDatabase("temporary", ignoreIfNotExists = false, cascade = true) + assert(!client.databaseExists("temporary")) + } + + /////////////////////////////////////////////////////////////////////////// + // Table related API + /////////////////////////////////////////////////////////////////////////// + + test("createTable") { + client.createTable(table("default", tableName = "src"), ignoreIfExists = false) + client.createTable(table("default", tableName = "temporary"), ignoreIfExists = false) + client.createTable(table("default", tableName = "view1", tableType = CatalogTableType.VIEW), + ignoreIfExists = false) + } + + test("loadTable") { + client.loadTable( + emptyDir, + tableName = "src", + replace = false, + isSrcLocal = false) + } + + test("tableExists") { + // No exception should be thrown + assert(client.tableExists("default", "src")) + assert(!client.tableExists("default", "nonexistent")) + } + + test("getTable") { + // No exception should be thrown + client.getTable("default", "src") + } + + test("getTableOption") { + assert(client.getTableOption("default", "src").isDefined) + } + + test("getTablesByName") { + assert(client.getTablesByName("default", Seq("src")).head + == client.getTableOption("default", "src").get) + } + + test("getTablesByName when multiple tables") { + assert(client.getTablesByName("default", Seq("src", "temporary")) + .map(_.identifier.table) == Seq("src", "temporary")) + } + + test("getTablesByName when some tables do not exist") { + assert(client.getTablesByName("default", Seq("src", "notexist")) + .map(_.identifier.table) == Seq("src")) + } + + test("getTablesByName when contains invalid name") { + // scalastyle:off + val name = "砖" + // scalastyle:on + assert(client.getTablesByName("default", Seq("src", name)) + .map(_.identifier.table) == Seq("src")) + } + + test("getTablesByName when 
empty") { + assert(client.getTablesByName("default", Seq.empty).isEmpty) + } + + test("alterTable(table: CatalogTable)") { + val newTable = client.getTable("default", "src").copy(properties = Map("changed" -> "")) + client.alterTable(newTable) + assert(client.getTable("default", "src").properties.contains("changed")) + } + + test("alterTable - should respect the original catalog table's owner name") { + val ownerName = "SPARK-29405" + val originalTable = client.getTable("default", "src") + // mocking the owner is what we declared + val newTable = originalTable.copy(owner = ownerName) + client.alterTable(newTable) + assert(client.getTable("default", "src").owner === ownerName) + // mocking the owner is empty + val newTable2 = originalTable.copy(owner = "") + client.alterTable(newTable2) + assert(client.getTable("default", "src").owner === client.userName) + } + + test("alterTable(dbName: String, tableName: String, table: CatalogTable)") { + val newTable = client.getTable("default", "src").copy(properties = Map("changedAgain" -> "")) + client.alterTable("default", "src", newTable) + assert(client.getTable("default", "src").properties.contains("changedAgain")) + } + + test("alterTable - rename") { + val newTable = client.getTable("default", "src") + .copy(identifier = TableIdentifier("tgt", database = Some("default"))) + assert(!client.tableExists("default", "tgt")) + + client.alterTable("default", "src", newTable) + + assert(client.tableExists("default", "tgt")) + assert(!client.tableExists("default", "src")) + } + + test("alterTable - change database") { + val tempDB = CatalogDatabase( + "temporary", description = "test create", tempDatabasePath, Map()) + client.createDatabase(tempDB, ignoreIfExists = true) + + val newTable = client.getTable("default", "tgt") + .copy(identifier = TableIdentifier("tgt", database = Some("temporary"))) + assert(!client.tableExists("temporary", "tgt")) + + client.alterTable("default", "tgt", newTable) + + assert(client.tableExists("temporary", "tgt")) + assert(!client.tableExists("default", "tgt")) + } + + test("alterTable - change database and table names") { + val newTable = client.getTable("temporary", "tgt") + .copy(identifier = TableIdentifier("src", database = Some("default"))) + assert(!client.tableExists("default", "src")) + + client.alterTable("temporary", "tgt", newTable) + + assert(client.tableExists("default", "src")) + assert(!client.tableExists("temporary", "tgt")) + } + + test("listTables(database)") { + assert(client.listTables("default") === Seq("src", "temporary", "view1")) + } + + test("listTables(database, pattern)") { + assert(client.listTables("default", pattern = "src") === Seq("src")) + assert(client.listTables("default", pattern = "nonexist").isEmpty) + } + + test("listTablesByType(database, pattern, tableType)") { + assert(client.listTablesByType("default", pattern = "view1", + CatalogTableType.VIEW) === Seq("view1")) + assert(client.listTablesByType("default", pattern = "nonexist", + CatalogTableType.VIEW).isEmpty) + } + + test("dropTable") { + val versionsWithoutPurge = + if (allVersions.contains("0.14")) allVersions.takeWhile(_ != "0.14") else Nil + // First try with the purge option set. This should fail if the version is < 0.14, in which + // case we check the version and try without it. 
+ try { + client.dropTable("default", tableName = "temporary", ignoreIfNotExists = false, + purge = true) + assert(!versionsWithoutPurge.contains(version)) + } catch { + case _: UnsupportedOperationException => + assert(versionsWithoutPurge.contains(version)) + client.dropTable("default", tableName = "temporary", ignoreIfNotExists = false, + purge = false) + } + // Drop table with type CatalogTableType.VIEW. + try { + client.dropTable("default", tableName = "view1", ignoreIfNotExists = false, + purge = true) + assert(!versionsWithoutPurge.contains(version)) + } catch { + case _: UnsupportedOperationException => + client.dropTable("default", tableName = "view1", ignoreIfNotExists = false, + purge = false) + } + assert(client.listTables("default") === Seq("src")) + } + + /////////////////////////////////////////////////////////////////////////// + // Partition related API + /////////////////////////////////////////////////////////////////////////// + + private val storageFormat = CatalogStorageFormat( + locationUri = None, + inputFormat = None, + outputFormat = None, + serde = None, + compressed = false, + properties = Map.empty) + + test("sql create partitioned table") { + val table = CatalogTable( + identifier = TableIdentifier("src_part", Some("default")), + tableType = CatalogTableType.MANAGED, + schema = new StructType().add("value", "int").add("key1", "int").add("key2", "int"), + partitionColumnNames = Seq("key1", "key2"), + storage = CatalogStorageFormat( + locationUri = None, + inputFormat = Some(classOf[TextInputFormat].getName), + outputFormat = Some(classOf[HiveIgnoreKeyTextOutputFormat[_, _]].getName), + serde = Some(classOf[LazySimpleSerDe].getName), + compressed = false, + properties = Map.empty + )) + client.createTable(table, ignoreIfExists = false) + } + + val testPartitionCount = 2 + + test("createPartitions") { + val partitions = (1 to testPartitionCount).map { key2 => + CatalogTablePartition(Map("key1" -> "1", "key2" -> key2.toString), storageFormat) + } + client.createPartitions( + "default", "src_part", partitions, ignoreIfExists = true) + } + + test("getPartitionNames(catalogTable)") { + val partitionNames = (1 to testPartitionCount).map(key2 => s"key1=1/key2=$key2") + assert(partitionNames == client.getPartitionNames(client.getTable("default", "src_part"))) + } + + test("getPartitions(db, table, spec)") { + assert(testPartitionCount == + client.getPartitions("default", "src_part", None).size) + } + + test("getPartitionsByFilter") { + // Only one partition [1, 1] for key2 == 1 + val result = client.getPartitionsByFilter(client.getTable("default", "src_part"), + Seq(EqualTo(AttributeReference("key2", IntegerType)(), Literal(1)))) + + // Hive 0.12 doesn't support getPartitionsByFilter, it ignores the filter condition. 
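For reference, a standalone sketch of the Catalyst predicate built above, assuming only spark-catalyst on the classpath; the hypothetical `PartitionFilterSketch` object constructs the same `key2 = 1` expression that the client translates into a Hive partition filter:

```
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal}
import org.apache.spark.sql.types.IntegerType

object PartitionFilterSketch {
  def main(args: Array[String]): Unit = {
    // Reference to the partition column and the literal it is compared against.
    val key2 = AttributeReference("key2", IntegerType)()
    val predicate = EqualTo(key2, Literal(1))
    // Prints a SQL-style rendering of the expression, roughly (key2 = 1).
    println(predicate.sql)
  }
}
```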
+ if (version != "0.12") { + assert(result.size == 1) + } else { + assert(result.size == testPartitionCount) + } + } + + test("getPartition") { + // No exception should be thrown + client.getPartition("default", "src_part", Map("key1" -> "1", "key2" -> "2")) + } + + test("getPartitionOption(db: String, table: String, spec: TablePartitionSpec)") { + val partition = client.getPartitionOption( + "default", "src_part", Map("key1" -> "1", "key2" -> "2")) + assert(partition.isDefined) + } + + test("getPartitionOption(table: CatalogTable, spec: TablePartitionSpec)") { + val partition = client.getPartitionOption( + client.getTable("default", "src_part"), Map("key1" -> "1", "key2" -> "2")) + assert(partition.isDefined) + } + + test("getPartitions(db: String, table: String)") { + assert(testPartitionCount == client.getPartitions("default", "src_part", None).size) + } + + test("loadPartition") { + val partSpec = new java.util.LinkedHashMap[String, String] + partSpec.put("key1", "1") + partSpec.put("key2", "2") + + client.loadPartition( + emptyDir, + "default", + "src_part", + partSpec, + replace = false, + inheritTableSpecs = false, + isSrcLocal = false) + } + + test("loadDynamicPartitions") { + val partSpec = new java.util.LinkedHashMap[String, String] + partSpec.put("key1", "1") + partSpec.put("key2", "") // Dynamic partition + + client.loadDynamicPartitions( + emptyDir, + "default", + "src_part", + partSpec, + replace = false, + numDP = 1) + } + + test("renamePartitions") { + val oldSpec = Map("key1" -> "1", "key2" -> "1") + val newSpec = Map("key1" -> "1", "key2" -> "3") + client.renamePartitions("default", "src_part", Seq(oldSpec), Seq(newSpec)) + + // Checks the existence of the new partition (key1 = 1, key2 = 3) + assert(client.getPartitionOption("default", "src_part", newSpec).isDefined) + } + + test("alterPartitions") { + val spec = Map("key1" -> "1", "key2" -> "2") + val parameters = Map(StatsSetupConst.TOTAL_SIZE -> "0", StatsSetupConst.NUM_FILES -> "1") + val newLocation = new URI(Utils.createTempDir().toURI.toString.stripSuffix("/")) + val storage = storageFormat.copy( + locationUri = Some(newLocation), + // needed for 0.12 alter partitions + serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + val partition = CatalogTablePartition(spec, storage, parameters) + client.alterPartitions("default", "src_part", Seq(partition)) + assert(client.getPartition("default", "src_part", spec) + .storage.locationUri.contains(newLocation)) + assert(client.getPartition("default", "src_part", spec) + .parameters.get(StatsSetupConst.TOTAL_SIZE).contains("0")) + } + + test("dropPartitions") { + val spec = Map("key1" -> "1", "key2" -> "3") + val versionsWithoutPurge = + if (allVersions.contains("1.2")) allVersions.takeWhile(_ != "1.2") else Nil + // Similar to dropTable; try with purge set, and if it fails, make sure we're running + // with a version that is older than the minimum (1.2 in this case). 
+ try { + client.dropPartitions("default", "src_part", Seq(spec), ignoreIfNotExists = true, + purge = true, retainData = false) + assert(!versionsWithoutPurge.contains(version)) + } catch { + case _: UnsupportedOperationException => + assert(versionsWithoutPurge.contains(version)) + client.dropPartitions("default", "src_part", Seq(spec), ignoreIfNotExists = true, + purge = false, retainData = false) + } + + assert(client.getPartitionOption("default", "src_part", spec).isEmpty) + } + + test("createPartitions if already exists") { + val partitions = Seq(CatalogTablePartition( + Map("key1" -> "101", "key2" -> "102"), + storageFormat)) + try { + client.createPartitions("default", "src_part", partitions, ignoreIfExists = false) + val errMsg = intercept[PartitionsAlreadyExistException] { + client.createPartitions("default", "src_part", partitions, ignoreIfExists = false) + }.getMessage + assert(errMsg.contains("partitions already exists")) + } finally { + client.dropPartitions( + "default", + "src_part", + partitions.map(_.spec), + ignoreIfNotExists = true, + purge = false, + retainData = false) + } + } + + /////////////////////////////////////////////////////////////////////////// + // Function related API + /////////////////////////////////////////////////////////////////////////// + + def function(name: String, className: String): CatalogFunction = { + CatalogFunction( + FunctionIdentifier(name, Some("default")), className, Seq.empty[FunctionResource]) + } + + test("createFunction") { + val functionClass = "org.apache.spark.MyFunc1" + if (version == "0.12") { + // Hive 0.12 doesn't support creating permanent functions + intercept[AnalysisException] { + client.createFunction("default", function("func1", functionClass)) + } + } else { + client.createFunction("default", function("func1", functionClass)) + } + } + + test("functionExists") { + if (version == "0.12") { + // Hive 0.12 doesn't allow customized permanent functions + assert(!client.functionExists("default", "func1")) + } else { + assert(client.functionExists("default", "func1")) + } + } + + test("renameFunction") { + if (version == "0.12") { + // Hive 0.12 doesn't allow customized permanent functions + intercept[NoSuchPermanentFunctionException] { + client.renameFunction("default", "func1", "func2") + } + } else { + client.renameFunction("default", "func1", "func2") + assert(client.functionExists("default", "func2")) + } + } + + test("alterFunction") { + val functionClass = "org.apache.spark.MyFunc2" + if (version == "0.12") { + // Hive 0.12 doesn't allow customized permanent functions + intercept[NoSuchPermanentFunctionException] { + client.alterFunction("default", function("func2", functionClass)) + } + } else { + client.alterFunction("default", function("func2", functionClass)) + } + } + + test("getFunction") { + if (version == "0.12") { + // Hive 0.12 doesn't allow customized permanent functions + intercept[NoSuchPermanentFunctionException] { + client.getFunction("default", "func2") + } + } else { + // No exception should be thrown + val func = client.getFunction("default", "func2") + assert(func.className == "org.apache.spark.MyFunc2") + } + } + + test("getFunctionOption") { + if (version == "0.12") { + // Hive 0.12 doesn't allow customized permanent functions + assert(client.getFunctionOption("default", "func2").isEmpty) + } else { + assert(client.getFunctionOption("default", "func2").isDefined) + assert(client.getFunctionOption("default", "the_func_not_exists").isEmpty) + } + } + + test("listFunctions") { + if (version == 
"0.12") { + // Hive 0.12 doesn't allow customized permanent functions + assert(client.listFunctions("default", "fun.*").isEmpty) + } else { + assert(client.listFunctions("default", "fun.*").size == 1) + } + } + + test("dropFunction") { + if (version == "0.12") { + // Hive 0.12 doesn't support creating permanent functions + intercept[NoSuchPermanentFunctionException] { + client.dropFunction("default", "func2") + } + } else { + // No exception should be thrown + client.dropFunction("default", "func2") + assert(client.listFunctions("default", "fun.*").isEmpty) + } + } + + /////////////////////////////////////////////////////////////////////////// + // SQL related API + /////////////////////////////////////////////////////////////////////////// + + test("sql set command") { + client.runSqlHive("SET spark.sql.test.key=1") + } + + test("sql create index and reset") { + // HIVE-18448 Since Hive 3.0, INDEX is not supported. + if (version != "3.0" && version != "3.1") { + client.runSqlHive("CREATE TABLE indexed_table (key INT)") + client.runSqlHive("CREATE INDEX index_1 ON TABLE indexed_table(key) " + + "as 'COMPACT' WITH DEFERRED REBUILD") + } + } + + test("sql read hive materialized view") { + // HIVE-14249 Since Hive 2.3.0, materialized view is supported. + if (version == "2.3" || version == "3.0" || version == "3.1") { + // Since Hive 3.0(HIVE-19383), we can not run local MR by `client.runSqlHive` with JDK 11. + assume(version == "2.3" || !SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) + // Since HIVE-18394(Hive 3.1), "Create Materialized View" should default to rewritable ones + val disableRewrite = if (version == "2.3" || version == "3.0") "" else "DISABLE REWRITE" + client.runSqlHive("CREATE TABLE materialized_view_tbl (c1 INT)") + client.runSqlHive( + s"CREATE MATERIALIZED VIEW mv1 $disableRewrite AS SELECT * FROM materialized_view_tbl") + val e = intercept[AnalysisException](versionSpark.table("mv1").collect()).getMessage + assert(e.contains("Hive materialized view is not supported")) + } + } + + /////////////////////////////////////////////////////////////////////////// + // Miscellaneous API + /////////////////////////////////////////////////////////////////////////// + + test("version") { + assert(client.version.fullVersion.startsWith(version)) + } + + test("getConf") { + assert("success" === client.getConf("test", null)) + } + + test("setOut") { + client.setOut(new PrintStream(new ByteArrayOutputStream())) + } + + test("setInfo") { + client.setInfo(new PrintStream(new ByteArrayOutputStream())) + } + + test("setError") { + client.setError(new PrintStream(new ByteArrayOutputStream())) + } + + test("newSession") { + val newClient = client.newSession() + assert(newClient != null) + } + + test("withHiveState and addJar") { + val newClassPath = "." + client.addJar(newClassPath) + client.withHiveState { + // No exception should be thrown. + // withHiveState changes the classloader to MutableURLClassLoader + val classLoader = Thread.currentThread().getContextClassLoader + .asInstanceOf[MutableURLClassLoader] + + val urls = classLoader.getURLs + urls.contains(new File(newClassPath).toURI.toURL) + } + } + + test("reset") { + // Clears all database, tables, functions... 
+ client.reset() + assert(client.listTables("default").isEmpty) + } + + /////////////////////////////////////////////////////////////////////////// + // End-To-End tests + /////////////////////////////////////////////////////////////////////////// + + test("CREATE TABLE AS SELECT") { + withTable("tbl") { + versionSpark.sql("CREATE TABLE tbl AS SELECT 1 AS a") + assert(versionSpark.table("tbl").collect().toSeq == Seq(Row(1))) + val tableMeta = versionSpark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")) + val totalSize = tableMeta.stats.map(_.sizeInBytes) + // Except 0.12, all the following versions will fill the Hive-generated statistics + if (version == "0.12") { + assert(totalSize.isEmpty) + } else { + assert(totalSize.nonEmpty && totalSize.get > 0) + } + } + } + + test("CREATE Partitioned TABLE AS SELECT") { + withTable("tbl") { + versionSpark.sql( + """ + |CREATE TABLE tbl(c1 string) + |USING hive + |PARTITIONED BY (ds STRING) + """.stripMargin) + versionSpark.sql("INSERT OVERWRITE TABLE tbl partition (ds='2') SELECT '1'") + + assert(versionSpark.table("tbl").collect().toSeq == Seq(Row("1", "2"))) + val partMeta = versionSpark.sessionState.catalog.getPartition( + TableIdentifier("tbl"), spec = Map("ds" -> "2")).parameters + val totalSize = partMeta.get(StatsSetupConst.TOTAL_SIZE).map(_.toLong) + val numFiles = partMeta.get(StatsSetupConst.NUM_FILES).map(_.toLong) + // Except 0.12, all the following versions will fill the Hive-generated statistics + if (version == "0.12") { + assert(totalSize.isEmpty && numFiles.isEmpty) + } else { + assert(totalSize.nonEmpty && numFiles.nonEmpty) + } + + versionSpark.sql( + """ + |ALTER TABLE tbl PARTITION (ds='2') + |SET SERDEPROPERTIES ('newKey' = 'vvv') + """.stripMargin) + val newPartMeta = versionSpark.sessionState.catalog.getPartition( + TableIdentifier("tbl"), spec = Map("ds" -> "2")).parameters + + val newTotalSize = newPartMeta.get(StatsSetupConst.TOTAL_SIZE).map(_.toLong) + val newNumFiles = newPartMeta.get(StatsSetupConst.NUM_FILES).map(_.toLong) + // Except 0.12, all the following versions will fill the Hive-generated statistics + if (version == "0.12") { + assert(newTotalSize.isEmpty && newNumFiles.isEmpty) + } else { + assert(newTotalSize.nonEmpty && newNumFiles.nonEmpty) + } + } + } + + test("Delete the temporary staging directory and files after each insert") { + withTempDir { tmpDir => + withTable("tab") { + versionSpark.sql( + s""" + |CREATE TABLE tab(c1 string) + |location '${tmpDir.toURI.toString}' + """.stripMargin) + + (1 to 3).map { i => + versionSpark.sql(s"INSERT OVERWRITE TABLE tab SELECT '$i'") + } + def listFiles(path: File): List[String] = { + val dir = path.listFiles() + val folders = dir.filter(_.isDirectory).toList + val filePaths = dir.map(_.getName).toList + folders.flatMap(listFiles) ++: filePaths + } + // expect 2 files left: `.part-00000-random-uuid.crc` and `part-00000-random-uuid` + // 0.12, 0.13, 1.0 and 1.1 also has another two more files ._SUCCESS.crc and _SUCCESS + val metadataFiles = Seq("._SUCCESS.crc", "_SUCCESS") + assert(listFiles(tmpDir).filterNot(metadataFiles.contains).length == 2) + } + } + } + + test("SPARK-13709: reading partitioned Avro table with nested schema") { + withTempDir { dir => + val path = dir.toURI.toString + val tableName = "spark_13709" + val tempTableName = "spark_13709_temp" + + new File(dir.getAbsolutePath, tableName).mkdir() + new File(dir.getAbsolutePath, tempTableName).mkdir() + + val avroSchema = + """{ + | "name": "test_record", + | "type": "record", + | 
"fields": [ { + | "name": "f0", + | "type": "int" + | }, { + | "name": "f1", + | "type": { + | "type": "record", + | "name": "inner", + | "fields": [ { + | "name": "f10", + | "type": "int" + | }, { + | "name": "f11", + | "type": "double" + | } ] + | } + | } ] + |} + """.stripMargin + + withTable(tableName, tempTableName) { + // Creates the external partitioned Avro table to be tested. + versionSpark.sql( + s"""CREATE EXTERNAL TABLE $tableName + |PARTITIONED BY (ds STRING) + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + |STORED AS + | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + |LOCATION '$path/$tableName' + |TBLPROPERTIES ('avro.schema.literal' = '$avroSchema') + """.stripMargin + ) + + // Creates an temporary Avro table used to prepare testing Avro file. + versionSpark.sql( + s"""CREATE EXTERNAL TABLE $tempTableName + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + |STORED AS + | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + |LOCATION '$path/$tempTableName' + |TBLPROPERTIES ('avro.schema.literal' = '$avroSchema') + """.stripMargin + ) + + // Generates Avro data. + versionSpark.sql(s"INSERT OVERWRITE TABLE $tempTableName SELECT 1, STRUCT(2, 2.5)") + + // Adds generated Avro data as a new partition to the testing table. + versionSpark.sql( + s"ALTER TABLE $tableName ADD PARTITION (ds = 'foo') LOCATION '$path/$tempTableName'") + + // The following query fails before SPARK-13709 is fixed. This is because when reading + // data from table partitions, Avro deserializer needs the Avro schema, which is defined + // in table property "avro.schema.literal". However, we only initializes the deserializer + // using partition properties, which doesn't include the wanted property entry. Merging + // two sets of properties solves the problem. + assert(versionSpark.sql(s"SELECT * FROM $tableName").collect() === + Array(Row(1, Row(2, 2.5D), "foo"))) + } + } + } + + test("CTAS for managed data source tables") { + withTable("t", "t1") { + versionSpark.range(1).write.saveAsTable("t") + assert(versionSpark.table("t").collect() === Array(Row(0))) + versionSpark.sql("create table t1 using parquet as select 2 as a") + assert(versionSpark.table("t1").collect() === Array(Row(2))) + } + } + + test("Decimal support of Avro Hive serde") { + val tableName = "tab1" + // TODO: add the other logical types. 
For details, see the link: + // https://avro.apache.org/docs/1.8.1/spec.html#Logical+Types + val avroSchema = + """{ + | "name": "test_record", + | "type": "record", + | "fields": [ { + | "name": "f0", + | "type": [ + | "null", + | { + | "precision": 38, + | "scale": 2, + | "type": "bytes", + | "logicalType": "decimal" + | } + | ] + | } ] + |} + """.stripMargin + + Seq(true, false).foreach { isPartitioned => + withTable(tableName) { + val partitionClause = if (isPartitioned) "PARTITIONED BY (ds STRING)" else "" + // Creates the (non-)partitioned Avro table + versionSpark.sql( + s""" + |CREATE TABLE $tableName + |$partitionClause + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + |STORED AS + | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + |TBLPROPERTIES ('avro.schema.literal' = '$avroSchema') + """.stripMargin + ) + + val errorMsg = "Cannot safely cast 'f0': decimal(2,1) to binary" + + if (isPartitioned) { + val insertStmt = s"INSERT OVERWRITE TABLE $tableName partition (ds='a') SELECT 1.3" + if (version == "0.12" || version == "0.13") { + val e = intercept[AnalysisException](versionSpark.sql(insertStmt)).getMessage + assert(e.contains(errorMsg)) + } else { + versionSpark.sql(insertStmt) + assert(versionSpark.table(tableName).collect() === + versionSpark.sql("SELECT 1.30, 'a'").collect()) + } + } else { + val insertStmt = s"INSERT OVERWRITE TABLE $tableName SELECT 1.3" + if (version == "0.12" || version == "0.13") { + val e = intercept[AnalysisException](versionSpark.sql(insertStmt)).getMessage + assert(e.contains(errorMsg)) + } else { + versionSpark.sql(insertStmt) + assert(versionSpark.table(tableName).collect() === + versionSpark.sql("SELECT 1.30").collect()) + } + } + } + } + } + + test("read avro file containing decimal") { + val url = Thread.currentThread().getContextClassLoader.getResource("avroDecimal") + val location = new File(url.getFile).toURI.toString + + val tableName = "tab1" + val avroSchema = + """{ + | "name": "test_record", + | "type": "record", + | "fields": [ { + | "name": "f0", + | "type": [ + | "null", + | { + | "precision": 38, + | "scale": 2, + | "type": "bytes", + | "logicalType": "decimal" + | } + | ] + | } ] + |} + """.stripMargin + withTable(tableName) { + versionSpark.sql( + s""" + |CREATE TABLE $tableName + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + |WITH SERDEPROPERTIES ('respectSparkSchema' = 'true') + |STORED AS + | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + |LOCATION '$location' + |TBLPROPERTIES ('avro.schema.literal' = '$avroSchema') + """.stripMargin + ) + assert(versionSpark.table(tableName).collect() === + versionSpark.sql("SELECT 1.30").collect()) + } + } + + test("SPARK-17920: Insert into/overwrite avro table") { + // skipped because it's failed in the condition on Windows + assume(!(Utils.isWindows && version == "0.12")) + withTempDir { dir => + val avroSchema = + """ + |{ + | "name": "test_record", + | "type": "record", + | "fields": [{ + | "name": "f0", + | "type": [ + | "null", + | { + | "precision": 38, + | "scale": 2, + | "type": "bytes", + | "logicalType": "decimal" + | } + | ] + | }] + |} + """.stripMargin + val schemaFile = new File(dir, "avroDecimal.avsc") + Utils.tryWithResource(new PrintWriter(schemaFile)) { writer => + writer.write(avroSchema) + } + val schemaPath = 
schemaFile.toURI.toString + + val url = Thread.currentThread().getContextClassLoader.getResource("avroDecimal") + val srcLocation = new File(url.getFile).toURI.toString + val destTableName = "tab1" + val srcTableName = "tab2" + + withTable(srcTableName, destTableName) { + versionSpark.sql( + s""" + |CREATE EXTERNAL TABLE $srcTableName + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + |WITH SERDEPROPERTIES ('respectSparkSchema' = 'true') + |STORED AS + | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + |LOCATION '$srcLocation' + |TBLPROPERTIES ('avro.schema.url' = '$schemaPath') + """.stripMargin + ) + + versionSpark.sql( + s""" + |CREATE TABLE $destTableName + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + |WITH SERDEPROPERTIES ('respectSparkSchema' = 'true') + |STORED AS + | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + |TBLPROPERTIES ('avro.schema.url' = '$schemaPath') + """.stripMargin + ) + versionSpark.sql( + s"""INSERT OVERWRITE TABLE $destTableName SELECT * FROM $srcTableName""") + val result = versionSpark.table(srcTableName).collect() + assert(versionSpark.table(destTableName).collect() === result) + versionSpark.sql( + s"""INSERT INTO TABLE $destTableName SELECT * FROM $srcTableName""") + assert(versionSpark.table(destTableName).collect().toSeq === result ++ result) + } + } + } + // TODO: add more tests. +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala new file mode 100644 index 0000000000000..b172c0dfedc9f --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.client + +import java.net.URI + +import scala.collection.immutable.IndexedSeq + +import org.apache.hadoop.conf.Configuration +import org.scalatest.Suite + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.catalog.CatalogDatabase +import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.hive.HiveUtils +import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest} + +/** + * A simple set of tests that call the methods of a [[HiveClient]], loading different version + * of hive from maven central. These tests are simple in that they are mostly just testing to make + * sure that reflective calls are not throwing NoSuchMethod error, but the actually functionality + * is not fully tested. 
+ */ +@SlowHiveTest +@ExtendedHiveTest +class HiveClientSuites extends SparkFunSuite with HiveClientVersions { + + override protected val enableAutoThreadAudit = false + + import HiveClientBuilder.buildClient + + test("success sanity check") { + val badClient = buildClient(HiveUtils.builtinHiveVersion, new Configuration()) + val db = CatalogDatabase("default", "desc", new URI("loc"), Map()) + badClient.createDatabase(db, ignoreIfExists = true) + } + + test("hadoop configuration preserved") { + val hadoopConf = new Configuration() + hadoopConf.set("test", "success") + val client = buildClient(HiveUtils.builtinHiveVersion, hadoopConf) + assert("success" === client.getConf("test", null)) + } + + test("override useless and side-effect hive configurations ") { + val hadoopConf = new Configuration() + // These hive flags should be reset by spark + hadoopConf.setBoolean("hive.cbo.enable", true) + hadoopConf.setBoolean("hive.session.history.enabled", true) + hadoopConf.set("hive.execution.engine", "tez") + val client = buildClient(HiveUtils.builtinHiveVersion, hadoopConf) + assert(!client.getConf("hive.cbo.enable", "true").toBoolean) + assert(!client.getConf("hive.session.history.enabled", "true").toBoolean) + assert(client.getConf("hive.execution.engine", "tez") === "mr") + } + + private def getNestedMessages(e: Throwable): String = { + var causes = "" + var lastException = e + while (lastException != null) { + causes += lastException.toString + "\n" + lastException = lastException.getCause + } + causes + } + + // Its actually pretty easy to mess things up and have all of your tests "pass" by accidentally + // connecting to an auto-populated, in-process metastore. Let's make sure we are getting the + // versions right by forcing a known compatibility failure. + // TODO: currently only works on mysql where we manually create the schema... + ignore("failure sanity check") { + val e = intercept[Throwable] { + val badClient = quietly { buildClient("13", new Configuration()) } + } + assert(getNestedMessages(e) contains "Unknown column 'A0.OWNER_NAME' in 'field list'") + } + + override def nestedSuites: IndexedSeq[Suite] = { + versions.map(new HiveClientSuite(_, versions)) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala index 02e9b7fb151fd..4cc51064cfdd3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala @@ -40,6 +40,7 @@ private[client] abstract class HiveVersionSuite(version: String) extends SparkFu // Since Hive 3.0, HIVE-19310 skipped `ensureDbInit` if `hive.in.test=false`. if (version == "3.0" || version == "3.1") { hadoopConf.set("hive.in.test", "true") + hadoopConf.set("hive.query.reexecution.enabled", "false") } HiveClientBuilder.buildClient( version, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala deleted file mode 100644 index 422a905f69b7c..0000000000000 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ /dev/null @@ -1,1159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.client - -import java.io.{ByteArrayOutputStream, File, PrintStream, PrintWriter} -import java.net.URI - -import org.apache.commons.lang3.{JavaVersion, SystemUtils} -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path -import org.apache.hadoop.hive.common.StatsSetupConst -import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe -import org.apache.hadoop.mapred.TextInputFormat -import org.apache.hadoop.security.UserGroupInformation - -import org.apache.spark.SparkFunSuite -import org.apache.spark.internal.Logging -import org.apache.spark.sql.{AnalysisException, Row} -import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{DatabaseAlreadyExistsException, NoSuchDatabaseException, NoSuchPermanentFunctionException, PartitionsAlreadyExistException} -import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} -import org.apache.spark.sql.catalyst.util.quietly -import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} -import org.apache.spark.sql.hive.test.TestHiveVersion -import org.apache.spark.sql.types.IntegerType -import org.apache.spark.sql.types.StructType -import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest} -import org.apache.spark.util.{MutableURLClassLoader, Utils} - -/** - * A simple set of tests that call the methods of a [[HiveClient]], loading different version - * of hive from maven central. These tests are simple in that they are mostly just testing to make - * sure that reflective calls are not throwing NoSuchMethod error, but the actually functionality - * is not fully tested. - */ -// TODO: Refactor this to `HiveClientSuite` and make it a subclass of `HiveVersionSuite` -@SlowHiveTest -@ExtendedHiveTest -class VersionsSuite extends SparkFunSuite with Logging { - - override protected val enableAutoThreadAudit = false - - import HiveClientBuilder.buildClient - - /** - * Drops table `tableName` after calling `f`. 
- */ - protected def withTable(tableNames: String*)(f: => Unit): Unit = { - try f finally { - tableNames.foreach { name => - versionSpark.sql(s"DROP TABLE IF EXISTS $name") - } - } - } - - test("success sanity check") { - val badClient = buildClient(HiveUtils.builtinHiveVersion, new Configuration()) - val db = new CatalogDatabase("default", "desc", new URI("loc"), Map()) - badClient.createDatabase(db, ignoreIfExists = true) - } - - test("hadoop configuration preserved") { - val hadoopConf = new Configuration() - hadoopConf.set("test", "success") - val client = buildClient(HiveUtils.builtinHiveVersion, hadoopConf) - assert("success" === client.getConf("test", null)) - } - - test("override useless and side-effect hive configurations ") { - val hadoopConf = new Configuration() - // These hive flags should be reset by spark - hadoopConf.setBoolean("hive.cbo.enable", true) - hadoopConf.setBoolean("hive.session.history.enabled", true) - hadoopConf.set("hive.execution.engine", "tez") - val client = buildClient(HiveUtils.builtinHiveVersion, hadoopConf) - assert(!client.getConf("hive.cbo.enable", "true").toBoolean) - assert(!client.getConf("hive.session.history.enabled", "true").toBoolean) - assert(client.getConf("hive.execution.engine", "tez") === "mr") - } - - private def getNestedMessages(e: Throwable): String = { - var causes = "" - var lastException = e - while (lastException != null) { - causes += lastException.toString + "\n" - lastException = lastException.getCause - } - causes - } - - private val emptyDir = Utils.createTempDir().getCanonicalPath - - // Its actually pretty easy to mess things up and have all of your tests "pass" by accidentally - // connecting to an auto-populated, in-process metastore. Let's make sure we are getting the - // versions right by forcing a known compatibility failure. - // TODO: currently only works on mysql where we manually create the schema... - ignore("failure sanity check") { - val e = intercept[Throwable] { - val badClient = quietly { buildClient("13", new Configuration()) } - } - assert(getNestedMessages(e) contains "Unknown column 'A0.OWNER_NAME' in 'field list'") - } - - private val versions = if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) { - Seq("2.0", "2.1", "2.2", "2.3", "3.0", "3.1") - } else { - Seq("0.12", "0.13", "0.14", "1.0", "1.1", "1.2", "2.0", "2.1", "2.2", "2.3", "3.0", "3.1") - } - - private var client: HiveClient = null - - private var versionSpark: TestHiveVersion = null - - versions.foreach { version => - test(s"$version: create client") { - client = null - System.gc() // Hack to avoid SEGV on some JVM versions. - val hadoopConf = new Configuration() - hadoopConf.set("test", "success") - // Hive changed the default of datanucleus.schema.autoCreateAll from true to false and - // hive.metastore.schema.verification from false to true since 2.0 - // For details, see the JIRA HIVE-6113 and HIVE-12463 - if (version == "2.0" || version == "2.1" || version == "2.2" || version == "2.3" || - version == "3.0" || version == "3.1") { - hadoopConf.set("datanucleus.schema.autoCreateAll", "true") - hadoopConf.set("hive.metastore.schema.verification", "false") - } - if (version == "3.0" || version == "3.1") { - // Since Hive 3.0, HIVE-19310 skipped `ensureDbInit` if `hive.in.test=false`. - hadoopConf.set("hive.in.test", "true") - // Since HIVE-17626(Hive 3.0.0), need to set hive.query.reexecution.enabled=false. 
- hadoopConf.set("hive.query.reexecution.enabled", "false") - } - client = buildClient(version, hadoopConf, HiveUtils.formatTimeVarsForHiveClient(hadoopConf)) - if (versionSpark != null) versionSpark.reset() - versionSpark = TestHiveVersion(client) - assert(versionSpark.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog] - .client.version.fullVersion.startsWith(version)) - } - - def table(database: String, tableName: String, - tableType: CatalogTableType = CatalogTableType.MANAGED): CatalogTable = { - CatalogTable( - identifier = TableIdentifier(tableName, Some(database)), - tableType = tableType, - schema = new StructType().add("key", "int"), - storage = CatalogStorageFormat( - locationUri = None, - inputFormat = Some(classOf[TextInputFormat].getName), - outputFormat = Some(classOf[HiveIgnoreKeyTextOutputFormat[_, _]].getName), - serde = Some(classOf[LazySimpleSerDe].getName()), - compressed = false, - properties = Map.empty - )) - } - - /////////////////////////////////////////////////////////////////////////// - // Database related API - /////////////////////////////////////////////////////////////////////////// - - val tempDatabasePath = Utils.createTempDir().toURI - - test(s"$version: createDatabase") { - val defaultDB = CatalogDatabase("default", "desc", new URI("loc"), Map()) - client.createDatabase(defaultDB, ignoreIfExists = true) - val tempDB = CatalogDatabase( - "temporary", description = "test create", tempDatabasePath, Map()) - client.createDatabase(tempDB, ignoreIfExists = true) - - intercept[DatabaseAlreadyExistsException] { - client.createDatabase(tempDB, ignoreIfExists = false) - } - } - - test(s"$version: create/get/alter database should pick right user name as owner") { - if (version != "0.12") { - val currentUser = UserGroupInformation.getCurrentUser.getUserName - val ownerName = "SPARK_29425" - val db1 = "SPARK_29425_1" - val db2 = "SPARK_29425_2" - val ownerProps = Map("owner" -> ownerName) - - // create database with owner - val dbWithOwner = CatalogDatabase(db1, "desc", Utils.createTempDir().toURI, ownerProps) - client.createDatabase(dbWithOwner, ignoreIfExists = true) - val getDbWithOwner = client.getDatabase(db1) - assert(getDbWithOwner.properties("owner") === ownerName) - // alter database without owner - client.alterDatabase(getDbWithOwner.copy(properties = Map())) - assert(client.getDatabase(db1).properties("owner") === "") - - // create database without owner - val dbWithoutOwner = CatalogDatabase(db2, "desc", Utils.createTempDir().toURI, Map()) - client.createDatabase(dbWithoutOwner, ignoreIfExists = true) - val getDbWithoutOwner = client.getDatabase(db2) - assert(getDbWithoutOwner.properties("owner") === currentUser) - // alter database with owner - client.alterDatabase(getDbWithoutOwner.copy(properties = ownerProps)) - assert(client.getDatabase(db2).properties("owner") === ownerName) - } - } - - test(s"$version: createDatabase with null description") { - withTempDir { tmpDir => - val dbWithNullDesc = - CatalogDatabase("dbWithNullDesc", description = null, tmpDir.toURI, Map()) - client.createDatabase(dbWithNullDesc, ignoreIfExists = true) - assert(client.getDatabase("dbWithNullDesc").description == "") - } - } - - test(s"$version: setCurrentDatabase") { - client.setCurrentDatabase("default") - } - - test(s"$version: getDatabase") { - // No exception should be thrown - client.getDatabase("default") - intercept[NoSuchDatabaseException](client.getDatabase("nonexist")) - } - - test(s"$version: databaseExists") { - 
assert(client.databaseExists("default")) - assert(client.databaseExists("nonexist") == false) - } - - test(s"$version: listDatabases") { - assert(client.listDatabases("defau.*") == Seq("default")) - } - - test(s"$version: alterDatabase") { - val database = client.getDatabase("temporary").copy(properties = Map("flag" -> "true")) - client.alterDatabase(database) - assert(client.getDatabase("temporary").properties.contains("flag")) - - // test alter database location - val tempDatabasePath2 = Utils.createTempDir().toURI - // Hive support altering database location since HIVE-8472. - if (version == "3.0" || version == "3.1") { - client.alterDatabase(database.copy(locationUri = tempDatabasePath2)) - val uriInCatalog = client.getDatabase("temporary").locationUri - assert("file" === uriInCatalog.getScheme) - assert(new Path(tempDatabasePath2.getPath).toUri.getPath === uriInCatalog.getPath, - "Failed to alter database location") - } else { - val e = intercept[AnalysisException] { - client.alterDatabase(database.copy(locationUri = tempDatabasePath2)) - } - assert(e.getMessage.contains("does not support altering database location")) - } - } - - test(s"$version: dropDatabase") { - assert(client.databaseExists("temporary")) - - client.createTable(table("temporary", tableName = "tbl"), ignoreIfExists = false) - val ex = intercept[AnalysisException] { - client.dropDatabase("temporary", ignoreIfNotExists = false, cascade = false) - assert(false, "dropDatabase should throw HiveException") - } - assert(ex.message.contains("Cannot drop a non-empty database: temporary.")) - - client.dropDatabase("temporary", ignoreIfNotExists = false, cascade = true) - assert(client.databaseExists("temporary") == false) - } - - /////////////////////////////////////////////////////////////////////////// - // Table related API - /////////////////////////////////////////////////////////////////////////// - - test(s"$version: createTable") { - client.createTable(table("default", tableName = "src"), ignoreIfExists = false) - client.createTable(table("default", tableName = "temporary"), ignoreIfExists = false) - client.createTable(table("default", tableName = "view1", tableType = CatalogTableType.VIEW), - ignoreIfExists = false) - } - - test(s"$version: loadTable") { - client.loadTable( - emptyDir, - tableName = "src", - replace = false, - isSrcLocal = false) - } - - test(s"$version: tableExists") { - // No exception should be thrown - assert(client.tableExists("default", "src")) - assert(!client.tableExists("default", "nonexistent")) - } - - test(s"$version: getTable") { - // No exception should be thrown - client.getTable("default", "src") - } - - test(s"$version: getTableOption") { - assert(client.getTableOption("default", "src").isDefined) - } - - test(s"$version: getTablesByName") { - assert(client.getTablesByName("default", Seq("src")).head - == client.getTableOption("default", "src").get) - } - - test(s"$version: getTablesByName when multiple tables") { - assert(client.getTablesByName("default", Seq("src", "temporary")) - .map(_.identifier.table) == Seq("src", "temporary")) - } - - test(s"$version: getTablesByName when some tables do not exist") { - assert(client.getTablesByName("default", Seq("src", "notexist")) - .map(_.identifier.table) == Seq("src")) - } - - test(s"$version: getTablesByName when contains invalid name") { - // scalastyle:off - val name = "砖" - // scalastyle:on - assert(client.getTablesByName("default", Seq("src", name)) - .map(_.identifier.table) == Seq("src")) - } - - test(s"$version: getTablesByName 
when empty") { - assert(client.getTablesByName("default", Seq.empty).isEmpty) - } - - test(s"$version: alterTable(table: CatalogTable)") { - val newTable = client.getTable("default", "src").copy(properties = Map("changed" -> "")) - client.alterTable(newTable) - assert(client.getTable("default", "src").properties.contains("changed")) - } - - test(s"$version: alterTable - should respect the original catalog table's owner name") { - val ownerName = "SPARK-29405" - val originalTable = client.getTable("default", "src") - // mocking the owner is what we declared - val newTable = originalTable.copy(owner = ownerName) - client.alterTable(newTable) - assert(client.getTable("default", "src").owner === ownerName) - // mocking the owner is empty - val newTable2 = originalTable.copy(owner = "") - client.alterTable(newTable2) - assert(client.getTable("default", "src").owner === client.userName) - } - - test(s"$version: alterTable(dbName: String, tableName: String, table: CatalogTable)") { - val newTable = client.getTable("default", "src").copy(properties = Map("changedAgain" -> "")) - client.alterTable("default", "src", newTable) - assert(client.getTable("default", "src").properties.contains("changedAgain")) - } - - test(s"$version: alterTable - rename") { - val newTable = client.getTable("default", "src") - .copy(identifier = TableIdentifier("tgt", database = Some("default"))) - assert(!client.tableExists("default", "tgt")) - - client.alterTable("default", "src", newTable) - - assert(client.tableExists("default", "tgt")) - assert(!client.tableExists("default", "src")) - } - - test(s"$version: alterTable - change database") { - val tempDB = CatalogDatabase( - "temporary", description = "test create", tempDatabasePath, Map()) - client.createDatabase(tempDB, ignoreIfExists = true) - - val newTable = client.getTable("default", "tgt") - .copy(identifier = TableIdentifier("tgt", database = Some("temporary"))) - assert(!client.tableExists("temporary", "tgt")) - - client.alterTable("default", "tgt", newTable) - - assert(client.tableExists("temporary", "tgt")) - assert(!client.tableExists("default", "tgt")) - } - - test(s"$version: alterTable - change database and table names") { - val newTable = client.getTable("temporary", "tgt") - .copy(identifier = TableIdentifier("src", database = Some("default"))) - assert(!client.tableExists("default", "src")) - - client.alterTable("temporary", "tgt", newTable) - - assert(client.tableExists("default", "src")) - assert(!client.tableExists("temporary", "tgt")) - } - - test(s"$version: listTables(database)") { - assert(client.listTables("default") === Seq("src", "temporary", "view1")) - } - - test(s"$version: listTables(database, pattern)") { - assert(client.listTables("default", pattern = "src") === Seq("src")) - assert(client.listTables("default", pattern = "nonexist").isEmpty) - } - - test(s"$version: listTablesByType(database, pattern, tableType)") { - assert(client.listTablesByType("default", pattern = "view1", - CatalogTableType.VIEW) === Seq("view1")) - assert(client.listTablesByType("default", pattern = "nonexist", - CatalogTableType.VIEW).isEmpty) - } - - test(s"$version: dropTable") { - val versionsWithoutPurge = - if (versions.contains("0.14")) versions.takeWhile(_ != "0.14") else Nil - // First try with the purge option set. This should fail if the version is < 0.14, in which - // case we check the version and try without it. 
- try { - client.dropTable("default", tableName = "temporary", ignoreIfNotExists = false, - purge = true) - assert(!versionsWithoutPurge.contains(version)) - } catch { - case _: UnsupportedOperationException => - assert(versionsWithoutPurge.contains(version)) - client.dropTable("default", tableName = "temporary", ignoreIfNotExists = false, - purge = false) - } - // Drop table with type CatalogTableType.VIEW. - try { - client.dropTable("default", tableName = "view1", ignoreIfNotExists = false, - purge = true) - assert(!versionsWithoutPurge.contains(version)) - } catch { - case _: UnsupportedOperationException => - client.dropTable("default", tableName = "view1", ignoreIfNotExists = false, - purge = false) - } - assert(client.listTables("default") === Seq("src")) - } - - /////////////////////////////////////////////////////////////////////////// - // Partition related API - /////////////////////////////////////////////////////////////////////////// - - val storageFormat = CatalogStorageFormat( - locationUri = None, - inputFormat = None, - outputFormat = None, - serde = None, - compressed = false, - properties = Map.empty) - - test(s"$version: sql create partitioned table") { - val table = CatalogTable( - identifier = TableIdentifier("src_part", Some("default")), - tableType = CatalogTableType.MANAGED, - schema = new StructType().add("value", "int").add("key1", "int").add("key2", "int"), - partitionColumnNames = Seq("key1", "key2"), - storage = CatalogStorageFormat( - locationUri = None, - inputFormat = Some(classOf[TextInputFormat].getName), - outputFormat = Some(classOf[HiveIgnoreKeyTextOutputFormat[_, _]].getName), - serde = Some(classOf[LazySimpleSerDe].getName()), - compressed = false, - properties = Map.empty - )) - client.createTable(table, ignoreIfExists = false) - } - - val testPartitionCount = 2 - - test(s"$version: createPartitions") { - val partitions = (1 to testPartitionCount).map { key2 => - CatalogTablePartition(Map("key1" -> "1", "key2" -> key2.toString), storageFormat) - } - client.createPartitions( - "default", "src_part", partitions, ignoreIfExists = true) - } - - test(s"$version: getPartitionNames(catalogTable)") { - val partitionNames = (1 to testPartitionCount).map(key2 => s"key1=1/key2=$key2") - assert(partitionNames == client.getPartitionNames(client.getTable("default", "src_part"))) - } - - test(s"$version: getPartitions(db, table, spec)") { - assert(testPartitionCount == - client.getPartitions("default", "src_part", None).size) - } - - test(s"$version: getPartitionsByFilter") { - // Only one partition [1, 1] for key2 == 1 - val result = client.getPartitionsByFilter(client.getTable("default", "src_part"), - Seq(EqualTo(AttributeReference("key2", IntegerType)(), Literal(1)))) - - // Hive 0.12 doesn't support getPartitionsByFilter, it ignores the filter condition. 
- if (version != "0.12") { - assert(result.size == 1) - } else { - assert(result.size == testPartitionCount) - } - } - - test(s"$version: getPartition") { - // No exception should be thrown - client.getPartition("default", "src_part", Map("key1" -> "1", "key2" -> "2")) - } - - test(s"$version: getPartitionOption(db: String, table: String, spec: TablePartitionSpec)") { - val partition = client.getPartitionOption( - "default", "src_part", Map("key1" -> "1", "key2" -> "2")) - assert(partition.isDefined) - } - - test(s"$version: getPartitionOption(table: CatalogTable, spec: TablePartitionSpec)") { - val partition = client.getPartitionOption( - client.getTable("default", "src_part"), Map("key1" -> "1", "key2" -> "2")) - assert(partition.isDefined) - } - - test(s"$version: getPartitions(db: String, table: String)") { - assert(testPartitionCount == client.getPartitions("default", "src_part", None).size) - } - - test(s"$version: loadPartition") { - val partSpec = new java.util.LinkedHashMap[String, String] - partSpec.put("key1", "1") - partSpec.put("key2", "2") - - client.loadPartition( - emptyDir, - "default", - "src_part", - partSpec, - replace = false, - inheritTableSpecs = false, - isSrcLocal = false) - } - - test(s"$version: loadDynamicPartitions") { - val partSpec = new java.util.LinkedHashMap[String, String] - partSpec.put("key1", "1") - partSpec.put("key2", "") // Dynamic partition - - client.loadDynamicPartitions( - emptyDir, - "default", - "src_part", - partSpec, - replace = false, - numDP = 1) - } - - test(s"$version: renamePartitions") { - val oldSpec = Map("key1" -> "1", "key2" -> "1") - val newSpec = Map("key1" -> "1", "key2" -> "3") - client.renamePartitions("default", "src_part", Seq(oldSpec), Seq(newSpec)) - - // Checks the existence of the new partition (key1 = 1, key2 = 3) - assert(client.getPartitionOption("default", "src_part", newSpec).isDefined) - } - - test(s"$version: alterPartitions") { - val spec = Map("key1" -> "1", "key2" -> "2") - val parameters = Map(StatsSetupConst.TOTAL_SIZE -> "0", StatsSetupConst.NUM_FILES -> "1") - val newLocation = new URI(Utils.createTempDir().toURI.toString.stripSuffix("/")) - val storage = storageFormat.copy( - locationUri = Some(newLocation), - // needed for 0.12 alter partitions - serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - val partition = CatalogTablePartition(spec, storage, parameters) - client.alterPartitions("default", "src_part", Seq(partition)) - assert(client.getPartition("default", "src_part", spec) - .storage.locationUri == Some(newLocation)) - assert(client.getPartition("default", "src_part", spec) - .parameters.get(StatsSetupConst.TOTAL_SIZE) == Some("0")) - } - - test(s"$version: dropPartitions") { - val spec = Map("key1" -> "1", "key2" -> "3") - val versionsWithoutPurge = - if (versions.contains("1.2")) versions.takeWhile(_ != "1.2") else Nil - // Similar to dropTable; try with purge set, and if it fails, make sure we're running - // with a version that is older than the minimum (1.2 in this case). 
- try { - client.dropPartitions("default", "src_part", Seq(spec), ignoreIfNotExists = true, - purge = true, retainData = false) - assert(!versionsWithoutPurge.contains(version)) - } catch { - case _: UnsupportedOperationException => - assert(versionsWithoutPurge.contains(version)) - client.dropPartitions("default", "src_part", Seq(spec), ignoreIfNotExists = true, - purge = false, retainData = false) - } - - assert(client.getPartitionOption("default", "src_part", spec).isEmpty) - } - - test(s"$version: createPartitions if already exists") { - val partitions = Seq(CatalogTablePartition( - Map("key1" -> "101", "key2" -> "102"), - storageFormat)) - try { - client.createPartitions("default", "src_part", partitions, ignoreIfExists = false) - val errMsg = intercept[PartitionsAlreadyExistException] { - client.createPartitions("default", "src_part", partitions, ignoreIfExists = false) - }.getMessage - assert(errMsg.contains("partitions already exists")) - } finally { - client.dropPartitions( - "default", - "src_part", - partitions.map(_.spec), - ignoreIfNotExists = true, - purge = false, - retainData = false) - } - } - - /////////////////////////////////////////////////////////////////////////// - // Function related API - /////////////////////////////////////////////////////////////////////////// - - def function(name: String, className: String): CatalogFunction = { - CatalogFunction( - FunctionIdentifier(name, Some("default")), className, Seq.empty[FunctionResource]) - } - - test(s"$version: createFunction") { - val functionClass = "org.apache.spark.MyFunc1" - if (version == "0.12") { - // Hive 0.12 doesn't support creating permanent functions - intercept[AnalysisException] { - client.createFunction("default", function("func1", functionClass)) - } - } else { - client.createFunction("default", function("func1", functionClass)) - } - } - - test(s"$version: functionExists") { - if (version == "0.12") { - // Hive 0.12 doesn't allow customized permanent functions - assert(client.functionExists("default", "func1") == false) - } else { - assert(client.functionExists("default", "func1")) - } - } - - test(s"$version: renameFunction") { - if (version == "0.12") { - // Hive 0.12 doesn't allow customized permanent functions - intercept[NoSuchPermanentFunctionException] { - client.renameFunction("default", "func1", "func2") - } - } else { - client.renameFunction("default", "func1", "func2") - assert(client.functionExists("default", "func2")) - } - } - - test(s"$version: alterFunction") { - val functionClass = "org.apache.spark.MyFunc2" - if (version == "0.12") { - // Hive 0.12 doesn't allow customized permanent functions - intercept[NoSuchPermanentFunctionException] { - client.alterFunction("default", function("func2", functionClass)) - } - } else { - client.alterFunction("default", function("func2", functionClass)) - } - } - - test(s"$version: getFunction") { - if (version == "0.12") { - // Hive 0.12 doesn't allow customized permanent functions - intercept[NoSuchPermanentFunctionException] { - client.getFunction("default", "func2") - } - } else { - // No exception should be thrown - val func = client.getFunction("default", "func2") - assert(func.className == "org.apache.spark.MyFunc2") - } - } - - test(s"$version: getFunctionOption") { - if (version == "0.12") { - // Hive 0.12 doesn't allow customized permanent functions - assert(client.getFunctionOption("default", "func2").isEmpty) - } else { - assert(client.getFunctionOption("default", "func2").isDefined) - assert(client.getFunctionOption("default", 
"the_func_not_exists").isEmpty) - } - } - - test(s"$version: listFunctions") { - if (version == "0.12") { - // Hive 0.12 doesn't allow customized permanent functions - assert(client.listFunctions("default", "fun.*").isEmpty) - } else { - assert(client.listFunctions("default", "fun.*").size == 1) - } - } - - test(s"$version: dropFunction") { - if (version == "0.12") { - // Hive 0.12 doesn't support creating permanent functions - intercept[NoSuchPermanentFunctionException] { - client.dropFunction("default", "func2") - } - } else { - // No exception should be thrown - client.dropFunction("default", "func2") - assert(client.listFunctions("default", "fun.*").size == 0) - } - } - - /////////////////////////////////////////////////////////////////////////// - // SQL related API - /////////////////////////////////////////////////////////////////////////// - - test(s"$version: sql set command") { - client.runSqlHive("SET spark.sql.test.key=1") - } - - test(s"$version: sql create index and reset") { - // HIVE-18448 Since Hive 3.0, INDEX is not supported. - if (version != "3.0" && version != "3.1") { - client.runSqlHive("CREATE TABLE indexed_table (key INT)") - client.runSqlHive("CREATE INDEX index_1 ON TABLE indexed_table(key) " + - "as 'COMPACT' WITH DEFERRED REBUILD") - } - } - - test(s"$version: sql read hive materialized view") { - // HIVE-14249 Since Hive 2.3.0, materialized view is supported. - if (version == "2.3" || version == "3.0" || version == "3.1") { - // Since Hive 3.0(HIVE-19383), we can not run local MR by `client.runSqlHive` with JDK 11. - assume(version == "2.3" || !SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) - // Since HIVE-18394(Hive 3.1), "Create Materialized View" should default to rewritable ones - val disableRewrite = if (version == "2.3" || version == "3.0") "" else "DISABLE REWRITE" - client.runSqlHive("CREATE TABLE materialized_view_tbl (c1 INT)") - client.runSqlHive( - s"CREATE MATERIALIZED VIEW mv1 $disableRewrite AS SELECT * FROM materialized_view_tbl") - val e = intercept[AnalysisException](versionSpark.table("mv1").collect()).getMessage - assert(e.contains("Hive materialized view is not supported")) - } - } - - /////////////////////////////////////////////////////////////////////////// - // Miscellaneous API - /////////////////////////////////////////////////////////////////////////// - - test(s"$version: version") { - assert(client.version.fullVersion.startsWith(version)) - } - - test(s"$version: getConf") { - assert("success" === client.getConf("test", null)) - } - - test(s"$version: setOut") { - client.setOut(new PrintStream(new ByteArrayOutputStream())) - } - - test(s"$version: setInfo") { - client.setInfo(new PrintStream(new ByteArrayOutputStream())) - } - - test(s"$version: setError") { - client.setError(new PrintStream(new ByteArrayOutputStream())) - } - - test(s"$version: newSession") { - val newClient = client.newSession() - assert(newClient != null) - } - - test(s"$version: withHiveState and addJar") { - val newClassPath = "." - client.addJar(newClassPath) - client.withHiveState { - // No exception should be thrown. - // withHiveState changes the classloader to MutableURLClassLoader - val classLoader = Thread.currentThread().getContextClassLoader - .asInstanceOf[MutableURLClassLoader] - - val urls = classLoader.getURLs() - urls.contains(new File(newClassPath).toURI.toURL) - } - } - - test(s"$version: reset") { - // Clears all database, tables, functions... 
- client.reset() - assert(client.listTables("default").isEmpty) - } - - /////////////////////////////////////////////////////////////////////////// - // End-To-End tests - /////////////////////////////////////////////////////////////////////////// - - test(s"$version: CREATE TABLE AS SELECT") { - withTable("tbl") { - versionSpark.sql("CREATE TABLE tbl AS SELECT 1 AS a") - assert(versionSpark.table("tbl").collect().toSeq == Seq(Row(1))) - val tableMeta = versionSpark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")) - val totalSize = tableMeta.stats.map(_.sizeInBytes) - // Except 0.12, all the following versions will fill the Hive-generated statistics - if (version == "0.12") { - assert(totalSize.isEmpty) - } else { - assert(totalSize.nonEmpty && totalSize.get > 0) - } - } - } - - test(s"$version: CREATE Partitioned TABLE AS SELECT") { - withTable("tbl") { - versionSpark.sql( - """ - |CREATE TABLE tbl(c1 string) - |USING hive - |PARTITIONED BY (ds STRING) - """.stripMargin) - versionSpark.sql("INSERT OVERWRITE TABLE tbl partition (ds='2') SELECT '1'") - - assert(versionSpark.table("tbl").collect().toSeq == Seq(Row("1", "2"))) - val partMeta = versionSpark.sessionState.catalog.getPartition( - TableIdentifier("tbl"), spec = Map("ds" -> "2")).parameters - val totalSize = partMeta.get(StatsSetupConst.TOTAL_SIZE).map(_.toLong) - val numFiles = partMeta.get(StatsSetupConst.NUM_FILES).map(_.toLong) - // Except 0.12, all the following versions will fill the Hive-generated statistics - if (version == "0.12") { - assert(totalSize.isEmpty && numFiles.isEmpty) - } else { - assert(totalSize.nonEmpty && numFiles.nonEmpty) - } - - versionSpark.sql( - """ - |ALTER TABLE tbl PARTITION (ds='2') - |SET SERDEPROPERTIES ('newKey' = 'vvv') - """.stripMargin) - val newPartMeta = versionSpark.sessionState.catalog.getPartition( - TableIdentifier("tbl"), spec = Map("ds" -> "2")).parameters - - val newTotalSize = newPartMeta.get(StatsSetupConst.TOTAL_SIZE).map(_.toLong) - val newNumFiles = newPartMeta.get(StatsSetupConst.NUM_FILES).map(_.toLong) - // Except 0.12, all the following versions will fill the Hive-generated statistics - if (version == "0.12") { - assert(newTotalSize.isEmpty && newNumFiles.isEmpty) - } else { - assert(newTotalSize.nonEmpty && newNumFiles.nonEmpty) - } - } - } - - test(s"$version: Delete the temporary staging directory and files after each insert") { - withTempDir { tmpDir => - withTable("tab") { - versionSpark.sql( - s""" - |CREATE TABLE tab(c1 string) - |location '${tmpDir.toURI.toString}' - """.stripMargin) - - (1 to 3).map { i => - versionSpark.sql(s"INSERT OVERWRITE TABLE tab SELECT '$i'") - } - def listFiles(path: File): List[String] = { - val dir = path.listFiles() - val folders = dir.filter(_.isDirectory).toList - val filePaths = dir.map(_.getName).toList - folders.flatMap(listFiles) ++: filePaths - } - // expect 2 files left: `.part-00000-random-uuid.crc` and `part-00000-random-uuid` - // 0.12, 0.13, 1.0 and 1.1 also has another two more files ._SUCCESS.crc and _SUCCESS - val metadataFiles = Seq("._SUCCESS.crc", "_SUCCESS") - assert(listFiles(tmpDir).filterNot(metadataFiles.contains).length == 2) - } - } - } - - test(s"$version: SPARK-13709: reading partitioned Avro table with nested schema") { - withTempDir { dir => - val path = dir.toURI.toString - val tableName = "spark_13709" - val tempTableName = "spark_13709_temp" - - new File(dir.getAbsolutePath, tableName).mkdir() - new File(dir.getAbsolutePath, tempTableName).mkdir() - - val avroSchema = - """{ - | "name": 
"test_record", - | "type": "record", - | "fields": [ { - | "name": "f0", - | "type": "int" - | }, { - | "name": "f1", - | "type": { - | "type": "record", - | "name": "inner", - | "fields": [ { - | "name": "f10", - | "type": "int" - | }, { - | "name": "f11", - | "type": "double" - | } ] - | } - | } ] - |} - """.stripMargin - - withTable(tableName, tempTableName) { - // Creates the external partitioned Avro table to be tested. - versionSpark.sql( - s"""CREATE EXTERNAL TABLE $tableName - |PARTITIONED BY (ds STRING) - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' - |STORED AS - | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' - | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' - |LOCATION '$path/$tableName' - |TBLPROPERTIES ('avro.schema.literal' = '$avroSchema') - """.stripMargin - ) - - // Creates an temporary Avro table used to prepare testing Avro file. - versionSpark.sql( - s"""CREATE EXTERNAL TABLE $tempTableName - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' - |STORED AS - | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' - | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' - |LOCATION '$path/$tempTableName' - |TBLPROPERTIES ('avro.schema.literal' = '$avroSchema') - """.stripMargin - ) - - // Generates Avro data. - versionSpark.sql(s"INSERT OVERWRITE TABLE $tempTableName SELECT 1, STRUCT(2, 2.5)") - - // Adds generated Avro data as a new partition to the testing table. - versionSpark.sql( - s"ALTER TABLE $tableName ADD PARTITION (ds = 'foo') LOCATION '$path/$tempTableName'") - - // The following query fails before SPARK-13709 is fixed. This is because when reading - // data from table partitions, Avro deserializer needs the Avro schema, which is defined - // in table property "avro.schema.literal". However, we only initializes the deserializer - // using partition properties, which doesn't include the wanted property entry. Merging - // two sets of properties solves the problem. - assert(versionSpark.sql(s"SELECT * FROM $tableName").collect() === - Array(Row(1, Row(2, 2.5D), "foo"))) - } - } - } - - test(s"$version: CTAS for managed data source tables") { - withTable("t", "t1") { - versionSpark.range(1).write.saveAsTable("t") - assert(versionSpark.table("t").collect() === Array(Row(0))) - versionSpark.sql("create table t1 using parquet as select 2 as a") - assert(versionSpark.table("t1").collect() === Array(Row(2))) - } - } - - test(s"$version: Decimal support of Avro Hive serde") { - val tableName = "tab1" - // TODO: add the other logical types. 
For details, see the link: - // https://avro.apache.org/docs/1.8.1/spec.html#Logical+Types - val avroSchema = - """{ - | "name": "test_record", - | "type": "record", - | "fields": [ { - | "name": "f0", - | "type": [ - | "null", - | { - | "precision": 38, - | "scale": 2, - | "type": "bytes", - | "logicalType": "decimal" - | } - | ] - | } ] - |} - """.stripMargin - - Seq(true, false).foreach { isPartitioned => - withTable(tableName) { - val partitionClause = if (isPartitioned) "PARTITIONED BY (ds STRING)" else "" - // Creates the (non-)partitioned Avro table - versionSpark.sql( - s""" - |CREATE TABLE $tableName - |$partitionClause - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' - |STORED AS - | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' - | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' - |TBLPROPERTIES ('avro.schema.literal' = '$avroSchema') - """.stripMargin - ) - - val errorMsg = "Cannot safely cast 'f0': decimal(2,1) to binary" - - if (isPartitioned) { - val insertStmt = s"INSERT OVERWRITE TABLE $tableName partition (ds='a') SELECT 1.3" - if (version == "0.12" || version == "0.13") { - val e = intercept[AnalysisException](versionSpark.sql(insertStmt)).getMessage - assert(e.contains(errorMsg)) - } else { - versionSpark.sql(insertStmt) - assert(versionSpark.table(tableName).collect() === - versionSpark.sql("SELECT 1.30, 'a'").collect()) - } - } else { - val insertStmt = s"INSERT OVERWRITE TABLE $tableName SELECT 1.3" - if (version == "0.12" || version == "0.13") { - val e = intercept[AnalysisException](versionSpark.sql(insertStmt)).getMessage - assert(e.contains(errorMsg)) - } else { - versionSpark.sql(insertStmt) - assert(versionSpark.table(tableName).collect() === - versionSpark.sql("SELECT 1.30").collect()) - } - } - } - } - } - - test(s"$version: read avro file containing decimal") { - val url = Thread.currentThread().getContextClassLoader.getResource("avroDecimal") - val location = new File(url.getFile).toURI.toString - - val tableName = "tab1" - val avroSchema = - """{ - | "name": "test_record", - | "type": "record", - | "fields": [ { - | "name": "f0", - | "type": [ - | "null", - | { - | "precision": 38, - | "scale": 2, - | "type": "bytes", - | "logicalType": "decimal" - | } - | ] - | } ] - |} - """.stripMargin - withTable(tableName) { - versionSpark.sql( - s""" - |CREATE TABLE $tableName - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' - |WITH SERDEPROPERTIES ('respectSparkSchema' = 'true') - |STORED AS - | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' - | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' - |LOCATION '$location' - |TBLPROPERTIES ('avro.schema.literal' = '$avroSchema') - """.stripMargin - ) - assert(versionSpark.table(tableName).collect() === - versionSpark.sql("SELECT 1.30").collect()) - } - } - - test(s"$version: SPARK-17920: Insert into/overwrite avro table") { - // skipped because it's failed in the condition on Windows - assume(!(Utils.isWindows && version == "0.12")) - withTempDir { dir => - val avroSchema = - """ - |{ - | "name": "test_record", - | "type": "record", - | "fields": [{ - | "name": "f0", - | "type": [ - | "null", - | { - | "precision": 38, - | "scale": 2, - | "type": "bytes", - | "logicalType": "decimal" - | } - | ] - | }] - |} - """.stripMargin - val schemaFile = new File(dir, "avroDecimal.avsc") - Utils.tryWithResource(new PrintWriter(schemaFile)) { writer => - writer.write(avroSchema) - } - val 
schemaPath = schemaFile.toURI.toString - - val url = Thread.currentThread().getContextClassLoader.getResource("avroDecimal") - val srcLocation = new File(url.getFile).toURI.toString - val destTableName = "tab1" - val srcTableName = "tab2" - - withTable(srcTableName, destTableName) { - versionSpark.sql( - s""" - |CREATE EXTERNAL TABLE $srcTableName - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' - |WITH SERDEPROPERTIES ('respectSparkSchema' = 'true') - |STORED AS - | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' - | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' - |LOCATION '$srcLocation' - |TBLPROPERTIES ('avro.schema.url' = '$schemaPath') - """.stripMargin - ) - - versionSpark.sql( - s""" - |CREATE TABLE $destTableName - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' - |WITH SERDEPROPERTIES ('respectSparkSchema' = 'true') - |STORED AS - | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' - | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' - |TBLPROPERTIES ('avro.schema.url' = '$schemaPath') - """.stripMargin - ) - versionSpark.sql( - s"""INSERT OVERWRITE TABLE $destTableName SELECT * FROM $srcTableName""") - val result = versionSpark.table(srcTableName).collect() - assert(versionSpark.table(destTableName).collect() === result) - versionSpark.sql( - s"""INSERT INTO TABLE $destTableName SELECT * FROM $srcTableName""") - assert(versionSpark.table(destTableName).collect().toSeq === result ++ result) - } - } - } - // TODO: add more tests. - } -} From f741c0a82a3bb9d01e1cab26c79622880d7dd1ba Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 11 Feb 2022 18:49:13 +0900 Subject: [PATCH 219/513] [SPARK-38186][SQL] Improve the README of Spark docs ### What changes were proposed in this pull request? Improve https://github.com/apache/spark/blob/master/docs/README.md, mark some of the setup steps as optional. Also, recommend developers to use `SKIP_API=1` if no API docs are needed. ### Why are the changes needed? Developers usually need to generate HTML Docs without API Docs. Marking the setup for SQL/Python/R doc as optional can help them avoid unnecessary efforts. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test Closes #35491 from gengliangwang/updateDoc. Authored-by: Gengliang Wang Signed-off-by: Hyukjin Kwon --- docs/README.md | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/README.md b/docs/README.md index 0cf5c0a6281b1..6bb83d8953057 100644 --- a/docs/README.md +++ b/docs/README.md @@ -48,23 +48,9 @@ $ bundle install Note: If you are on a system with both Ruby 1.9 and Ruby 2.0 you may need to replace gem with gem2.0. 
-### R Documentation +### SQL and Python API Documentation (Optional) -If you'd like to generate R documentation, you'll need to [install Pandoc](https://pandoc.org/installing.html) -and install these libraries: - -```sh -$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "testthat", "rmarkdown"), repos="https://cloud.r-project.org/")' -$ sudo Rscript -e 'devtools::install_version("roxygen2", version = "7.1.2", repos="https://cloud.r-project.org/")' -$ sudo Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" -$ sudo Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" -``` - -Note: Other versions of roxygen2 might work in SparkR documentation generation but `RoxygenNote` field in `$SPARK_HOME/R/pkg/DESCRIPTION` is 7.1.2, which is updated if the version is mismatched. - -### API Documentation - -To generate API docs for any language, you'll need to install these libraries: +To generate SQL and Python API docs, you'll need to install these libraries: ## Summary - - Number of queries: 378 + - Number of queries: 379 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -299,6 +299,7 @@ | org.apache.spark.sql.catalyst.expressions.Tan | tan | SELECT tan(0) | struct | | org.apache.spark.sql.catalyst.expressions.Tanh | tanh | SELECT tanh(0) | struct | | org.apache.spark.sql.catalyst.expressions.TimeWindow | window | SELECT a, window.start, window.end, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:06:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, window(b, '5 minutes') ORDER BY a, start | struct | +| org.apache.spark.sql.catalyst.expressions.ToBinary | to_binary | SELECT to_binary('abc', 'utf-8') | struct | | org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct | | org.apache.spark.sql.catalyst.expressions.ToNumber | to_number | SELECT to_number('454', '999') | struct | | org.apache.spark.sql.catalyst.expressions.ToRadians | radians | SELECT radians(180) | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 94924a91991b9..9571f3eb6c2bb 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -135,3 +135,19 @@ select to_number('-454', '-000'); select to_number('-454', 'S000'); select to_number('12,454.8-', '00,000.9-'); select to_number('00,454.8-', '00,000.9-'); + +-- to_binary +select to_binary('abc'); +select to_binary('abc', 'utf-8'); +select to_binary('abc', 'base64'); +select to_binary('abc', 'base2'); +select to_binary('abc', 'hex'); +select to_binary('abc', concat('utf', '-8')); +select to_binary('abc', concat('base', '64')); +select to_binary('abc', 'Hex'); +select to_binary('abc', 'UTF-8'); +select to_binary('abc', null); +select to_binary(null, 'utf-8'); +select to_binary(null, null); +select to_binary(null, cast(null as string)); +select to_binary('abc', 'invalidFormat'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 99927c262c5ac..86c90fc1fe34d 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 102 +-- Number of queries: 116 -- !query @@ -824,3 +824,116 @@ select to_number('00,454.8-', '00,000.9-') struct -- !query output -454.8 + + +-- !query +select to_binary('abc') +-- !query schema +struct +-- !query output +� + + +-- !query +select to_binary('abc', 'utf-8') +-- !query schema +struct +-- !query output +abc + + +-- !query +select to_binary('abc', 'base64') +-- !query schema +struct +-- !query output +i� + + +-- !query +select to_binary('abc', 'base2') +-- !query schema +struct +-- !query output +abc + + +-- !query +select to_binary('abc', 'hex') +-- !query schema +struct +-- !query output +� + + +-- !query +select to_binary('abc', concat('utf', '-8')) +-- !query schema +struct +-- !query output +abc + + +-- !query +select to_binary('abc', concat('base', '64')) +-- !query schema +struct +-- !query output +i� + + +-- !query +select to_binary('abc', 'Hex') +-- !query schema +struct +-- !query output +� + + +-- !query +select to_binary('abc', 'UTF-8') +-- !query schema +struct +-- !query output +abc + + +-- !query +select to_binary('abc', null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_binary(null, 'utf-8') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_binary(null, null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_binary(null, cast(null as string)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_binary('abc', 'invalidFormat') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'to_binary('abc', 'invalidFormat')' due to data type mismatch: Unsupported encoding format: Some(invalidFormat). 
The format has to be a case-insensitive string literal of 'hex', 'utf-8', 'base2', or 'base64'; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 6baac6148885f..f3852a9527b00 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 102 +-- Number of queries: 116 -- !query @@ -820,3 +820,116 @@ select to_number('00,454.8-', '00,000.9-') struct -- !query output -454.8 + + +-- !query +select to_binary('abc') +-- !query schema +struct +-- !query output +� + + +-- !query +select to_binary('abc', 'utf-8') +-- !query schema +struct +-- !query output +abc + + +-- !query +select to_binary('abc', 'base64') +-- !query schema +struct +-- !query output +i� + + +-- !query +select to_binary('abc', 'base2') +-- !query schema +struct +-- !query output +abc + + +-- !query +select to_binary('abc', 'hex') +-- !query schema +struct +-- !query output +� + + +-- !query +select to_binary('abc', concat('utf', '-8')) +-- !query schema +struct +-- !query output +abc + + +-- !query +select to_binary('abc', concat('base', '64')) +-- !query schema +struct +-- !query output +i� + + +-- !query +select to_binary('abc', 'Hex') +-- !query schema +struct +-- !query output +� + + +-- !query +select to_binary('abc', 'UTF-8') +-- !query schema +struct +-- !query output +abc + + +-- !query +select to_binary('abc', null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_binary(null, 'utf-8') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_binary(null, null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_binary(null, cast(null as string)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_binary('abc', 'invalidFormat') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'to_binary('abc', 'invalidFormat')' due to data type mismatch: Unsupported encoding format: Some(invalidFormat). The format has to be a case-insensitive string literal of 'hex', 'utf-8', 'base2', or 'base64'; line 1 pos 7 From 1c0793a75b74ac7b55631e61d9e827e93647f1d2 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 14 Feb 2022 08:30:42 +0900 Subject: [PATCH 223/513] [SPARK-38139][PYTHON][ML][TESTS] Adjust tolerance in ml.recommendation.ALS doctest ### What changes were proposed in this pull request? This PR reduces precision of the result in `pyspark.ml.recommendation.ALS` doctest to four decimal digits. ### Why are the changes needed? In certain configurations, ALS consistently converges to value slightly below `0.69291`, causing repeated test failures. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests, executed with configuration where this test normally fails. Closes #35503 from zero323/SPARK-38139. 
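For illustration (not part of this patch): the doctest's ellipsis is prefix matching, and the equivalent idea in compiled test code is an absolute-tolerance comparison. The sketch below is self-contained Scala with hypothetical prediction values; it only shows why pinning five digits is brittle while four digits stays stable across numeric backends.

```scala
object ToleranceSketch {
  // Returns true when two doubles agree within an absolute tolerance.
  def approxEqual(actual: Double, expected: Double, tol: Double): Boolean =
    math.abs(actual - expected) <= tol

  def main(args: Array[String]): Unit = {
    // Hypothetical ALS predictions from runs on two different BLAS/JDK stacks:
    // they agree to four decimal places but not to five.
    val runA = 0.692914
    val runB = 0.692897

    println(approxEqual(runA, 0.69291, 1e-5)) // true
    println(approxEqual(runB, 0.69291, 1e-5)) // false, a five-digit check is flaky
    println(approxEqual(runA, 0.6929, 1e-4))  // true
    println(approxEqual(runB, 0.6929, 1e-4))  // true, a four-digit check is stable
  }
}
```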
Authored-by: zero323 Signed-off-by: Hyukjin Kwon --- python/pyspark/ml/recommendation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py index f0628fb9221cf..b8e2a6097d93f 100644 --- a/python/pyspark/ml/recommendation.py +++ b/python/pyspark/ml/recommendation.py @@ -320,7 +320,7 @@ class ALS(JavaEstimator, _ALSParams, JavaMLWritable, JavaMLReadable): >>> test = spark.createDataFrame([(0, 2), (1, 0), (2, 0)], ["user", "item"]) >>> predictions = sorted(model.transform(test).collect(), key=lambda r: r[0]) >>> predictions[0] - Row(user=0, item=2, newPrediction=0.69291...) + Row(user=0, item=2, newPrediction=0.6929...) >>> predictions[1] Row(user=1, item=0, newPrediction=3.47356...) >>> predictions[2] From 1e0e43df626b64925da5be385f1fdc7c77d17a22 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sun, 13 Feb 2022 17:59:25 -0800 Subject: [PATCH 224/513] [SPARK-38192][CORE][TESTS] Use `try-with-resources` in `Level/RocksDBSuite.java` ### What changes were proposed in this pull request? `countKeys` method in `LevelDBSuite` use `db.db().newIterator()` to create a `DBIterator` instance, but there is no way to close it because the iterator not registered to `LevelDB.iteratorTracker`, and the similar issue also exists in `RocksDBSuite`, so this pr use `try-with-resources` to ensure the iterator closed. ### Why are the changes needed? Bug fix. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35493 from LuciferYang/SPARK-38192. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../apache/spark/util/kvstore/LevelDBSuite.java | 15 ++++++++------- .../apache/spark/util/kvstore/RocksDBSuite.java | 17 +++++++++-------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java index ef92a6cbba31a..c43c9b171f5a4 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java +++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java @@ -329,13 +329,14 @@ private int countKeys(Class type) throws Exception { byte[] prefix = db.getTypeInfo(type).keyPrefix(); int count = 0; - DBIterator it = db.db().iterator(); - it.seek(prefix); - - while (it.hasNext()) { - byte[] key = it.next().getKey(); - if (LevelDBIterator.startsWith(key, prefix)) { - count++; + try (DBIterator it = db.db().iterator()) { + it.seek(prefix); + + while (it.hasNext()) { + byte[] key = it.next().getKey(); + if (LevelDBIterator.startsWith(key, prefix)) { + count++; + } } } diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBSuite.java index 1bae764ae96ad..cd18d227cba72 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBSuite.java +++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBSuite.java @@ -330,15 +330,16 @@ private int countKeys(Class type) throws Exception { byte[] prefix = db.getTypeInfo(type).keyPrefix(); int count = 0; - RocksIterator it = db.db().newIterator(); - it.seek(prefix); - - while (it.isValid()) { - byte[] key = it.key(); - if (RocksDBIterator.startsWith(key, prefix)) { - count++; + try (RocksIterator it = db.db().newIterator()) { + it.seek(prefix); + + while (it.isValid()) { + byte[] key = it.key(); + if 
(RocksDBIterator.startsWith(key, prefix)) { + count++; + } + it.next(); } - it.next(); } return count; From 4921db91c856999f24e4d2ec66df5ca0ebc27e51 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sun, 13 Feb 2022 18:23:28 -0800 Subject: [PATCH 225/513] [SPARK-38175][CORE][SQL][SS][DSTREAM][MESOS][WEBUI] Clean up unused parameters in private methods signature ### What changes were proposed in this pull request? The aim of this pr is clean up unused parameters of private methods ### Why are the changes needed? Cleanup unused symbol. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA Closes #35477 from LuciferYang/SPARK-38175. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/SparkContext.scala | 5 ++--- .../spark/deploy/StandaloneResourceUtils.scala | 3 +-- .../HadoopFSDelegationTokenProvider.scala | 3 +-- .../SparkContextSchedulerCreationSuite.scala | 11 +++-------- .../MesosCoarseGrainedSchedulerBackend.scala | 3 +-- .../MesosFineGrainedSchedulerBackend.scala | 7 +++---- .../spark/sql/catalyst/expressions/Cast.scala | 17 +++++++---------- .../spark/sql/execution/GenerateExec.scala | 10 ++++------ .../adaptive/AdaptiveSparkPlanExec.scala | 3 --- .../execution/aggregate/HashAggregateExec.scala | 8 ++++---- .../datasources/PartitioningUtils.scala | 3 +-- .../parquet/ParquetPartitionReaderFactory.scala | 8 +------- .../execution/streaming/FileStreamOptions.scala | 4 ++-- .../continuous/ContinuousExecution.scala | 4 ++-- .../spark/sql/hive/HiveExternalCatalog.scala | 9 +++------ .../spark/streaming/ui/StreamingPage.scala | 5 ++--- 16 files changed, 37 insertions(+), 66 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 86bf7255ee1e0..02c58d2a9b4f2 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -560,7 +560,7 @@ class SparkContext(config: SparkConf) extends Logging { _plugins = PluginContainer(this, _resources.asJava) // Create and start the scheduler - val (sched, ts) = SparkContext.createTaskScheduler(this, master, deployMode) + val (sched, ts) = SparkContext.createTaskScheduler(this, master) _schedulerBackend = sched _taskScheduler = ts _dagScheduler = new DAGScheduler(this) @@ -2890,8 +2890,7 @@ object SparkContext extends Logging { */ private def createTaskScheduler( sc: SparkContext, - master: String, - deployMode: String): (SchedulerBackend, TaskScheduler) = { + master: String): (SchedulerBackend, TaskScheduler) = { import SparkMasterRegex._ // When running locally, don't try to re-execute tasks on failure. 
diff --git a/core/src/main/scala/org/apache/spark/deploy/StandaloneResourceUtils.scala b/core/src/main/scala/org/apache/spark/deploy/StandaloneResourceUtils.scala index c7c31a85b0636..641c5416cbb33 100644 --- a/core/src/main/scala/org/apache/spark/deploy/StandaloneResourceUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/StandaloneResourceUtils.scala @@ -99,7 +99,7 @@ private[spark] object StandaloneResourceUtils extends Logging { ResourceAllocation(new ResourceID(componentName, rName), rInfo.addresses) }.toSeq try { - writeResourceAllocationJson(componentName, allocations, tmpFile) + writeResourceAllocationJson(allocations, tmpFile) } catch { case NonFatal(e) => val errMsg = s"Exception threw while preparing resource file for $compShortName" @@ -112,7 +112,6 @@ private[spark] object StandaloneResourceUtils extends Logging { } private def writeResourceAllocationJson[T]( - componentName: String, allocations: Seq[T], jsonFile: File): Unit = { implicit val formats = DefaultFormats diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala index 5c98762d4181d..3120d482f11e1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala @@ -57,7 +57,7 @@ private[deploy] class HadoopFSDelegationTokenProvider // Get the token renewal interval if it is not set. It will only be called once. if (tokenRenewalInterval == null) { - tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, sparkConf, fileSystems) + tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, fileSystems) } // Get the time of next renewal. @@ -123,7 +123,6 @@ private[deploy] class HadoopFSDelegationTokenProvider private def getTokenRenewalInterval( hadoopConf: Configuration, - sparkConf: SparkConf, filesystems: Set[FileSystem]): Option[Long] = { // We cannot use the tokens generated with renewer yarn. Trying to renew // those will fail with an access control issue. So create new tokens with the logged in diff --git a/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala index 0c72f770a787c..3a615d0ea6cf1 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala @@ -32,15 +32,10 @@ class SparkContextSchedulerCreationSuite def noOp(taskSchedulerImpl: TaskSchedulerImpl): Unit = {} def createTaskScheduler(master: String)(body: TaskSchedulerImpl => Unit = noOp): Unit = - createTaskScheduler(master, "client")(body) - - def createTaskScheduler(master: String, deployMode: String)( - body: TaskSchedulerImpl => Unit): Unit = - createTaskScheduler(master, deployMode, new SparkConf())(body) + createTaskScheduler(master, new SparkConf())(body) def createTaskScheduler( master: String, - deployMode: String, conf: SparkConf)(body: TaskSchedulerImpl => Unit): Unit = { // Create local SparkContext to setup a SparkEnv. We don't actually want to start() the // real schedulers, so we don't want to create a full SparkContext with the desired scheduler. 
@@ -48,7 +43,7 @@ class SparkContextSchedulerCreationSuite val createTaskSchedulerMethod = PrivateMethod[Tuple2[SchedulerBackend, TaskScheduler]](Symbol("createTaskScheduler")) val (_, sched) = - SparkContext invokePrivate createTaskSchedulerMethod(sc, master, deployMode) + SparkContext invokePrivate createTaskSchedulerMethod(sc, master) try { body(sched.asInstanceOf[TaskSchedulerImpl]) } finally { @@ -132,7 +127,7 @@ class SparkContextSchedulerCreationSuite test("local-default-parallelism") { val conf = new SparkConf().set("spark.default.parallelism", "16") - val sched = createTaskScheduler("local", "client", conf) { sched => + val sched = createTaskScheduler("local", conf) { sched => sched.backend match { case s: LocalSchedulerBackend => assert(s.defaultParallelism() === 16) case _ => fail() diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala index 6fedce61d8208..b7b652d83ffe2 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala @@ -679,7 +679,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( "is Spark installed on it?") } } - executorTerminated(d, agentId, taskId, s"Executor finished with state $state") + executorTerminated(agentId, taskId, s"Executor finished with state $state") // In case we'd rejected everything before but have now lost a node d.reviveOffers() } @@ -740,7 +740,6 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( * what tasks are running. It also notifies the driver that an executor was removed. 
*/ private def executorTerminated( - d: org.apache.mesos.SchedulerDriver, agentId: String, taskId: String, reason: String): Unit = { diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala index 586c2bdd67cfa..cc67dad196880 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala @@ -419,8 +419,7 @@ private[spark] class MesosFineGrainedSchedulerBackend( } } - private def recordAgentLost( - d: org.apache.mesos.SchedulerDriver, agentId: AgentID, reason: ExecutorLossReason): Unit = { + private def recordAgentLost(agentId: AgentID, reason: ExecutorLossReason): Unit = { inClassLoader() { logInfo("Mesos agent lost: " + agentId.getValue) removeExecutor(agentId.getValue, reason.toString) @@ -429,7 +428,7 @@ private[spark] class MesosFineGrainedSchedulerBackend( } override def agentLost(d: org.apache.mesos.SchedulerDriver, agentId: AgentID): Unit = { - recordAgentLost(d, agentId, ExecutorProcessLost()) + recordAgentLost(agentId, ExecutorProcessLost()) } override def executorLost( @@ -439,7 +438,7 @@ private[spark] class MesosFineGrainedSchedulerBackend( status: Int): Unit = { logInfo("Executor lost: %s, marking agent %s as lost".format(executorId.getValue, agentId.getValue)) - recordAgentLost(d, agentId, ExecutorExited(status, exitCausedByApp = true)) + recordAgentLost(agentId, ExecutorExited(status, exitCausedByApp = true)) } override def killTask( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 5091ee5d27927..06a148f063201 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1679,14 +1679,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit """ } - private[this] def castDecimalToIntegralTypeCode( - ctx: CodegenContext, - integralType: String, - catalogType: String): CastFunction = { + private[this] def castDecimalToIntegralTypeCode(integralType: String): CastFunction = { if (ansiEnabled) { - (c, evPrim, evNull) => code"$evPrim = $c.roundTo${integralType.capitalize}();" + (c, evPrim, _) => code"$evPrim = $c.roundTo${integralType.capitalize}();" } else { - (c, evPrim, evNull) => code"$evPrim = $c.to${integralType.capitalize}();" + (c, evPrim, _) => code"$evPrim = $c.to${integralType.capitalize}();" } } @@ -1761,7 +1758,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case DateType => (c, evPrim, evNull) => code"$evNull = true;" case TimestampType => castTimestampToIntegralTypeCode(ctx, "byte", ByteType) - case DecimalType() => castDecimalToIntegralTypeCode(ctx, "byte", ByteType.catalogString) + case DecimalType() => castDecimalToIntegralTypeCode("byte") case ShortType | IntegerType | LongType if ansiEnabled => castIntegralTypeToIntegralTypeExactCode(ctx, "byte", ByteType) case FloatType | DoubleType if ansiEnabled => @@ -1797,7 +1794,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case DateType => (c, evPrim, evNull) => code"$evNull = 
true;" case TimestampType => castTimestampToIntegralTypeCode(ctx, "short", ShortType) - case DecimalType() => castDecimalToIntegralTypeCode(ctx, "short", ShortType.catalogString) + case DecimalType() => castDecimalToIntegralTypeCode("short") case IntegerType | LongType if ansiEnabled => castIntegralTypeToIntegralTypeExactCode(ctx, "short", ShortType) case FloatType | DoubleType if ansiEnabled => @@ -1831,7 +1828,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case DateType => (c, evPrim, evNull) => code"$evNull = true;" case TimestampType => castTimestampToIntegralTypeCode(ctx, "int", IntegerType) - case DecimalType() => castDecimalToIntegralTypeCode(ctx, "int", IntegerType.catalogString) + case DecimalType() => castDecimalToIntegralTypeCode("int") case LongType if ansiEnabled => castIntegralTypeToIntegralTypeExactCode(ctx, "int", IntegerType) case FloatType | DoubleType if ansiEnabled => @@ -1866,7 +1863,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => code"$evNull = true;" case TimestampType => (c, evPrim, evNull) => code"$evPrim = (long) ${timestampToLongCode(c)};" - case DecimalType() => castDecimalToIntegralTypeCode(ctx, "long", LongType.catalogString) + case DecimalType() => castDecimalToIntegralTypeCode("long") case FloatType | DoubleType if ansiEnabled => castFractionToIntegralTypeCode(ctx, "long", LongType) case x: NumericType => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala index 6c7929437ffdd..f6dbf5fda1816 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala @@ -142,8 +142,8 @@ case class GenerateExec( case (attr, _) => requiredAttrSet.contains(attr) }.map(_._2) boundGenerator match { - case e: CollectionGenerator => codeGenCollection(ctx, e, requiredInput, row) - case g => codeGenTraversableOnce(ctx, g, requiredInput, row) + case e: CollectionGenerator => codeGenCollection(ctx, e, requiredInput) + case g => codeGenTraversableOnce(ctx, g, requiredInput) } } @@ -153,8 +153,7 @@ case class GenerateExec( private def codeGenCollection( ctx: CodegenContext, e: CollectionGenerator, - input: Seq[ExprCode], - row: ExprCode): String = { + input: Seq[ExprCode]): String = { // Generate code for the generator. 
val data = e.genCode(ctx) @@ -241,8 +240,7 @@ case class GenerateExec( private def codeGenTraversableOnce( ctx: CodegenContext, e: Expression, - requiredInput: Seq[ExprCode], - row: ExprCode): String = { + requiredInput: Seq[ExprCode]): String = { // Generate the code for the generator val data = e.genCode(ctx) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 2b42804e784ed..8b31f1738d237 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -410,7 +410,6 @@ case class AdaptiveSparkPlanExec( if (isFinalPlan) "Final Plan" else "Current Plan", currentPhysicalPlan, depth, - lastChildren, append, verbose, maxFields, @@ -419,7 +418,6 @@ case class AdaptiveSparkPlanExec( "Initial Plan", initialPlan, depth, - lastChildren, append, verbose, maxFields, @@ -432,7 +430,6 @@ case class AdaptiveSparkPlanExec( header: String, plan: SparkPlan, depth: Int, - lastChildren: Seq[Boolean], append: String => Unit, verbose: Boolean, maxFields: Int, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index ef0eb3e5da257..7da9c56bb47f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -376,7 +376,7 @@ case class HashAggregateExec( * Currently fast hash map is supported for primitive data types during partial aggregation. * This list of supported use-cases should be expanded over time. 
*/ - private def checkIfFastHashMapSupported(ctx: CodegenContext): Boolean = { + private def checkIfFastHashMapSupported(): Boolean = { val isSupported = (groupingKeySchema ++ bufferSchema).forall(f => CodeGenerator.isPrimitiveType(f.dataType) || f.dataType.isInstanceOf[DecimalType] || f.dataType.isInstanceOf[StringType] || @@ -402,8 +402,8 @@ case class HashAggregateExec( isSupported && isNotByteArrayDecimalType && isEnabledForAggModes } - private def enableTwoLevelHashMap(ctx: CodegenContext): Unit = { - if (!checkIfFastHashMapSupported(ctx)) { + private def enableTwoLevelHashMap(): Unit = { + if (!checkIfFastHashMapSupported()) { if (!Utils.isTesting) { logInfo(s"${SQLConf.ENABLE_TWOLEVEL_AGG_MAP.key} is set to true, but" + " current version of codegened fast hashmap does not support this aggregate.") @@ -422,7 +422,7 @@ case class HashAggregateExec( protected override def doProduceWithKeys(ctx: CodegenContext): String = { val initAgg = ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, "initAgg") if (conf.enableTwoLevelAggMap) { - enableTwoLevelHashMap(ctx) + enableTwoLevelHashMap() } else if (conf.enableVectorizedHashMap) { logWarning("Two level hashmap is disabled but vectorized hashmap is enabled.") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 88543bd19bb4f..8d71cf65807c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -262,7 +262,7 @@ object PartitioningUtils extends SQLConfHelper{ // Once we get the string, we try to parse it and find the partition column and value. val maybeColumn = parsePartitionColumn(currentPath.getName, typeInference, userSpecifiedDataTypes, - validatePartitionColumns, zoneId, dateFormatter, timestampFormatter) + zoneId, dateFormatter, timestampFormatter) maybeColumn.foreach(columns += _) // Now, we determine if we should stop. 
@@ -296,7 +296,6 @@ object PartitioningUtils extends SQLConfHelper{ columnSpec: String, typeInference: Boolean, userSpecifiedDataTypes: Map[String, DataType], - validatePartitionColumns: Boolean, zoneId: ZoneId, dateFormatter: DateFormatter, timestampFormatter: TimestampFormatter): Option[(String, TypedPartValue)] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala index 41ee98a4f47b8..12b8a631196ae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala @@ -202,7 +202,7 @@ case class ParquetPartitionReaderFactory( private def buildReaderBase[T]( file: PartitionedFile, buildReaderFunc: ( - FileSplit, InternalRow, TaskAttemptContextImpl, + InternalRow, Option[FilterPredicate], Option[ZoneId], RebaseSpec, RebaseSpec) => RecordReader[Void, T]): RecordReader[Void, T] = { @@ -261,9 +261,7 @@ case class ParquetPartitionReaderFactory( footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) val reader = buildReaderFunc( - split, file.partitionValues, - hadoopAttemptContext, pushed, convertTz, datetimeRebaseSpec, @@ -277,9 +275,7 @@ case class ParquetPartitionReaderFactory( } private def createRowBaseParquetReader( - split: FileSplit, partitionValues: InternalRow, - hadoopAttemptContext: TaskAttemptContextImpl, pushed: Option[FilterPredicate], convertTz: Option[ZoneId], datetimeRebaseSpec: RebaseSpec, @@ -312,9 +308,7 @@ case class ParquetPartitionReaderFactory( } private def createParquetVectorizedReader( - split: FileSplit, partitionValues: InternalRow, - hadoopAttemptContext: TaskAttemptContextImpl, pushed: Option[FilterPredicate], convertTz: Option[ZoneId], datetimeRebaseSpec: RebaseSpec, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala index 6f43542fd6595..a5c1c735cbd7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala @@ -33,9 +33,9 @@ class FileStreamOptions(parameters: CaseInsensitiveMap[String]) extends Logging def this(parameters: Map[String, String]) = this(CaseInsensitiveMap(parameters)) - checkDisallowedOptions(parameters) + checkDisallowedOptions() - private def checkDisallowedOptions(options: Map[String, String]): Unit = { + private def checkDisallowedOptions(): Unit = { Seq(ModifiedBeforeFilter.PARAM_NAME, ModifiedAfterFilter.PARAM_NAME).foreach { param => if (parameters.contains(param)) { throw new IllegalArgumentException(s"option '$param' is not allowed in file stream sources") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 0ed29d430bbdb..665ed77007bb8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -157,7 +157,7 @@ class ContinuousExecution( * Start a new 
query log * DONE */ - private def getStartOffsets(sparkSessionToRunBatches: SparkSession): OffsetSeq = { + private def getStartOffsets(): OffsetSeq = { // Note that this will need a slight modification for exactly once. If ending offsets were // reported but not committed for any epochs, we must replay exactly to those offsets. // For at least once, we can just ignore those reports and risk duplicates. @@ -188,7 +188,7 @@ class ContinuousExecution( * @param sparkSessionForQuery Isolated [[SparkSession]] to run the continuous query with. */ private def runContinuous(sparkSessionForQuery: SparkSession): Unit = { - val offsets = getStartOffsets(sparkSessionForQuery) + val offsets = getStartOffsets() if (currentBatchId > 0) { AcceptsLatestSeenOffsetHandler.setLatestSeenOffsetOnSources(Some(offsets), sources) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 1770fbb5bc6d9..e23f1fe27bbd9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -768,8 +768,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val version: String = table.properties.getOrElse(CREATED_SPARK_VERSION, "2.2 or prior") // Restore Spark's statistics from information in Metastore. - val restoredStats = - statsFromProperties(table.properties, table.identifier.table, table.schema) + val restoredStats = statsFromProperties(table.properties, table.identifier.table) if (restoredStats.isDefined) { table = table.copy(stats = restoredStats) } @@ -1137,8 +1136,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat private def statsFromProperties( properties: Map[String, String], - table: String, - schema: StructType): Option[CatalogStatistics] = { + table: String): Option[CatalogStatistics] = { val statsProps = properties.filterKeys(_.startsWith(STATISTICS_PREFIX)) if (statsProps.isEmpty) { @@ -1208,8 +1206,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat // Restore Spark's statistics from information in Metastore. // Note: partition-level statistics were introduced in 2.3. 
- val restoredStats = - statsFromProperties(partition.parameters, table.identifier.table, table.schema) + val restoredStats = statsFromProperties(partition.parameters, table.identifier.table) if (restoredStats.isDefined) { partition.copy( spec = restoredSpec, diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala index 42d0e50a068ec..2c8e51e19d3e3 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala @@ -294,7 +294,7 @@ private[ui] class StreamingPage(parent: StreamingTab) {if (hasStream) { - {generateInputDStreamsTable(jsCollector, minBatchTime, maxBatchTime, minRecordRate, maxRecordRate)} + {generateInputDStreamsTable(jsCollector, minBatchTime, maxBatchTime, minRecordRate)} }} @@ -340,8 +340,7 @@ private[ui] class StreamingPage(parent: StreamingTab) jsCollector: JsCollector, minX: Long, maxX: Long, - minY: Double, - maxY: Double): Seq[Node] = { + minY: Double): Seq[Node] = { val maxYCalculated = listener.receivedRecordRateWithBatchTime.values .flatMap { case streamAndRates => streamAndRates.map { case (_, recordRate) => recordRate } } .reduceOption[Double](math.max) From ff92e85f86d3e36428996695001a23893d406b76 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 14 Feb 2022 13:28:11 +0300 Subject: [PATCH 226/513] [SPARK-38198][SQL] Fix `QueryExecution.debug#toFile` use the passed in `maxFields` when `explainMode` is `CodegenMode` ### What changes were proposed in this pull request? `QueryExecution.debug#toFile` method supports passing in `maxFields` and this parameter will be passed down when `explainMode` is `SimpleMode`, `ExtendedMode`, or `CostMode`. But the passed down `maxFields` was ignored when `explainMode` is `CostMode` because `QueryExecution#stringWithStats` overrides it with `SQLConf.get.maxToStringFields` at present, so this pr removes the override behavior to let passed in `maxFields` take effect. ### Why are the changes needed? Bug fix ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA and add a new test case Closes #35506 from LuciferYang/SPARK-38198. 
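For illustration (not part of this patch): the root cause is a plain parameter-shadowing bug, and Scala permits a body-local `val` to shadow a method parameter, which is why the old code compiled without complaint. The self-contained sketch below uses illustrative names only, not Spark's real classes; the shadowing line mirrors the removed `val maxFields = SQLConf.get.maxToStringFields`.

```scala
object ShadowedParameterSketch {
  // Stand-in for the global default read from SQLConf.get.maxToStringFields.
  object Conf { val maxToStringFields: Int = 25 }

  // Before the fix: a local val re-reads the global default and shadows the
  // parameter, so a caller-supplied maxFields never takes effect.
  def truncatedBefore(fields: Seq[String], maxFields: Int): Seq[String] = {
    val maxFields = Conf.maxToStringFields
    fields.take(maxFields)
  }

  // After the fix: the caller's value flows through.
  def truncatedAfter(fields: Seq[String], maxFields: Int): Seq[String] =
    fields.take(maxFields)

  def main(args: Array[String]): Unit = {
    val fields = (1 to 30).map(i => s"col$i")
    println(truncatedBefore(fields, maxFields = 3).size) // 25, the argument is ignored
    println(truncatedAfter(fields, maxFields = 3).size)  // 3
  }
}
```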
Authored-by: yangjie01 Signed-off-by: Max Gekk --- .../spark/sql/execution/QueryExecution.scala | 2 -- .../sql/execution/QueryExecutionSuite.scala | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 26c6904a896a5..1b089943a680e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -304,8 +304,6 @@ class QueryExecution( } private def stringWithStats(maxFields: Int, append: String => Unit): Unit = { - val maxFields = SQLConf.get.maxToStringFields - // trigger to compute stats for logical plans try { // This will trigger to compute stats for all the nodes in the plan, including subqueries, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala index ecc448fe250d3..2c58b53969bcd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala @@ -261,4 +261,22 @@ class QueryExecutionSuite extends SharedSparkSession { val cmdResultExec = projectQe.executedPlan.asInstanceOf[CommandResultExec] assert(cmdResultExec.commandPhysicalPlan.isInstanceOf[ShowTablesExec]) } + + test("SPARK-38198: check specify maxFields when call toFile method") { + withTempDir { dir => + val path = dir.getCanonicalPath + "/plans.txt" + // Define a dataset with 6 columns + val ds = spark.createDataset(Seq((0, 1, 2, 3, 4, 5), (6, 7, 8, 9, 10, 11))) + // `CodegenMode` and `FormattedMode` doesn't use the maxFields, so not tested in this case + Seq(SimpleMode.name, ExtendedMode.name, CostMode.name).foreach { modeName => + val maxFields = 3 + ds.queryExecution.debug.toFile(path, explainMode = Some(modeName), maxFields = maxFields) + Utils.tryWithResource(Source.fromFile(path)) { source => + val tableScan = source.getLines().filter(_.contains("LocalTableScan")) + assert(tableScan.exists(_.contains("more fields")), + s"Specify maxFields = $maxFields doesn't take effect when explainMode is $modeName") + } + } + } + } } From c8b34ab7340265f1f2bec2afa694c10f174b222c Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Mon, 14 Feb 2022 15:25:00 +0300 Subject: [PATCH 227/513] [SPARK-38097][SQL][TESTS] Improved the error message for pivoting unsupported column ### What changes were proposed in this pull request? Improved the error message for pivoting column with unsupported literal type. ### Why are the changes needed? To clarify the error. ### Does this PR introduce _any_ user-facing change? Yes, the error message changed. ### How was this patch tested? `$ build/sbt "test:testOnly *QueryExecutionErrorsSuite"` Closes #35433 from yutoacts/SPARK-38097. 
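For illustration (not part of this patch): the change is an error-translation pattern, catching the low-level "cannot build a literal" failure and rethrowing it with pivot-specific context. The sketch below is self-contained and uses hypothetical stand-ins (`toLiteral`, `UnsupportedPivotValueException`, `pivotValueToLiteral`) for `Literal.apply`, `SparkRuntimeException`, and `QueryExecutionErrors.pivotColumnUnsupportedError`.

```scala
object PivotErrorTranslationSketch {
  final class UnsupportedPivotValueException(message: String) extends RuntimeException(message)

  // Stand-in for Literal.apply: only a few simple types convert cleanly.
  private def toLiteral(v: Any): Any = v match {
    case _: Int | _: Long | _: Double | _: String | _: Boolean => v
    case other =>
      throw new IllegalArgumentException(s"literal for '$other' of ${other.getClass}.")
  }

  // The pattern from the patch: catch the generic failure and rethrow it naming the
  // offending pivot value and the column's data type.
  def pivotValueToLiteral(v: Any, columnTypeName: String): Any =
    try toLiteral(v) catch {
      case _: IllegalArgumentException =>
        throw new UnsupportedPivotValueException(
          s"The feature is not supported: pivoting by the value '$v' " +
            s"of the column data type '$columnTypeName'.")
    }

  def main(args: Array[String]): Unit = {
    println(pivotValueToLiteral(2012, "int")) // prints 2012
    try {
      // A tuple stands in for the struct row value from the test case.
      pivotValueToLiteral(("dotnet", "Dummies"), "struct<course:string,training:string>")
    } catch {
      case e: UnsupportedPivotValueException => println(e.getMessage)
    }
  }
}
```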
Lead-authored-by: Yuto Akutsu Co-authored-by: Maxim Gekk Signed-off-by: Max Gekk --- .../apache/spark/sql/errors/QueryExecutionErrors.scala | 8 ++++++++ .../org/apache/spark/sql/RelationalGroupedDataset.scala | 9 ++++++++- .../spark/sql/errors/QueryExecutionErrorsSuite.scala | 7 +++---- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index c042c44b6ee34..d3753afdb7990 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -242,6 +242,14 @@ object QueryExecutionErrors { messageParameters = Array(s"literal for '${v.toString}' of ${v.getClass.toString}.")) } + def pivotColumnUnsupportedError(v: Any, dataType: DataType): RuntimeException = { + new SparkRuntimeException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array( + s"pivoting by the value '${v.toString}' of the column data type" + + s" '${dataType.catalogString}'.")) + } + def noDefaultForDataTypeError(dataType: DataType): RuntimeException = { new RuntimeException(s"no default for type $dataType") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index 96bb1b3027f15..7e3c622196173 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -21,6 +21,7 @@ import java.util.Locale import scala.collection.JavaConverters._ +import org.apache.spark.SparkRuntimeException import org.apache.spark.annotation.Stable import org.apache.spark.api.python.PythonEvalType import org.apache.spark.broadcast.Broadcast @@ -452,7 +453,13 @@ class RelationalGroupedDataset protected[sql]( case RelationalGroupedDataset.GroupByType => val valueExprs = values.map(_ match { case c: Column => c.expr - case v => Literal.apply(v) + case v => + try { + Literal.apply(v) + } catch { + case _: SparkRuntimeException => + throw QueryExecutionErrors.pivotColumnUnsupportedError(v, pivotColumn.expr.dataType) + } }) new RelationalGroupedDataset( df, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 11bbd43d9be06..dc9f5065e277c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -129,13 +129,12 @@ class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { val e2 = intercept[SparkRuntimeException] { trainingSales .groupBy($"sales.year") - .pivot(struct(lower($"sales.course"), $"training")) + .pivot(struct(lower(trainingSales("sales.course")), trainingSales("training"))) .agg(sum($"sales.earnings")) .collect() } - assert(e2.getMessage === "The feature is not supported: " + - "literal for '[dotnet,Dummies]' of class " + - "org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema.") + assert(e2.getMessage === "The feature is not supported: pivoting by the value" + + """ '[dotnet,Dummies]' of the column data type 'struct'.""") } test("UNSUPPORTED_FEATURE: unsupported pivot operations") { From 8853f286371bcc1d44762a0a8ed5bf1a40cdbbd5 
Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Tue, 15 Feb 2022 09:40:27 +0900 Subject: [PATCH 228/513] [SPARK-35173][SQL][PYTHON] Add multiple columns adding support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR added the multiple columns adding support for Spark scala/java/python API. - Expose `withColumns` with Map input as public API in Scala/Java - Add `withColumns` in PySpark There was also some discussion about adding multiple columns in past JIRA([SPARK-1225](https://issues.apache.org/jira/browse/SPARK-12225), [SPARK-26224](https://issues.apache.org/jira/browse/SPARK-26224)) and [ML](http://apache-spark-developers-list.1001551.n3.nabble.com/DISCUSS-Multiple-columns-adding-replacing-support-in-PySpark-DataFrame-API-td31164.html). ### Why are the changes needed? There were a private method `withColumns` can add columns at one pass [1]: https://github.com/apache/spark/blob/b5241c97b17a1139a4ff719bfce7f68aef094d95/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala#L2402 However, it was not exposed as public API in Scala/Java, and also PySpark user can only use `withColumn` to add one column or replacing the existing one column that has the same name. For example, if the PySpark user want to add multiple columns, they should call `withColumn` again and again like: ```Python df.withColumn("key1", col("key1")).withColumn("key2", col("key2")).withColumn("key3", col("key3")) ``` After this patch, the user can use the `withColumns` with map of colume name and column : ```Python df.withColumns({"key1": col("key1"), "key2":col("key2"), "key3": col("key3")}) ``` ### Does this PR introduce _any_ user-facing change? Yes, this PR exposes `withColumns` as public API, and also adds `withColumns` API in PySpark . ### How was this patch tested? - Add new multiple columns adding test, passed - Existing test, passed Closes #32431 from Yikun/SPARK-35173-cols. Authored-by: Yikun Jiang Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/dataframe.py | 35 +++++++++++++++++++ python/pyspark/sql/tests/test_dataframe.py | 27 ++++++++++++++ .../scala/org/apache/spark/sql/Dataset.scala | 29 +++++++++++++++ .../apache/spark/sql/JavaDataFrameSuite.java | 16 +++++++++ .../org/apache/spark/sql/DataFrameSuite.scala | 18 ++++++++-- 5 files changed, 122 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index ee68865c98e39..037252795be0c 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -2911,6 +2911,41 @@ def freqItems( support = 0.01 return DataFrame(self._jdf.stat().freqItems(_to_seq(self._sc, cols), support), self.sql_ctx) + def withColumns(self, *colsMap: Dict[str, Column]) -> "DataFrame": + """ + Returns a new :class:`DataFrame` by adding multiple columns or replacing the + existing columns that has the same names. + + The colsMap is a map of column name and column, the column must only refer to attributes + supplied by this Dataset. It is an error to add columns that refer to some other Dataset. + + .. versionadded:: 3.3.0 + Added support for multiple columns adding + + Parameters + ---------- + colsMap : dict + a dict of column name and :class:`Column`. Currently, only single map is supported. 
+ + Examples + -------- + >>> df.withColumns({'age2': df.age + 2, 'age3': df.age + 3}).collect() + [Row(age=2, name='Alice', age2=4, age3=5), Row(age=5, name='Bob', age2=7, age3=8)] + """ + # Below code is to help enable kwargs in future. + assert len(colsMap) == 1 + colsMap = colsMap[0] # type: ignore[assignment] + + if not isinstance(colsMap, dict): + raise TypeError("colsMap must be dict of column name and column.") + + col_names = list(colsMap.keys()) + cols = list(colsMap.values()) + + return DataFrame( + self._jdf.withColumns(_to_seq(self._sc, col_names), self._jcols(*cols)), self.sql_ctx + ) + def withColumn(self, colName: str, col: Column) -> "DataFrame": """ Returns a new :class:`DataFrame` by adding a column or replacing the diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index 1367fe79f0260..5f5e88fd46deb 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -479,6 +479,33 @@ def foo(): self.assertRaises(TypeError, foo) + def test_with_columns(self): + # With single column + keys = self.df.withColumns({"key": self.df.key}).select("key").collect() + self.assertEqual([r.key for r in keys], list(range(100))) + + # With key and value columns + kvs = ( + self.df.withColumns({"key": self.df.key, "value": self.df.value}) + .select("key", "value") + .collect() + ) + self.assertEqual([(r.key, r.value) for r in kvs], [(i, str(i)) for i in range(100)]) + + # Columns rename + kvs = ( + self.df.withColumns({"key_alias": self.df.key, "value_alias": self.df.value}) + .select("key_alias", "value_alias") + .collect() + ) + self.assertEqual( + [(r.key_alias, r.value_alias) for r in kvs], [(i, str(i)) for i in range(100)] + ) + + # Type check + self.assertRaises(TypeError, self.df.withColumns, ["key"]) + self.assertRaises(AssertionError, self.df.withColumns) + def test_generic_hints(self): from pyspark.sql import DataFrame diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 9dd38d850e329..4a921b48fafb1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2478,6 +2478,35 @@ class Dataset[T] private[sql]( */ def withColumn(colName: String, col: Column): DataFrame = withColumns(Seq(colName), Seq(col)) + /** + * (Scala-specific) Returns a new Dataset by adding columns or replacing the existing columns + * that has the same names. + * + * `colsMap` is a map of column name and column, the column must only refer to attributes + * supplied by this Dataset. It is an error to add columns that refers to some other Dataset. + * + * @group untypedrel + * @since 3.3.0 + */ + def withColumns(colsMap: Map[String, Column]): DataFrame = { + val (colNames, newCols) = colsMap.toSeq.unzip + withColumns(colNames, newCols) + } + + /** + * (Java-specific) Returns a new Dataset by adding columns or replacing the existing columns + * that has the same names. + * + * `colsMap` is a map of column name and column, the column must only refer to attribute + * supplied by this Dataset. It is an error to add columns that refers to some other Dataset. + * + * @group untypedrel + * @since 3.3.0 + */ + def withColumns(colsMap: java.util.Map[String, Column]): DataFrame = withColumns( + colsMap.asScala.toMap + ) + /** * Returns a new Dataset by adding columns or replacing the existing columns that has * the same names. 
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java index da7c62251b385..c0b4690dd6260 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java @@ -33,6 +33,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Column; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; @@ -318,6 +319,21 @@ public void testSampleBy() { Assert.assertTrue(2 <= actual.get(1).getLong(1) && actual.get(1).getLong(1) <= 13); } + @Test + public void testwithColumns() { + Dataset df = spark.table("testData2"); + Map colMaps = new HashMap<>(); + colMaps.put("a1", col("a")); + colMaps.put("b1", col("b")); + + StructType expected = df.withColumn("a1", col("a")).withColumn("b1", col("b")).schema(); + StructType actual = df.withColumns(colMaps).schema(); + // Validate geting same result with withColumn loop call + Assert.assertEquals(expected, actual); + // Validate the col names + Assert.assertArrayEquals(actual.fieldNames(), new String[] {"a", "b", "a1", "b1"}); + } + @Test public void testSampleByColumn() { Dataset df = spark.range(0, 100, 1, 2).select(col("id").mod(3).as("key")); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 7482d76207388..cd0bd06413870 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -631,7 +631,19 @@ class DataFrameSuite extends QueryTest assert(df.schema.map(_.name) === Seq("key", "value", "newCol")) } - test("withColumns") { + test("withColumns: public API, with Map input") { + val df = testData.toDF().withColumns(Map( + "newCol1" -> (col("key") + 1), "newCol2" -> (col("key") + 2) + )) + checkAnswer( + df, + testData.collect().map { case Row(key: Int, value: String) => + Row(key, value, key + 1, key + 2) + }.toSeq) + assert(df.schema.map(_.name) === Seq("key", "value", "newCol1", "newCol2")) + } + + test("withColumns: internal method") { val df = testData.toDF().withColumns(Seq("newCol1", "newCol2"), Seq(col("key") + 1, col("key") + 2)) checkAnswer( @@ -655,7 +667,7 @@ class DataFrameSuite extends QueryTest assert(err2.getMessage.contains("Found duplicate column(s)")) } - test("withColumns: case sensitive") { + test("withColumns: internal method, case sensitive") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { val df = testData.toDF().withColumns(Seq("newCol1", "newCOL1"), Seq(col("key") + 1, col("key") + 2)) @@ -674,7 +686,7 @@ class DataFrameSuite extends QueryTest } } - test("withColumns: given metadata") { + test("withColumns: internal method, given metadata") { def buildMetadata(num: Int): Seq[Metadata] = { (0 until num).map { n => val builder = new MetadataBuilder From 88696ebcb72fd3057b1546831f653b29b7e0abb2 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 15 Feb 2022 09:42:31 +0900 Subject: [PATCH 229/513] [SPARK-38121][PYTHON][SQL] Use SparkSession instead of SQLContext inside PySpark ### What changes were proposed in this pull request? This PR proposes to `SparkSession` within PySpark. This is a base work for respecting runtime configurations, etc. 
Currently, we rely on old deprecated `SQLContext` internally that doesn't respect Spark session's runtime configurations correctly. This PR also contains related changes (and a bit of refactoring in the code this PR touches) as below: - Expose `DataFrame.sparkSession` like Scala API does. - Move `SQLContext._conf` -> `SparkSession._jconf`. - Rename `rdd_array` to `df_array` at `DataFrame.randomSplit`. - Issue warnings to discourage to use `DataFrame.sql_ctx` and `DataFrame(..., sql_ctx)`. ### Why are the changes needed? - This is a base work for PySpark to respect runtime configuration. - To expose the same API layer as Scala API (`df.sparkSession`) - To avoid relaying on old `SQLContext`. ### Does this PR introduce _any_ user-facing change? Yes. - Issue warnings to discourage to use `DataFrame.sql_ctx` and `DataFrame(..., sql_ctx)`. - New API `DataFrame.sparkSession` ### How was this patch tested? Existing test cases should cover them. Closes #35410 from HyukjinKwon/SPARK-38121. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- python/docs/source/reference/pyspark.sql.rst | 1 + python/pyspark/ml/clustering.py | 2 +- python/pyspark/ml/common.py | 2 +- python/pyspark/ml/fpm.py | 2 +- python/pyspark/ml/wrapper.py | 2 +- python/pyspark/mllib/common.py | 2 +- python/pyspark/pandas/internal.py | 2 +- python/pyspark/shell.py | 3 +- python/pyspark/sql/catalog.py | 4 +- python/pyspark/sql/context.py | 42 ++-- python/pyspark/sql/dataframe.py | 185 ++++++++++++------ python/pyspark/sql/functions.py | 2 +- python/pyspark/sql/group.py | 14 +- python/pyspark/sql/observation.py | 2 +- python/pyspark/sql/pandas/conversion.py | 33 ++-- python/pyspark/sql/pandas/group_ops.py | 5 +- python/pyspark/sql/pandas/map_ops.py | 4 +- python/pyspark/sql/readwriter.py | 12 +- python/pyspark/sql/session.py | 53 +++-- python/pyspark/sql/streaming.py | 8 +- python/pyspark/sql/tests/test_session.py | 16 +- python/pyspark/sql/tests/test_streaming.py | 16 +- python/pyspark/sql/tests/test_udf.py | 8 +- python/pyspark/sql/tests/test_udf_profiler.py | 11 +- python/pyspark/sql/utils.py | 8 +- .../spark/sql/api/python/PythonSQLUtils.scala | 14 +- .../org/apache/spark/sql/api/r/SQLUtils.scala | 4 +- .../sql/execution/arrow/ArrowConverters.scala | 12 +- 28 files changed, 266 insertions(+), 203 deletions(-) diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst index 818814ca0a147..1d34961a91a61 100644 --- a/python/docs/source/reference/pyspark.sql.rst +++ b/python/docs/source/reference/pyspark.sql.rst @@ -201,6 +201,7 @@ DataFrame APIs DataFrame.show DataFrame.sort DataFrame.sortWithinPartitions + DataFrame.sparkSession DataFrame.stat DataFrame.storageLevel DataFrame.subtract diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index a66d6e347055a..c8b4c93812ac3 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -2107,7 +2107,7 @@ def assignClusters(self, dataset: DataFrame) -> DataFrame: assert self._java_obj is not None jdf = self._java_obj.assignClusters(dataset._jdf) - return DataFrame(jdf, dataset.sql_ctx) + return DataFrame(jdf, dataset.sparkSession) if __name__ == "__main__": diff --git a/python/pyspark/ml/common.py b/python/pyspark/ml/common.py index 2329421b9ee09..32829c42f42db 100644 --- a/python/pyspark/ml/common.py +++ b/python/pyspark/ml/common.py @@ -108,7 +108,7 @@ def _java2py(sc: SparkContext, r: "JavaObjectOrPickleDump", encoding: str = "byt return RDD(jrdd, sc) if clsName == "Dataset": - return 
DataFrame(r, SparkSession(sc)._wrapped) + return DataFrame(r, SparkSession._getActiveSessionOrCreate()) if clsName in _picklable_classes: r = sc._jvm.org.apache.spark.ml.python.MLSerDe.dumps(r) diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py index 0795ec2348f8b..b748a7dee63f0 100644 --- a/python/pyspark/ml/fpm.py +++ b/python/pyspark/ml/fpm.py @@ -510,7 +510,7 @@ def findFrequentSequentialPatterns(self, dataset: DataFrame) -> DataFrame: self._transfer_params_to_java() assert self._java_obj is not None jdf = self._java_obj.findFrequentSequentialPatterns(dataset._jdf) - return DataFrame(jdf, dataset.sql_ctx) + return DataFrame(jdf, dataset.sparkSession) if __name__ == "__main__": diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index 7f03f64ef7176..385a2439e27e2 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -393,7 +393,7 @@ def _transform(self, dataset: DataFrame) -> DataFrame: assert self._java_obj is not None self._transfer_params_to_java() - return DataFrame(self._java_obj.transform(dataset._jdf), dataset.sql_ctx) + return DataFrame(self._java_obj.transform(dataset._jdf), dataset.sparkSession) @inherit_doc diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py index 24a3f411946d6..00653aa6c9dd9 100644 --- a/python/pyspark/mllib/common.py +++ b/python/pyspark/mllib/common.py @@ -110,7 +110,7 @@ def _java2py(sc: SparkContext, r: "JavaObjectOrPickleDump", encoding: str = "byt return RDD(jrdd, sc) if clsName == "Dataset": - return DataFrame(r, SparkSession(sc)._wrapped) + return DataFrame(r, SparkSession._getActiveSessionOrCreate()) if clsName in _picklable_classes: r = sc._jvm.org.apache.spark.mllib.api.python.SerDe.dumps(r) diff --git a/python/pyspark/pandas/internal.py b/python/pyspark/pandas/internal.py index 1c32c430911cd..71f8f6ed57193 100644 --- a/python/pyspark/pandas/internal.py +++ b/python/pyspark/pandas/internal.py @@ -906,7 +906,7 @@ def attach_distributed_sequence_column(sdf: SparkDataFrame, column_name: str) -> if len(sdf.columns) > 0: return SparkDataFrame( sdf._jdf.toDF().withSequenceColumn(column_name), # type: ignore[operator] - sdf.sql_ctx, + sdf.sparkSession, ) else: cnt = sdf.count() diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index 4164e3ab0ce89..e0a8c06d0e78d 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -28,6 +28,7 @@ from pyspark.context import SparkContext from pyspark.sql import SparkSession +from pyspark.sql.context import SQLContext if os.environ.get("SPARK_EXECUTOR_URI"): SparkContext.setSystemProperty("spark.executor.uri", os.environ["SPARK_EXECUTOR_URI"]) @@ -49,7 +50,7 @@ atexit.register((lambda sc: lambda: sc.stop())(sc)) # for compatibility -sqlContext = spark._wrapped +sqlContext = SQLContext._get_or_create(sc) sqlCtx = sqlContext print( diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py index ea8bb97c3b712..3ececfa0f1ed7 100644 --- a/python/pyspark/sql/catalog.py +++ b/python/pyspark/sql/catalog.py @@ -345,7 +345,7 @@ def createTable( if path is not None: options["path"] = path if source is None: - c = self._sparkSession._wrapped._conf + c = self._sparkSession._jconf source = c.defaultDataSourceName() # type: ignore[attr-defined] if description is None: description = "" @@ -356,7 +356,7 @@ def createTable( raise TypeError("schema should be StructType") scala_datatype = self._jsparkSession.parseDataType(schema.json()) df = self._jcatalog.createTable(tableName, source, scala_datatype, 
description, options) - return DataFrame(df, self._sparkSession._wrapped) + return DataFrame(df, self._sparkSession) def dropTempView(self, viewName: str) -> None: """Drops the local temporary view with the given view name in the catalog. diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 6ab70ee1c39e0..6f94e9a3b8153 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -46,7 +46,6 @@ from pyspark.rdd import RDD from pyspark.sql.types import AtomicType, DataType, StructType from pyspark.sql.streaming import StreamingQueryManager -from pyspark.conf import SparkConf if TYPE_CHECKING: from pyspark.sql._typing import ( @@ -121,7 +120,7 @@ def __init__( if sparkSession is None: sparkSession = SparkSession._getActiveSessionOrCreate() if jsqlContext is None: - jsqlContext = sparkSession._jwrapped + jsqlContext = sparkSession._jsparkSession.sqlContext() self.sparkSession = sparkSession self._jsqlContext = jsqlContext _monkey_patch_RDD(self.sparkSession) @@ -141,11 +140,6 @@ def _ssql_ctx(self) -> JavaObject: """ return self._jsqlContext - @property - def _conf(self) -> SparkConf: - """Accessor for the JVM SQL-specific configurations""" - return self.sparkSession._jsparkSession.sessionState().conf() - @classmethod def getOrCreate(cls: Type["SQLContext"], sc: SparkContext) -> "SQLContext": """ @@ -164,17 +158,20 @@ def getOrCreate(cls: Type["SQLContext"], sc: SparkContext) -> "SQLContext": "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.", FutureWarning, ) + return cls._get_or_create(sc) + + @classmethod + def _get_or_create(cls: Type["SQLContext"], sc: SparkContext) -> "SQLContext": if ( cls._instantiatedContext is None or SQLContext._instantiatedContext._sc._jsc is None # type: ignore[union-attr] ): assert sc._jvm is not None - jsqlContext = ( - sc._jvm.SparkSession.builder().sparkContext(sc._jsc.sc()).getOrCreate().sqlContext() - ) - sparkSession = SparkSession(sc, jsqlContext.sparkSession()) - cls(sc, sparkSession, jsqlContext) + # There can be only one running Spark context. That will automatically + # be used in the Spark session internally. + session = SparkSession._getActiveSessionOrCreate() + cls(sc, session, session._jsparkSession.sqlContext()) return cast(SQLContext, cls._instantiatedContext) def newSession(self) -> "SQLContext": @@ -590,9 +587,9 @@ def tables(self, dbName: Optional[str] = None) -> DataFrame: Row(namespace='', tableName='table1', isTemporary=True) """ if dbName is None: - return DataFrame(self._ssql_ctx.tables(), self) + return DataFrame(self._ssql_ctx.tables(), self.sparkSession) else: - return DataFrame(self._ssql_ctx.tables(dbName), self) + return DataFrame(self._ssql_ctx.tables(dbName), self.sparkSession) def tableNames(self, dbName: Optional[str] = None) -> List[str]: """Returns a list of names of tables in the database ``dbName``. 
@@ -647,7 +644,7 @@ def read(self) -> DataFrameReader: ------- :class:`DataFrameReader` """ - return DataFrameReader(self) + return DataFrameReader(self.sparkSession) @property def readStream(self) -> DataStreamReader: @@ -669,7 +666,7 @@ def readStream(self) -> DataStreamReader: >>> text_sdf.isStreaming True """ - return DataStreamReader(self) + return DataStreamReader(self.sparkSession) @property def streams(self) -> StreamingQueryManager: @@ -714,14 +711,13 @@ def __init__(self, sparkContext: SparkContext, jhiveContext: Optional[JavaObject + "SparkSession.builder.enableHiveSupport().getOrCreate() instead.", FutureWarning, ) + static_conf = {} if jhiveContext is None: - sparkContext._conf.set( # type: ignore[attr-defined] - "spark.sql.catalogImplementation", "hive" - ) - sparkSession = SparkSession.builder._sparkContext(sparkContext).getOrCreate() - else: - sparkSession = SparkSession(sparkContext, jhiveContext.sparkSession()) - SQLContext.__init__(self, sparkContext, sparkSession, jhiveContext) + static_conf = {"spark.sql.catalogImplementation": "in-memory"} + # There can be only one running Spark context. That will automatically + # be used in the Spark session internally. + session = SparkSession._getActiveSessionOrCreate(**static_conf) + SQLContext.__init__(self, sparkContext, session, jhiveContext) @classmethod def _createForTesting(cls, sparkContext: SparkContext) -> "HiveContext": diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 037252795be0c..610b8d6997046 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -16,6 +16,7 @@ # import json +import os import sys import random import warnings @@ -70,6 +71,7 @@ from pyspark.pandas.frame import DataFrame as PandasOnSparkDataFrame from pyspark.sql._typing import ColumnOrName, LiteralType, OptionalPrimitiveType from pyspark.sql.context import SQLContext + from pyspark.sql.session import SparkSession from pyspark.sql.group import GroupedData from pyspark.sql.observation import Observation @@ -102,12 +104,34 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): .groupBy(department.name, "gender").agg({"salary": "avg", "age": "max"}) .. versionadded:: 1.3.0 + + .. note: A DataFrame should only be created as described above. It should not be directly + created via using the constructor. """ - def __init__(self, jdf: JavaObject, sql_ctx: "SQLContext"): - self._jdf = jdf - self.sql_ctx = sql_ctx - self._sc: SparkContext = cast(SparkContext, sql_ctx and sql_ctx._sc) + def __init__( + self, + jdf: JavaObject, + sql_ctx: Union["SQLContext", "SparkSession"], + ): + from pyspark.sql.context import SQLContext + + self._session: Optional["SparkSession"] = None + self._sql_ctx: Optional["SQLContext"] = None + + if isinstance(sql_ctx, SQLContext): + assert not os.environ.get("SPARK_TESTING") # Sanity check for our internal usage. + assert isinstance(sql_ctx, SQLContext) + # We should remove this if-else branch in the future release, and rename + # sql_ctx to session in the constructor. This is an internal code path but + # was kept with an warning because it's used intensively by third-party libraries. + warnings.warn("DataFrame constructor is internal. 
Do not directly use it.") + self._sql_ctx = sql_ctx + else: + self._session = sql_ctx + + self._sc: SparkContext = sql_ctx._sc + self._jdf: JavaObject = jdf self.is_cached = False # initialized lazily self._schema: Optional[StructType] = None @@ -116,13 +140,45 @@ def __init__(self, jdf: JavaObject, sql_ctx: "SQLContext"): # by __repr__ and _repr_html_ while eager evaluation opened. self._support_repr_html = False + @property + def sql_ctx(self) -> "SQLContext": + from pyspark.sql.context import SQLContext + + warnings.warn( + "DataFrame.sql_ctx is an internal property, and will be removed " + "in future releases. Use DataFrame.sparkSession instead." + ) + if self._sql_ctx is None: + self._sql_ctx = SQLContext._get_or_create(self._sc) + return self._sql_ctx + + @property # type: ignore[misc] + def sparkSession(self) -> "SparkSession": + """Returns Spark session that created this :class:`DataFrame`. + + .. versionadded:: 3.3.0 + + Examples + -------- + >>> df = spark.range(1) + >>> type(df.sparkSession) + + """ + from pyspark.sql.session import SparkSession + + if self._session is None: + self._session = SparkSession._getActiveSessionOrCreate() + return self._session + @property # type: ignore[misc] @since(1.3) def rdd(self) -> "RDD[Row]": """Returns the content as an :class:`pyspark.RDD` of :class:`Row`.""" if self._lazy_rdd is None: jrdd = self._jdf.javaToPython() - self._lazy_rdd = RDD(jrdd, self.sql_ctx._sc, BatchedSerializer(CPickleSerializer())) + self._lazy_rdd = RDD( + jrdd, self.sparkSession._sc, BatchedSerializer(CPickleSerializer()) + ) return self._lazy_rdd @property # type: ignore[misc] @@ -456,7 +512,7 @@ def exceptAll(self, other: "DataFrame") -> "DataFrame": +---+---+ """ - return DataFrame(self._jdf.exceptAll(other._jdf), self.sql_ctx) + return DataFrame(self._jdf.exceptAll(other._jdf), self.sparkSession) @since(1.3) def isLocal(self) -> bool: @@ -563,12 +619,12 @@ def show(self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool = def __repr__(self) -> str: if ( not self._support_repr_html - and self.sql_ctx._conf.isReplEagerEvalEnabled() # type: ignore[attr-defined] + and self.sparkSession._jconf.isReplEagerEvalEnabled() # type: ignore[attr-defined] ): vertical = False return self._jdf.showString( - self.sql_ctx._conf.replEagerEvalMaxNumRows(), # type: ignore[attr-defined] - self.sql_ctx._conf.replEagerEvalTruncate(), # type: ignore[attr-defined] + self.sparkSession._jconf.replEagerEvalMaxNumRows(), # type: ignore[attr-defined] + self.sparkSession._jconf.replEagerEvalTruncate(), # type: ignore[attr-defined] vertical, ) # type: ignore[attr-defined] else: @@ -581,13 +637,13 @@ def _repr_html_(self) -> Optional[str]: """ if not self._support_repr_html: self._support_repr_html = True - if self.sql_ctx._conf.isReplEagerEvalEnabled(): # type: ignore[attr-defined] + if self.sparkSession._jconf.isReplEagerEvalEnabled(): # type: ignore[attr-defined] max_num_rows = max( - self.sql_ctx._conf.replEagerEvalMaxNumRows(), 0 # type: ignore[attr-defined] + self.sparkSession._jconf.replEagerEvalMaxNumRows(), 0 # type: ignore[attr-defined] ) sock_info = self._jdf.getRowsToPython( max_num_rows, - self.sql_ctx._conf.replEagerEvalTruncate(), # type: ignore[attr-defined] + self.sparkSession._jconf.replEagerEvalTruncate(), # type: ignore[attr-defined] ) rows = list(_load_from_socket(sock_info, BatchedSerializer(CPickleSerializer()))) head = rows[0] @@ -631,7 +687,7 @@ def checkpoint(self, eager: bool = True) -> "DataFrame": This API is experimental. 
""" jdf = self._jdf.checkpoint(eager) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def localCheckpoint(self, eager: bool = True) -> "DataFrame": """Returns a locally checkpointed version of this :class:`DataFrame`. Checkpointing can be @@ -651,7 +707,7 @@ def localCheckpoint(self, eager: bool = True) -> "DataFrame": This API is experimental. """ jdf = self._jdf.localCheckpoint(eager) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def withWatermark(self, eventTime: str, delayThreshold: str) -> "DataFrame": """Defines an event time watermark for this :class:`DataFrame`. A watermark tracks a point @@ -695,7 +751,7 @@ def withWatermark(self, eventTime: str, delayThreshold: str) -> "DataFrame": if not delayThreshold or type(delayThreshold) is not str: raise TypeError("delayThreshold should be provided as a string interval") jdf = self._jdf.withWatermark(eventTime, delayThreshold) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def hint( self, name: str, *parameters: Union["PrimitiveType", List["PrimitiveType"]] @@ -740,7 +796,7 @@ def hint( ) jdf = self._jdf.hint(name, self._jseq(parameters)) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def count(self) -> int: """Returns the number of rows in this :class:`DataFrame`. @@ -804,7 +860,7 @@ def limit(self, num: int) -> "DataFrame": [] """ jdf = self._jdf.limit(num) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def take(self, num: int) -> List[Row]: """Returns the first ``num`` rows as a :class:`list` of :class:`Row`. @@ -970,7 +1026,7 @@ def coalesce(self, numPartitions: int) -> "DataFrame": >>> df.coalesce(1).rdd.getNumPartitions() 1 """ - return DataFrame(self._jdf.coalesce(numPartitions), self.sql_ctx) + return DataFrame(self._jdf.coalesce(numPartitions), self.sparkSession) @overload def repartition(self, numPartitions: int, *cols: "ColumnOrName") -> "DataFrame": @@ -1041,14 +1097,15 @@ def repartition( # type: ignore[misc] """ if isinstance(numPartitions, int): if len(cols) == 0: - return DataFrame(self._jdf.repartition(numPartitions), self.sql_ctx) + return DataFrame(self._jdf.repartition(numPartitions), self.sparkSession) else: return DataFrame( - self._jdf.repartition(numPartitions, self._jcols(*cols)), self.sql_ctx + self._jdf.repartition(numPartitions, self._jcols(*cols)), + self.sparkSession, ) elif isinstance(numPartitions, (str, Column)): cols = (numPartitions,) + cols - return DataFrame(self._jdf.repartition(self._jcols(*cols)), self.sql_ctx) + return DataFrame(self._jdf.repartition(self._jcols(*cols)), self.sparkSession) else: raise TypeError("numPartitions should be an int or Column") @@ -1115,11 +1172,12 @@ def repartitionByRange( # type: ignore[misc] raise ValueError("At least one partition-by expression must be specified.") else: return DataFrame( - self._jdf.repartitionByRange(numPartitions, self._jcols(*cols)), self.sql_ctx + self._jdf.repartitionByRange(numPartitions, self._jcols(*cols)), + self.sparkSession, ) elif isinstance(numPartitions, (str, Column)): cols = (numPartitions,) + cols - return DataFrame(self._jdf.repartitionByRange(self._jcols(*cols)), self.sql_ctx) + return DataFrame(self._jdf.repartitionByRange(self._jcols(*cols)), self.sparkSession) else: raise TypeError("numPartitions should be an int, string or Column") @@ -1133,7 +1191,7 @@ def distinct(self) -> "DataFrame": >>> df.distinct().count() 2 """ - return DataFrame(self._jdf.distinct(), 
self.sql_ctx) + return DataFrame(self._jdf.distinct(), self.sparkSession) @overload def sample(self, fraction: float, seed: Optional[int] = ...) -> "DataFrame": @@ -1228,7 +1286,7 @@ def sample( # type: ignore[misc] seed = int(seed) if seed is not None else None args = [arg for arg in [withReplacement, fraction, seed] if arg is not None] jdf = self._jdf.sample(*args) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def sampleBy( self, col: "ColumnOrName", fractions: Dict[Any, float], seed: Optional[int] = None @@ -1283,7 +1341,9 @@ def sampleBy( fractions[k] = float(v) col = col._jc seed = seed if seed is not None else random.randint(0, sys.maxsize) - return DataFrame(self._jdf.stat().sampleBy(col, self._jmap(fractions), seed), self.sql_ctx) + return DataFrame( + self._jdf.stat().sampleBy(col, self._jmap(fractions), seed), self.sparkSession + ) def randomSplit(self, weights: List[float], seed: Optional[int] = None) -> List["DataFrame"]: """Randomly splits this :class:`DataFrame` with the provided weights. @@ -1311,10 +1371,10 @@ def randomSplit(self, weights: List[float], seed: Optional[int] = None) -> List[ if w < 0.0: raise ValueError("Weights must be positive. Found weight value: %s" % w) seed = seed if seed is not None else random.randint(0, sys.maxsize) - rdd_array = self._jdf.randomSplit( - _to_list(self.sql_ctx._sc, cast(List["ColumnOrName"], weights)), int(seed) + df_array = self._jdf.randomSplit( + _to_list(self.sparkSession._sc, cast(List["ColumnOrName"], weights)), int(seed) ) - return [DataFrame(rdd, self.sql_ctx) for rdd in rdd_array] + return [DataFrame(df, self.sparkSession) for df in df_array] @property def dtypes(self) -> List[Tuple[str, str]]: @@ -1392,7 +1452,7 @@ def alias(self, alias: str) -> "DataFrame": [Row(name='Bob', name='Bob', age=5), Row(name='Alice', name='Alice', age=2)] """ assert isinstance(alias, str), "alias should be a string" - return DataFrame(getattr(self._jdf, "as")(alias), self.sql_ctx) + return DataFrame(getattr(self._jdf, "as")(alias), self.sparkSession) def crossJoin(self, other: "DataFrame") -> "DataFrame": """Returns the cartesian product with another :class:`DataFrame`. @@ -1416,7 +1476,7 @@ def crossJoin(self, other: "DataFrame") -> "DataFrame": """ jdf = self._jdf.crossJoin(other._jdf) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def join( self, @@ -1486,7 +1546,7 @@ def join( on = self._jseq([]) assert isinstance(how, str), "how should be a string" jdf = self._jdf.join(other._jdf, on, how) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) # TODO(SPARK-22947): Fix the DataFrame API. 
def _joinAsOf( @@ -1607,7 +1667,7 @@ def _joinAsOf( allowExactMatches, direction, ) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def sortWithinPartitions( self, *cols: Union[str, Column, List[Union[str, Column]]], **kwargs: Any @@ -1639,7 +1699,7 @@ def sortWithinPartitions( +---+-----+ """ jdf = self._jdf.sortWithinPartitions(self._sort_cols(cols, kwargs)) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def sort( self, *cols: Union[str, Column, List[Union[str, Column]]], **kwargs: Any @@ -1677,7 +1737,7 @@ def sort( [Row(age=5, name='Bob'), Row(age=2, name='Alice')] """ jdf = self._jdf.sort(self._sort_cols(cols, kwargs)) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) orderBy = sort @@ -1687,11 +1747,11 @@ def _jseq( converter: Optional[Callable[..., Union["PrimitiveType", JavaObject]]] = None, ) -> JavaObject: """Return a JVM Seq of Columns from a list of Column or names""" - return _to_seq(self.sql_ctx._sc, cols, converter) + return _to_seq(self.sparkSession._sc, cols, converter) def _jmap(self, jm: Dict) -> JavaObject: """Return a JVM Scala Map from a dict""" - return _to_scala_map(self.sql_ctx._sc, jm) + return _to_scala_map(self.sparkSession._sc, jm) def _jcols(self, *cols: "ColumnOrName") -> JavaObject: """Return a JVM Seq of Columns from a list of Column or column names @@ -1767,7 +1827,7 @@ def describe(self, *cols: Union[str, List[str]]) -> "DataFrame": if len(cols) == 1 and isinstance(cols[0], list): cols = cols[0] # type: ignore[assignment] jdf = self._jdf.describe(self._jseq(cols)) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def summary(self, *statistics: str) -> "DataFrame": """Computes specified statistics for numeric and string columns. Available statistics are: @@ -1832,7 +1892,7 @@ def summary(self, *statistics: str) -> "DataFrame": if len(statistics) == 1 and isinstance(statistics[0], list): statistics = statistics[0] jdf = self._jdf.summary(self._jseq(statistics)) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) @overload def head(self) -> Optional[Row]: @@ -1970,7 +2030,7 @@ def select(self, *cols: "ColumnOrName") -> "DataFrame": # type: ignore[misc] [Row(name='Alice', age=12), Row(name='Bob', age=15)] """ jdf = self._jdf.select(self._jcols(*cols)) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) @overload def selectExpr(self, *expr: str) -> "DataFrame": @@ -1995,7 +2055,7 @@ def selectExpr(self, *expr: Union[str, List[str]]) -> "DataFrame": if len(expr) == 1 and isinstance(expr[0], list): expr = expr[0] # type: ignore[assignment] jdf = self._jdf.selectExpr(self._jseq(expr)) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def filter(self, condition: "ColumnOrName") -> "DataFrame": """Filters rows using the given condition. @@ -2028,7 +2088,7 @@ def filter(self, condition: "ColumnOrName") -> "DataFrame": jdf = self._jdf.filter(condition._jc) else: raise TypeError("condition should be string or Column") - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) @overload def groupBy(self, *cols: "ColumnOrName") -> "GroupedData": @@ -2203,7 +2263,7 @@ def union(self, other: "DataFrame") -> "DataFrame": Also as standard in SQL, this function resolves columns by position (not by name). 
""" - return DataFrame(self._jdf.union(other._jdf), self.sql_ctx) + return DataFrame(self._jdf.union(other._jdf), self.sparkSession) @since(1.3) def unionAll(self, other: "DataFrame") -> "DataFrame": @@ -2260,7 +2320,7 @@ def unionByName(self, other: "DataFrame", allowMissingColumns: bool = False) -> Added optional argument `allowMissingColumns` to specify whether to allow missing columns. """ - return DataFrame(self._jdf.unionByName(other._jdf, allowMissingColumns), self.sql_ctx) + return DataFrame(self._jdf.unionByName(other._jdf, allowMissingColumns), self.sparkSession) @since(1.3) def intersect(self, other: "DataFrame") -> "DataFrame": @@ -2269,7 +2329,7 @@ def intersect(self, other: "DataFrame") -> "DataFrame": This is equivalent to `INTERSECT` in SQL. """ - return DataFrame(self._jdf.intersect(other._jdf), self.sql_ctx) + return DataFrame(self._jdf.intersect(other._jdf), self.sparkSession) def intersectAll(self, other: "DataFrame") -> "DataFrame": """Return a new :class:`DataFrame` containing rows in both this :class:`DataFrame` @@ -2295,7 +2355,7 @@ def intersectAll(self, other: "DataFrame") -> "DataFrame": +---+---+ """ - return DataFrame(self._jdf.intersectAll(other._jdf), self.sql_ctx) + return DataFrame(self._jdf.intersectAll(other._jdf), self.sparkSession) @since(1.3) def subtract(self, other: "DataFrame") -> "DataFrame": @@ -2305,7 +2365,7 @@ def subtract(self, other: "DataFrame") -> "DataFrame": This is equivalent to `EXCEPT DISTINCT` in SQL. """ - return DataFrame(getattr(self._jdf, "except")(other._jdf), self.sql_ctx) + return DataFrame(getattr(self._jdf, "except")(other._jdf), self.sparkSession) def dropDuplicates(self, subset: Optional[List[str]] = None) -> "DataFrame": """Return a new :class:`DataFrame` with duplicate rows removed, @@ -2350,7 +2410,7 @@ def dropDuplicates(self, subset: Optional[List[str]] = None) -> "DataFrame": jdf = self._jdf.dropDuplicates() else: jdf = self._jdf.dropDuplicates(self._jseq(subset)) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def dropna( self, @@ -2398,7 +2458,7 @@ def dropna( if thresh is None: thresh = len(subset) if how == "any" else 1 - return DataFrame(self._jdf.na().drop(thresh, self._jseq(subset)), self.sql_ctx) + return DataFrame(self._jdf.na().drop(thresh, self._jseq(subset)), self.sparkSession) @overload def fillna( @@ -2476,16 +2536,16 @@ def fillna( value = float(value) if isinstance(value, dict): - return DataFrame(self._jdf.na().fill(value), self.sql_ctx) + return DataFrame(self._jdf.na().fill(value), self.sparkSession) elif subset is None: - return DataFrame(self._jdf.na().fill(value), self.sql_ctx) + return DataFrame(self._jdf.na().fill(value), self.sparkSession) else: if isinstance(subset, str): subset = [subset] elif not isinstance(subset, (list, tuple)): raise TypeError("subset should be a list or tuple of column names") - return DataFrame(self._jdf.na().fill(value, self._jseq(subset)), self.sql_ctx) + return DataFrame(self._jdf.na().fill(value, self._jseq(subset)), self.sparkSession) @overload def replace( @@ -2686,10 +2746,11 @@ def all_of_(xs: Iterable) -> bool: raise ValueError("Mixed type replacements are not supported") if subset is None: - return DataFrame(self._jdf.na().replace("*", rep_dict), self.sql_ctx) + return DataFrame(self._jdf.na().replace("*", rep_dict), self.sparkSession) else: return DataFrame( - self._jdf.na().replace(self._jseq(subset), self._jmap(rep_dict)), self.sql_ctx + self._jdf.na().replace(self._jseq(subset), self._jmap(rep_dict)), + self.sparkSession, 
) @overload @@ -2875,7 +2936,7 @@ def crosstab(self, col1: str, col2: str) -> "DataFrame": raise TypeError("col1 should be a string.") if not isinstance(col2, str): raise TypeError("col2 should be a string.") - return DataFrame(self._jdf.stat().crosstab(col1, col2), self.sql_ctx) + return DataFrame(self._jdf.stat().crosstab(col1, col2), self.sparkSession) def freqItems( self, cols: Union[List[str], Tuple[str]], support: Optional[float] = None @@ -2909,7 +2970,9 @@ def freqItems( raise TypeError("cols must be a list or tuple of column names as strings.") if not support: support = 0.01 - return DataFrame(self._jdf.stat().freqItems(_to_seq(self._sc, cols), support), self.sql_ctx) + return DataFrame( + self._jdf.stat().freqItems(_to_seq(self._sc, cols), support), self.sparkSession + ) def withColumns(self, *colsMap: Dict[str, Column]) -> "DataFrame": """ @@ -2978,7 +3041,7 @@ def withColumn(self, colName: str, col: Column) -> "DataFrame": """ if not isinstance(col, Column): raise TypeError("col should be Column") - return DataFrame(self._jdf.withColumn(colName, col._jc), self.sql_ctx) + return DataFrame(self._jdf.withColumn(colName, col._jc), self.sparkSession) def withColumnRenamed(self, existing: str, new: str) -> "DataFrame": """Returns a new :class:`DataFrame` by renaming an existing column. @@ -2998,7 +3061,7 @@ def withColumnRenamed(self, existing: str, new: str) -> "DataFrame": >>> df.withColumnRenamed('age', 'age2').collect() [Row(age2=2, name='Alice'), Row(age2=5, name='Bob')] """ - return DataFrame(self._jdf.withColumnRenamed(existing, new), self.sql_ctx) + return DataFrame(self._jdf.withColumnRenamed(existing, new), self.sparkSession) def withMetadata(self, columnName: str, metadata: Dict[str, Any]) -> "DataFrame": """Returns a new :class:`DataFrame` by updating an existing column with metadata. @@ -3023,7 +3086,7 @@ def withMetadata(self, columnName: str, metadata: Dict[str, Any]) -> "DataFrame" sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None jmeta = sc._jvm.org.apache.spark.sql.types.Metadata.fromJson(json.dumps(metadata)) - return DataFrame(self._jdf.withMetadata(columnName, jmeta), self.sql_ctx) + return DataFrame(self._jdf.withMetadata(columnName, jmeta), self.sparkSession) @overload def drop(self, cols: "ColumnOrName") -> "DataFrame": @@ -3075,7 +3138,7 @@ def drop(self, *cols: "ColumnOrName") -> "DataFrame": # type: ignore[misc] raise TypeError("each col in the param list should be a string") jdf = self._jdf.drop(self._jseq(cols)) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def toDF(self, *cols: "ColumnOrName") -> "DataFrame": """Returns a new :class:`DataFrame` that with new specified column names @@ -3091,7 +3154,7 @@ def toDF(self, *cols: "ColumnOrName") -> "DataFrame": [Row(f1=2, f2='Alice'), Row(f1=5, f2='Bob')] """ jdf = self._jdf.toDF(self._jseq(cols)) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def transform(self, func: Callable[..., "DataFrame"], *args: Any, **kwargs: Any) -> "DataFrame": """Returns a new :class:`DataFrame`. Concise syntax for chaining custom transformations. 
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 2dfaec8a9403a..d79da47d3a3ce 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1105,7 +1105,7 @@ def broadcast(df: DataFrame) -> DataFrame: sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None - return DataFrame(sc._jvm.functions.broadcast(df._jdf), df.sql_ctx) + return DataFrame(sc._jvm.functions.broadcast(df._jdf), df.sparkSession) def coalesce(*cols: "ColumnOrName") -> Column: diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py index 485e01776f872..802d34d020893 100644 --- a/python/pyspark/sql/group.py +++ b/python/pyspark/sql/group.py @@ -22,7 +22,7 @@ from py4j.java_gateway import JavaObject # type: ignore[import] from pyspark.sql.column import Column, _to_seq -from pyspark.sql.context import SQLContext +from pyspark.sql.session import SparkSession from pyspark.sql.dataframe import DataFrame from pyspark.sql.pandas.group_ops import PandasGroupedOpsMixin from pyspark.sql.types import StructType, StructField, IntegerType, StringType @@ -37,7 +37,7 @@ def dfapi(f: Callable) -> Callable: def _api(self: "GroupedData") -> DataFrame: name = f.__name__ jdf = getattr(self._jgd, name)() - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.session) _api.__name__ = f.__name__ _api.__doc__ = f.__doc__ @@ -47,8 +47,8 @@ def _api(self: "GroupedData") -> DataFrame: def df_varargs_api(f: Callable) -> Callable: def _api(self: "GroupedData", *cols: str) -> DataFrame: name = f.__name__ - jdf = getattr(self._jgd, name)(_to_seq(self.sql_ctx._sc, cols)) - return DataFrame(jdf, self.sql_ctx) + jdf = getattr(self._jgd, name)(_to_seq(self.session._sc, cols)) + return DataFrame(jdf, self.session) _api.__name__ = f.__name__ _api.__doc__ = f.__doc__ @@ -66,7 +66,7 @@ class GroupedData(PandasGroupedOpsMixin): def __init__(self, jgd: JavaObject, df: DataFrame): self._jgd = jgd self._df = df - self.sql_ctx: SQLContext = df.sql_ctx + self.session: SparkSession = df.sparkSession @overload def agg(self, *exprs: Column) -> DataFrame: @@ -134,8 +134,8 @@ def agg(self, *exprs: Union[Column, Dict[str, str]]) -> DataFrame: # Columns assert all(isinstance(c, Column) for c in exprs), "all exprs should be Column" exprs = cast(Tuple[Column, ...], exprs) - jdf = self._jgd.agg(exprs[0]._jc, _to_seq(self.sql_ctx._sc, [c._jc for c in exprs[1:]])) - return DataFrame(jdf, self.sql_ctx) + jdf = self._jgd.agg(exprs[0]._jc, _to_seq(self.session._sc, [c._jc for c in exprs[1:]])) + return DataFrame(jdf, self.session) @dfapi def count(self) -> DataFrame: diff --git a/python/pyspark/sql/observation.py b/python/pyspark/sql/observation.py index e5d426ab4c61e..951b0f4c83a0c 100644 --- a/python/pyspark/sql/observation.py +++ b/python/pyspark/sql/observation.py @@ -109,7 +109,7 @@ def _on(self, df: DataFrame, *exprs: Column) -> DataFrame: observed_df = self._jo.on( df._jdf, exprs[0]._jc, column._to_seq(df._sc, [c._jc for c in exprs[1:]]) ) - return DataFrame(observed_df, df.sql_ctx) + return DataFrame(observed_df, df.sparkSession) @property def get(self) -> Dict[str, Any]: diff --git a/python/pyspark/sql/pandas/conversion.py b/python/pyspark/sql/pandas/conversion.py index 33a405838cc91..fbb5183c45ed9 100644 --- a/python/pyspark/sql/pandas/conversion.py +++ b/python/pyspark/sql/pandas/conversion.py @@ -43,6 +43,7 @@ if TYPE_CHECKING: import numpy as np import pyarrow as pa + from py4j.java_gateway import JavaObject from pyspark.sql.pandas._typing import 
DataFrameLike as PandasDataFrameLike from pyspark.sql import DataFrame @@ -88,9 +89,10 @@ def toPandas(self) -> "PandasDataFrameLike": import pandas as pd from pandas.core.dtypes.common import is_timedelta64_dtype - timezone = self.sql_ctx._conf.sessionLocalTimeZone() # type: ignore[attr-defined] + jconf = self.sparkSession._jconf + timezone = jconf.sessionLocalTimeZone() - if self.sql_ctx._conf.arrowPySparkEnabled(): # type: ignore[attr-defined] + if jconf.arrowPySparkEnabled(): # type: ignore[attr-defined] use_arrow = True try: from pyspark.sql.pandas.types import to_arrow_schema @@ -100,7 +102,7 @@ def toPandas(self) -> "PandasDataFrameLike": to_arrow_schema(self.schema) except Exception as e: - if self.sql_ctx._conf.arrowPySparkFallbackEnabled(): # type: ignore[attr-defined] + if jconf.arrowPySparkFallbackEnabled(): # type: ignore[attr-defined] msg = ( "toPandas attempted Arrow optimization because " "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, " @@ -134,7 +136,7 @@ def toPandas(self) -> "PandasDataFrameLike": # Rename columns to avoid duplicated column names. tmp_column_names = ["col_{}".format(i) for i in range(len(self.columns))] - c = self.sql_ctx._conf + c = self.sparkSession._jconf self_destruct = ( c.arrowPySparkSelfDestructEnabled() # type: ignore[attr-defined] ) @@ -368,6 +370,8 @@ class SparkConversionMixin: can use this class. """ + _jsparkSession: "JavaObject" + @overload def createDataFrame( self, data: "PandasDataFrameLike", samplingRatio: Optional[float] = ... @@ -398,20 +402,17 @@ def createDataFrame( # type: ignore[misc] require_minimum_pandas_version() - timezone = self._wrapped._conf.sessionLocalTimeZone() # type: ignore[attr-defined] + timezone = self._jconf.sessionLocalTimeZone() # type: ignore[attr-defined] # If no schema supplied by user then get the names of columns only if schema is None: schema = [str(x) if not isinstance(x, str) else x for x in data.columns] - if ( - self._wrapped._conf.arrowPySparkEnabled() # type: ignore[attr-defined] - and len(data) > 0 - ): + if self._jconf.arrowPySparkEnabled() and len(data) > 0: # type: ignore[attr-defined] try: return self._create_from_pandas_with_arrow(data, schema, timezone) except Exception as e: - if self._wrapped._conf.arrowPySparkFallbackEnabled(): # type: ignore[attr-defined] + if self._jconf.arrowPySparkFallbackEnabled(): # type: ignore[attr-defined] msg = ( "createDataFrame attempted Arrow optimization because " "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, " @@ -603,25 +604,25 @@ def _create_from_pandas_with_arrow( for pdf_slice in pdf_slices ] - jsqlContext = self._wrapped._jsqlContext # type: ignore[attr-defined] + jsparkSession = self._jsparkSession - safecheck = self._wrapped._conf.arrowSafeTypeConversion() # type: ignore[attr-defined] + safecheck = self._jconf.arrowSafeTypeConversion() # type: ignore[attr-defined] col_by_name = True # col by name only applies to StructType columns, can't happen here ser = ArrowStreamPandasSerializer(timezone, safecheck, col_by_name) @no_type_check def reader_func(temp_filename): - return self._jvm.PythonSQLUtils.readArrowStreamFromFile(jsqlContext, temp_filename) + return self._jvm.PythonSQLUtils.readArrowStreamFromFile(jsparkSession, temp_filename) @no_type_check def create_RDD_server(): - return self._jvm.ArrowRDDServer(jsqlContext) + return self._jvm.ArrowRDDServer(jsparkSession) # Create Spark DataFrame from Arrow stream file, using one batch per partition jrdd = self._sc._serialize_to_jvm(arrow_data, ser, reader_func, 
create_RDD_server) assert self._jvm is not None - jdf = self._jvm.PythonSQLUtils.toDataFrame(jrdd, schema.json(), jsqlContext) - df = DataFrame(jdf, self._wrapped) + jdf = self._jvm.PythonSQLUtils.toDataFrame(jrdd, schema.json(), jsparkSession) + df = DataFrame(jdf, self) df._schema = schema return df diff --git a/python/pyspark/sql/pandas/group_ops.py b/python/pyspark/sql/pandas/group_ops.py index 35f531f5c4d0d..e7599b1f144d5 100644 --- a/python/pyspark/sql/pandas/group_ops.py +++ b/python/pyspark/sql/pandas/group_ops.py @@ -214,7 +214,7 @@ def applyInPandas( df = self._df udf_column = udf(*[df[col] for col in df.columns]) jdf = self._jgd.flatMapGroupsInPandas(udf_column._jc.expr()) # type: ignore[attr-defined] - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.session) def cogroup(self, other: "GroupedData") -> "PandasCogroupedOps": """ @@ -246,7 +246,6 @@ class PandasCogroupedOps: def __init__(self, gd1: "GroupedData", gd2: "GroupedData"): self._gd1 = gd1 self._gd2 = gd2 - self.sql_ctx = gd1.sql_ctx def applyInPandas( self, func: "PandasCogroupedMapFunction", schema: Union[StructType, str] @@ -345,7 +344,7 @@ def applyInPandas( jdf = self._gd1._jgd.flatMapCoGroupsInPandas( # type: ignore[attr-defined] self._gd2._jgd, udf_column._jc.expr() # type: ignore[attr-defined] ) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self._gd1.session) @staticmethod def _extract_cols(gd: "GroupedData") -> List[Column]: diff --git a/python/pyspark/sql/pandas/map_ops.py b/python/pyspark/sql/pandas/map_ops.py index c1c29ecbc7576..c1bf6aa478dd3 100644 --- a/python/pyspark/sql/pandas/map_ops.py +++ b/python/pyspark/sql/pandas/map_ops.py @@ -90,7 +90,7 @@ def mapInPandas( ) # type: ignore[call-overload] udf_column = udf(*[self[col] for col in self.columns]) jdf = self._jdf.mapInPandas(udf_column._jc.expr()) # type: ignore[operator] - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def mapInArrow( self, func: "ArrowMapIterFunction", schema: Union[StructType, str] @@ -153,7 +153,7 @@ def mapInArrow( ) # type: ignore[call-overload] udf_column = udf(*[self[col] for col in self.columns]) jdf = self._jdf.pythonMapInArrow(udf_column._jc.expr()) - return DataFrame(jdf, self.sql_ctx) + return DataFrame(jdf, self.sparkSession) def _test() -> None: diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index df4a0891dcc71..8c729c6635814 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -27,7 +27,7 @@ if TYPE_CHECKING: from pyspark.sql._typing import OptionalPrimitiveType, ColumnOrName - from pyspark.sql.context import SQLContext + from pyspark.sql.session import SparkSession from pyspark.sql.dataframe import DataFrame from pyspark.sql.streaming import StreamingQuery @@ -62,8 +62,8 @@ class DataFrameReader(OptionUtils): .. versionadded:: 1.4 """ - def __init__(self, spark: "SQLContext"): - self._jreader = spark._ssql_ctx.read() # type: ignore[attr-defined] + def __init__(self, spark: "SparkSession"): + self._jreader = spark._jsparkSession.read() # type: ignore[attr-defined] self._spark = spark def _df(self, jdf: JavaObject) -> "DataFrame": @@ -560,7 +560,7 @@ def func(iterator): # There aren't any jvm api for creating a dataframe from rdd storing csv. # We can do it through creating a jvm dataset firstly and using the jvm api # for creating a dataframe from dataset storing csv. 
- jdataset = self._spark._ssql_ctx.createDataset( + jdataset = self._spark._jsparkSession.createDataset( jrdd.rdd(), self._spark._jvm.Encoders.STRING() ) return self._df(self._jreader.csv(jdataset)) @@ -737,7 +737,7 @@ class DataFrameWriter(OptionUtils): def __init__(self, df: "DataFrame"): self._df = df - self._spark = df.sql_ctx + self._spark = df.sparkSession self._jwrite = df._jdf.write() # type: ignore[operator] def _sq(self, jsq: JavaObject) -> "StreamingQuery": @@ -1360,7 +1360,7 @@ class DataFrameWriterV2: def __init__(self, df: "DataFrame", table: str): self._df = df - self._spark = df.sql_ctx + self._spark = df.sparkSession self._jwriter = df._jdf.writeTo(table) # type: ignore[operator] @since(3.1) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 233d5298c4494..a41ad156a8596 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -230,11 +230,6 @@ def enableHiveSupport(self) -> "SparkSession.Builder": """ return self.config("spark.sql.catalogImplementation", "hive") - def _sparkContext(self, sc: SparkContext) -> "SparkSession.Builder": - with self._lock: - self._sc = sc - return self - def getOrCreate(self) -> "SparkSession": """Gets an existing :class:`SparkSession` or, if there is no existing one, creates a new one based on the options set in this builder. @@ -267,14 +262,11 @@ def getOrCreate(self) -> "SparkSession": session = SparkSession._instantiatedSession if session is None or session._sc._jsc is None: # type: ignore[attr-defined] - if self._sc is not None: - sc = self._sc - else: - sparkConf = SparkConf() - for key, value in self._options.items(): - sparkConf.set(key, value) - # This SparkContext may be an existing one. - sc = SparkContext.getOrCreate(sparkConf) + sparkConf = SparkConf() + for key, value in self._options.items(): + sparkConf.set(key, value) + # This SparkContext may be an existing one. + sc = SparkContext.getOrCreate(sparkConf) # Do not update `SparkConf` for existing `SparkContext`, as it's shared # by all sessions. 
session = SparkSession(sc, options=self._options) @@ -296,8 +288,6 @@ def __init__( jsparkSession: Optional[JavaObject] = None, options: Dict[str, Any] = {}, ): - from pyspark.sql.context import SQLContext - self._sc = sparkContext self._jsc = self._sc._jsc self._jvm = self._sc._jvm @@ -320,8 +310,6 @@ def __init__( jsparkSession, options ) self._jsparkSession = jsparkSession - self._jwrapped = self._jsparkSession.sqlContext() - self._wrapped = SQLContext(self._sc, self, self._jwrapped) _monkey_patch_RDD(self) install_exception_handler() # If we had an instantiated SparkSession attached with a SparkContext @@ -348,6 +336,11 @@ def _repr_html_(self) -> str: sc_HTML=self.sparkContext._repr_html_(), # type: ignore[attr-defined] ) + @property + def _jconf(self) -> "JavaObject": + """Accessor for the JVM SQL-specific configurations""" + return self._jsparkSession.sessionState().conf() + @since(2.0) def newSession(self) -> "SparkSession": """ @@ -498,7 +491,7 @@ def range( else: jdf = self._jsparkSession.range(int(start), int(end), int(step), int(numPartitions)) - return DataFrame(jdf, self._wrapped) + return DataFrame(jdf, self) def _inferSchemaFromList( self, data: Iterable[Any], names: Optional[List[str]] = None @@ -519,7 +512,7 @@ def _inferSchemaFromList( """ if not data: raise ValueError("can not infer schema from empty dataset") - infer_dict_as_struct = self._wrapped._conf.inferDictAsStruct() # type: ignore[attr-defined] + infer_dict_as_struct = self._jconf.inferDictAsStruct() # type: ignore[attr-defined] prefer_timestamp_ntz = is_timestamp_ntz_preferred() schema = reduce( _merge_type, @@ -554,7 +547,7 @@ def _inferSchema( if not first: raise ValueError("The first row in RDD is empty, " "can not infer schema") - infer_dict_as_struct = self._wrapped._conf.inferDictAsStruct() # type: ignore[attr-defined] + infer_dict_as_struct = self._jconf.inferDictAsStruct() # type: ignore[attr-defined] prefer_timestamp_ntz = is_timestamp_ntz_preferred() if samplingRatio is None: schema = _infer_schema( @@ -684,14 +677,20 @@ def _create_shell_session() -> "SparkSession": return SparkSession._getActiveSessionOrCreate() @staticmethod - def _getActiveSessionOrCreate() -> "SparkSession": + def _getActiveSessionOrCreate(**static_conf: Any) -> "SparkSession": """ Returns the active :class:`SparkSession` for the current thread, returned by the builder, or if there is no existing one, creates a new one based on the options set in the builder. + + NOTE that 'static_conf' might not be set if there's an active or default Spark session + running. 
""" spark = SparkSession.getActiveSession() if spark is None: - spark = SparkSession.builder.getOrCreate() + builder = SparkSession.builder + for k, v in static_conf.items(): + builder = builder.config(k, v) + spark = builder.getOrCreate() return spark @overload @@ -940,7 +939,7 @@ def prepare(obj: Any) -> Any: rdd._to_java_object_rdd() # type: ignore[attr-defined] ) jdf = self._jsparkSession.applySchemaToPythonRDD(jrdd.rdd(), struct.json()) - df = DataFrame(jdf, self._wrapped) + df = DataFrame(jdf, self) df._schema = struct return df @@ -1034,7 +1033,7 @@ def sql(self, sqlQuery: str, **kwargs: Any) -> DataFrame: if len(kwargs) > 0: sqlQuery = formatter.format(sqlQuery, **kwargs) try: - return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped) + return DataFrame(self._jsparkSession.sql(sqlQuery), self) finally: if len(kwargs) > 0: formatter.clear() @@ -1055,7 +1054,7 @@ def table(self, tableName: str) -> DataFrame: >>> sorted(df.collect()) == sorted(df2.collect()) True """ - return DataFrame(self._jsparkSession.table(tableName), self._wrapped) + return DataFrame(self._jsparkSession.table(tableName), self) @property def read(self) -> DataFrameReader: @@ -1069,7 +1068,7 @@ def read(self) -> DataFrameReader: ------- :class:`DataFrameReader` """ - return DataFrameReader(self._wrapped) + return DataFrameReader(self) @property def readStream(self) -> DataStreamReader: @@ -1087,7 +1086,7 @@ def readStream(self) -> DataStreamReader: ------- :class:`DataStreamReader` """ - return DataStreamReader(self._wrapped) + return DataStreamReader(self) @property def streams(self) -> "StreamingQueryManager": diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index de68ccc3d9a00..7cff8d0e52181 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -29,7 +29,7 @@ from pyspark.sql.utils import ForeachBatchFunction, StreamingQueryException if TYPE_CHECKING: - from pyspark.sql import SQLContext + from pyspark.sql.session import SparkSession from pyspark.sql._typing import SupportsProcess, OptionalPrimitiveType from pyspark.sql.dataframe import DataFrame @@ -316,8 +316,8 @@ class DataStreamReader(OptionUtils): This API is evolving. """ - def __init__(self, spark: "SQLContext") -> None: - self._jreader = spark._ssql_ctx.readStream() + def __init__(self, spark: "SparkSession") -> None: + self._jreader = spark._jsparkSession.readStream() self._spark = spark def _df(self, jdf: JavaObject) -> "DataFrame": @@ -856,7 +856,7 @@ class DataStreamWriter: def __init__(self, df: "DataFrame") -> None: self._df = df - self._spark = df.sql_ctx + self._spark = df.sparkSession self._jwrite = df._jdf.writeStream() def _sq(self, jsq: JavaObject) -> StreamingQuery: diff --git a/python/pyspark/sql/tests/test_session.py b/python/pyspark/sql/tests/test_session.py index 1262e529b9ccc..91aa923768fc2 100644 --- a/python/pyspark/sql/tests/test_session.py +++ b/python/pyspark/sql/tests/test_session.py @@ -224,28 +224,26 @@ def tearDown(self): def test_sqlcontext_with_stopped_sparksession(self): # SPARK-30856: test that SQLContext.getOrCreate() returns a usable instance after # the SparkSession is restarted. 
- sql_context = self.spark._wrapped + sql_context = SQLContext.getOrCreate(self.spark.sparkContext) self.spark.stop() - sc = SparkContext("local[4]", self.sc.appName) - spark = SparkSession(sc) # Instantiate the underlying SQLContext - new_sql_context = spark._wrapped + spark = SparkSession.builder.master("local[4]").appName(self.sc.appName).getOrCreate() + new_sql_context = SQLContext.getOrCreate(spark.sparkContext) self.assertIsNot(new_sql_context, sql_context) - self.assertIs(SQLContext.getOrCreate(sc).sparkSession, spark) + self.assertIs(SQLContext.getOrCreate(spark.sparkContext).sparkSession, spark) try: df = spark.createDataFrame([(1, 2)], ["c", "c"]) df.collect() finally: spark.stop() self.assertIsNone(SQLContext._instantiatedContext) - sc.stop() def test_sqlcontext_with_stopped_sparkcontext(self): # SPARK-30856: test initialization via SparkSession when only the SparkContext is stopped self.sc.stop() - self.sc = SparkContext("local[4]", self.sc.appName) - self.spark = SparkSession(self.sc) - self.assertIs(SQLContext.getOrCreate(self.sc).sparkSession, self.spark) + spark = SparkSession.builder.master("local[4]").appName(self.sc.appName).getOrCreate() + self.sc = spark.sparkContext + self.assertIs(SQLContext.getOrCreate(self.sc).sparkSession, spark) def test_get_sqlcontext_with_stopped_sparkcontext(self): # SPARK-30856: test initialization via SQLContext.getOrCreate() when only the SparkContext diff --git a/python/pyspark/sql/tests/test_streaming.py b/python/pyspark/sql/tests/test_streaming.py index 87e35641f648a..4920423be228b 100644 --- a/python/pyspark/sql/tests/test_streaming.py +++ b/python/pyspark/sql/tests/test_streaming.py @@ -86,7 +86,7 @@ def test_stream_save_options(self): .load("python/test_support/sql/streaming") .withColumn("id", lit(1)) ) - for q in self.spark._wrapped.streams.active: + for q in self.spark.streams.active: q.stop() tmpPath = tempfile.mkdtemp() shutil.rmtree(tmpPath) @@ -117,7 +117,7 @@ def test_stream_save_options(self): def test_stream_save_options_overwrite(self): df = self.spark.readStream.format("text").load("python/test_support/sql/streaming") - for q in self.spark._wrapped.streams.active: + for q in self.spark.streams.active: q.stop() tmpPath = tempfile.mkdtemp() shutil.rmtree(tmpPath) @@ -154,7 +154,7 @@ def test_stream_save_options_overwrite(self): def test_stream_status_and_progress(self): df = self.spark.readStream.format("text").load("python/test_support/sql/streaming") - for q in self.spark._wrapped.streams.active: + for q in self.spark.streams.active: q.stop() tmpPath = tempfile.mkdtemp() shutil.rmtree(tmpPath) @@ -198,7 +198,7 @@ def func(x): def test_stream_await_termination(self): df = self.spark.readStream.format("text").load("python/test_support/sql/streaming") - for q in self.spark._wrapped.streams.active: + for q in self.spark.streams.active: q.stop() tmpPath = tempfile.mkdtemp() shutil.rmtree(tmpPath) @@ -267,7 +267,7 @@ def _assert_exception_tree_contains_msg(self, exception, msg): def test_query_manager_await_termination(self): df = self.spark.readStream.format("text").load("python/test_support/sql/streaming") - for q in self.spark._wrapped.streams.active: + for q in self.spark.streams.active: q.stop() tmpPath = tempfile.mkdtemp() shutil.rmtree(tmpPath) @@ -280,13 +280,13 @@ def test_query_manager_await_termination(self): try: self.assertTrue(q.isActive) try: - self.spark._wrapped.streams.awaitAnyTermination("hello") + self.spark.streams.awaitAnyTermination("hello") self.fail("Expected a value exception") except ValueError: pass 
now = time.time() # test should take at least 2 seconds - res = self.spark._wrapped.streams.awaitAnyTermination(2.6) + res = self.spark.streams.awaitAnyTermination(2.6) duration = time.time() - now self.assertTrue(duration >= 2) self.assertFalse(res) @@ -347,7 +347,7 @@ def assert_invalid_writer(self, writer, msg=None): self.stop_all() def stop_all(self): - for q in self.spark._wrapped.streams.active: + for q in self.spark.streams.active: q.stop() def _reset(self): diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index a092d67df17de..0e9d7661e2d94 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -22,7 +22,7 @@ import unittest import datetime -from pyspark import SparkContext +from pyspark import SparkContext, SQLContext from pyspark.sql import SparkSession, Column, Row from pyspark.sql.functions import udf, assert_true, lit from pyspark.sql.udf import UserDefinedFunction @@ -79,7 +79,7 @@ def test_udf(self): self.assertEqual(row[0], 5) # This is to check if a deprecated 'SQLContext.registerFunction' can call its alias. - sqlContext = self.spark._wrapped + sqlContext = SQLContext.getOrCreate(self.spark.sparkContext) sqlContext.registerFunction("oneArg", lambda x: len(x), IntegerType()) [row] = sqlContext.sql("SELECT oneArg('test')").collect() self.assertEqual(row[0], 4) @@ -372,7 +372,7 @@ def test_udf_registration_returns_udf(self): ) # This is to check if a 'SQLContext.udf' can call its alias. - sqlContext = self.spark._wrapped + sqlContext = SQLContext.getOrCreate(self.spark.sparkContext) add_four = sqlContext.udf.register("add_four", lambda x: x + 4, IntegerType()) self.assertListEqual( @@ -419,7 +419,7 @@ def test_non_existed_udf(self): ) # This is to check if a deprecated 'SQLContext.registerJavaFunction' can call its alias. - sqlContext = spark._wrapped + sqlContext = SQLContext.getOrCreate(self.spark.sparkContext) self.assertRaisesRegex( AnalysisException, "Can not load class non_existed_udf", diff --git a/python/pyspark/sql/tests/test_udf_profiler.py b/python/pyspark/sql/tests/test_udf_profiler.py index 27d9458509402..136f423d0a35c 100644 --- a/python/pyspark/sql/tests/test_udf_profiler.py +++ b/python/pyspark/sql/tests/test_udf_profiler.py @@ -21,7 +21,7 @@ import sys from io import StringIO -from pyspark import SparkConf, SparkContext +from pyspark import SparkConf from pyspark.sql import SparkSession from pyspark.sql.functions import udf from pyspark.profiler import UDFBasicProfiler @@ -32,8 +32,13 @@ def setUp(self): self._old_sys_path = list(sys.path) class_name = self.__class__.__name__ conf = SparkConf().set("spark.python.profile", "true") - self.sc = SparkContext("local[4]", class_name, conf=conf) - self.spark = SparkSession.builder._sparkContext(self.sc).getOrCreate() + self.spark = ( + SparkSession.builder.master("local[4]") + .config(conf=conf) + .appName(class_name) + .getOrCreate() + ) + self.sc = self.spark.sparkContext def tearDown(self): self.spark.stop() diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py index 15645d0085d40..b5abe6891d2cb 100644 --- a/python/pyspark/sql/utils.py +++ b/python/pyspark/sql/utils.py @@ -30,7 +30,7 @@ from pyspark.find_spark_home import _find_spark_home if TYPE_CHECKING: - from pyspark.sql.context import SQLContext + from pyspark.sql.session import SparkSession from pyspark.sql.dataframe import DataFrame @@ -258,15 +258,15 @@ class ForeachBatchFunction: the query is active. 
""" - def __init__(self, sql_ctx: "SQLContext", func: Callable[["DataFrame", int], None]): - self.sql_ctx = sql_ctx + def __init__(self, session: "SparkSession", func: Callable[["DataFrame", int], None]): self.func = func + self.session = session def call(self, jdf: JavaObject, batch_id: int) -> None: from pyspark.sql.dataframe import DataFrame try: - self.func(DataFrame(jdf, self.sql_ctx), batch_id) + self.func(DataFrame(jdf, self.session), batch_id) except Exception as e: self.error = e raise e diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala index 490ab9f8956cb..ab43aa49944c8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala @@ -24,7 +24,7 @@ import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.python.PythonRDDServer import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Column, DataFrame, SQLContext} +import org.apache.spark.sql.{Column, DataFrame, SparkSession} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.expressions.{CastTimestampNTZToLong, ExpressionInfo} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser @@ -59,8 +59,8 @@ private[sql] object PythonSQLUtils extends Logging { * Python callable function to read a file in Arrow stream format and create a [[RDD]] * using each serialized ArrowRecordBatch as a partition. */ - def readArrowStreamFromFile(sqlContext: SQLContext, filename: String): JavaRDD[Array[Byte]] = { - ArrowConverters.readArrowStreamFromFile(sqlContext, filename) + def readArrowStreamFromFile(session: SparkSession, filename: String): JavaRDD[Array[Byte]] = { + ArrowConverters.readArrowStreamFromFile(session, filename) } /** @@ -70,8 +70,8 @@ private[sql] object PythonSQLUtils extends Logging { def toDataFrame( arrowBatchRDD: JavaRDD[Array[Byte]], schemaString: String, - sqlContext: SQLContext): DataFrame = { - ArrowConverters.toDataFrame(arrowBatchRDD, schemaString, sqlContext) + session: SparkSession): DataFrame = { + ArrowConverters.toDataFrame(arrowBatchRDD, schemaString, session) } def explainString(queryExecution: QueryExecution, mode: String): String = { @@ -85,13 +85,13 @@ private[sql] object PythonSQLUtils extends Logging { * Helper for making a dataframe from arrow data from data sent from python over a socket. This is * used when encryption is enabled, and we don't want to write data to a file. 
*/ -private[sql] class ArrowRDDServer(sqlContext: SQLContext) extends PythonRDDServer { +private[sql] class ArrowRDDServer(session: SparkSession) extends PythonRDDServer { override protected def streamToRDD(input: InputStream): RDD[Array[Byte]] = { // Create array to consume iterator so that we can safely close the inputStream val batches = ArrowConverters.getBatchesFromStream(Channels.newChannel(input)).toArray // Parallelize the record batches to create an RDD - JavaRDD.fromRDD(sqlContext.sparkContext.parallelize(batches, batches.length)) + JavaRDD.fromRDD(session.sparkContext.parallelize(batches, batches.length)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index befaea24e0002..7831ddee4f9b6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -230,7 +230,7 @@ private[sql] object SQLUtils extends Logging { def readArrowStreamFromFile( sparkSession: SparkSession, filename: String): JavaRDD[Array[Byte]] = { - ArrowConverters.readArrowStreamFromFile(sparkSession.sqlContext, filename) + ArrowConverters.readArrowStreamFromFile(sparkSession, filename) } /** @@ -241,6 +241,6 @@ private[sql] object SQLUtils extends Logging { arrowBatchRDD: JavaRDD[Array[Byte]], schema: StructType, sparkSession: SparkSession): DataFrame = { - ArrowConverters.toDataFrame(arrowBatchRDD, schema.json, sparkSession.sqlContext) + ArrowConverters.toDataFrame(arrowBatchRDD, schema.json, sparkSession) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala index 8e22c429c24e4..93ff276529dad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala @@ -31,7 +31,7 @@ import org.apache.arrow.vector.ipc.message.{ArrowRecordBatch, IpcOption, Message import org.apache.spark.TaskContext import org.apache.spark.api.java.JavaRDD import org.apache.spark.network.util.JavaUtils -import org.apache.spark.sql.{DataFrame, SQLContext} +import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types._ import org.apache.spark.sql.util.ArrowUtils @@ -195,27 +195,27 @@ private[sql] object ArrowConverters { private[sql] def toDataFrame( arrowBatchRDD: JavaRDD[Array[Byte]], schemaString: String, - sqlContext: SQLContext): DataFrame = { + session: SparkSession): DataFrame = { val schema = DataType.fromJson(schemaString).asInstanceOf[StructType] - val timeZoneId = sqlContext.sessionState.conf.sessionLocalTimeZone + val timeZoneId = session.sessionState.conf.sessionLocalTimeZone val rdd = arrowBatchRDD.rdd.mapPartitions { iter => val context = TaskContext.get() ArrowConverters.fromBatchIterator(iter, schema, timeZoneId, context) } - sqlContext.internalCreateDataFrame(rdd.setName("arrow"), schema) + session.internalCreateDataFrame(rdd.setName("arrow"), schema) } /** * Read a file as an Arrow stream and parallelize as an RDD of serialized ArrowRecordBatches. 
*/ private[sql] def readArrowStreamFromFile( - sqlContext: SQLContext, + session: SparkSession, filename: String): JavaRDD[Array[Byte]] = { Utils.tryWithResource(new FileInputStream(filename)) { fileStream => // Create array to consume iterator so that we can safely close the file val batches = getBatchesFromStream(fileStream.getChannel).toArray // Parallelize the record batches to create an RDD - JavaRDD.fromRDD(sqlContext.sparkContext.parallelize(batches, batches.length)) + JavaRDD.fromRDD(session.sparkContext.parallelize(batches, batches.length)) } } From 0be132c128e80bc9d866001a64cb3f6331c85b1e Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Tue, 15 Feb 2022 11:47:42 +0900 Subject: [PATCH 230/513] [SPARK-38124][SS][FOLLOWUP] Document the current challenge on fixing distribution of stateful operator ### What changes were proposed in this pull request? This PR proposes to add the context of current challenge on fixing distribution of stateful operator, even the distribution is a sort of "broken" now. This PR addresses the review comment https://github.com/apache/spark/pull/35419#discussion_r801343068 ### Why are the changes needed? In SPARK-38124 we figured out the existing long-standing problem in stateful operator, but it is not easy to fix since the fix may break the existing query if the fix is not carefully designed. Anyone should also be pretty much careful when touching the required distribution. We want to document this explicitly to help others to be careful whenever someone is around the codebase. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Code comment only changes. Closes #35512 from HeartSaVioR/SPARK-38124-followup. Authored-by: Jungtaek Lim Signed-off-by: Jungtaek Lim --- .../catalyst/plans/physical/partitioning.scala | 8 ++++++++ .../streaming/FlatMapGroupsWithStateExec.scala | 3 +++ .../streaming/statefulOperators.scala | 18 +++++++++++++++++- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index 4418d3253a8b5..5342c8ee6d672 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -101,6 +101,14 @@ case class ClusteredDistribution( * Since this distribution relies on [[HashPartitioning]] on the physical partitioning of the * stateful operator, only [[HashPartitioning]] (and HashPartitioning in * [[PartitioningCollection]]) can satisfy this distribution. + * + * NOTE: This is applied only to stream-stream join as of now. For other stateful operators, we + * have been using ClusteredDistribution, which could construct the physical partitioning of the + * state in different way (ClusteredDistribution requires relaxed condition and multiple + * partitionings can satisfy the requirement.) We need to construct the way to fix this with + * minimizing possibility to break the existing checkpoints. + * + * TODO(SPARK-38204): address the issue explained in above note. 
*/ case class StatefulOpClusteredDistribution( expressions: Seq[Expression], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala index a00a62216f3dc..93ed5916bfb2c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala @@ -93,6 +93,9 @@ case class FlatMapGroupsWithStateExec( * to have the same grouping so that the data are co-lacated on the same task. */ override def requiredChildDistribution: Seq[Distribution] = { + // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution + // before making any changes. + // TODO(SPARK-38204) ClusteredDistribution(groupingAttributes, stateInfo.map(_.numPartitions)) :: ClusteredDistribution(initialStateGroupAttrs, stateInfo.map(_.numPartitions)) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index 3431823765c1b..3ab2ad47e98c4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -334,6 +334,9 @@ case class StateStoreRestoreExec( override def outputPartitioning: Partitioning = child.outputPartitioning override def requiredChildDistribution: Seq[Distribution] = { + // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution + // before making any changes. + // TODO(SPARK-38204) if (keyExpressions.isEmpty) { AllTuples :: Nil } else { @@ -493,6 +496,9 @@ case class StateStoreSaveExec( override def outputPartitioning: Partitioning = child.outputPartitioning override def requiredChildDistribution: Seq[Distribution] = { + // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution + // before making any changes. + // TODO(SPARK-38204) if (keyExpressions.isEmpty) { AllTuples :: Nil } else { @@ -573,6 +579,9 @@ case class SessionWindowStateStoreRestoreExec( } override def requiredChildDistribution: Seq[Distribution] = { + // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution + // before making any changes. + // TODO(SPARK-38204) ClusteredDistribution(keyWithoutSessionExpressions, stateInfo.map(_.numPartitions)) :: Nil } @@ -684,6 +693,9 @@ case class SessionWindowStateStoreSaveExec( override def outputPartitioning: Partitioning = child.outputPartitioning override def requiredChildDistribution: Seq[Distribution] = { + // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution + // before making any changes. + // TODO(SPARK-38204) ClusteredDistribution(keyExpressions, stateInfo.map(_.numPartitions)) :: Nil } @@ -741,8 +753,12 @@ case class StreamingDeduplicateExec( extends UnaryExecNode with StateStoreWriter with WatermarkSupport { /** Distribute by grouping attributes */ - override def requiredChildDistribution: Seq[Distribution] = + override def requiredChildDistribution: Seq[Distribution] = { + // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution + // before making any changes. 
+    // TODO(SPARK-38204)
     ClusteredDistribution(keyExpressions, stateInfo.map(_.numPartitions)) :: Nil
+  }
 
   override protected def doExecute(): RDD[InternalRow] = {
     metrics // force lazy init at driver

From b5862534edcb2b01a042f238a7b99a65c8898df8 Mon Sep 17 00:00:00 2001
From: Hyukjin Kwon
Date: Tue, 15 Feb 2022 12:13:41 +0900
Subject: [PATCH 231/513] [SPARK-35173][SQL][PYTHON][FOLLOW-UP] Use DataFrame.sparkSession instead of DataFrame.sql_ctx in withColumns

### What changes were proposed in this pull request?

This PR is a follow-up for both
https://github.com/apache/spark/commit/8853f286371bcc1d44762a0a8ed5bf1a40cdbbd5 and
https://github.com/apache/spark/commit/88696ebcb72fd3057b1546831f653b29b7e0abb2.

There was a logical conflict here: we should make it use `DataFrame.sparkSession` instead of `DataFrame.sql_ctx`:
https://github.com/apache/spark/runs/5193146096?check_suite_focus=true

```
======================================================================
FAIL: test_with_columns (pyspark.sql.tests.test_dataframe.DataFrameTests)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/__w/spark/spark/python/pyspark/sql/tests/test_dataframe.py", line 484, in test_with_columns
    keys = self.df.withColumns({"key": self.df.key}).select("key").collect()
  File "/__w/spark/spark/python/pyspark/sql/dataframe.py", line 3009, in withColumns
    self._jdf.withColumns(_to_seq(self._sc, col_names), self._jcols(*cols)), self.sql_ctx
  File "/__w/spark/spark/python/pyspark/sql/dataframe.py", line 123, in __init__
    assert not os.environ.get("SPARK_TESTING")  # Sanity check for our internal usage.
AssertionError
```

### Why are the changes needed?

To fix the build, and remove deprecated usage of `SQLContext`.

### Does this PR introduce _any_ user-facing change?

No, it is not released yet.

### How was this patch tested?

CI should test it out.

Closes #35518 from HyukjinKwon/SPARK-35173.

Authored-by: Hyukjin Kwon
Signed-off-by: Hyukjin Kwon
---
 python/pyspark/sql/dataframe.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 610b8d6997046..b224d7f8d608e 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -3006,7 +3006,8 @@ def withColumns(self, *colsMap: Dict[str, Column]) -> "DataFrame":
         cols = list(colsMap.values())
 
         return DataFrame(
-            self._jdf.withColumns(_to_seq(self._sc, col_names), self._jcols(*cols)), self.sql_ctx
+            self._jdf.withColumns(_to_seq(self._sc, col_names), self._jcols(*cols)),
+            self.sparkSession,
         )
 
     def withColumn(self, colName: str, col: Column) -> "DataFrame":

From 5613e2f930bcc5ee19b9d18f56840e97c5f7ac45 Mon Sep 17 00:00:00 2001
From: itholic
Date: Tue, 15 Feb 2022 12:46:35 +0900
Subject: [PATCH 232/513] [SPARK-38183][PYTHON] Show warning when creating pandas-on-Spark session under ANSI mode

### What changes were proposed in this pull request?

This PR proposes to show a warning message when creating a pandas-on-Spark session under ANSI mode. The message shown will look like the one below:

```python
>>> ps.Series(['a', 'b', 'c'])
.../spark/python/pyspark/pandas/utils.py:969: PandasAPIOnSparkAdviceWarning: The config 'spark.sql.ansi.enabled' is set to True. This can cause the unexpected behavior from pandas API on Spark since pandas API on Spark follows the behavior of pandas, not SQL.
  warnings.warn(message, PandasAPIOnSparkAdviceWarning)
```

### Why are the changes needed?
Since pandas API on Spark follows the behavior of pandas, not SQL. So the unexpected behavior can be occurred when the ANSI mode is on (when "spark.sql.ansi.enabled" is True). For example, - It raises exception when `div` & `mod` related methods returns null (e.g. `DataFrame.rmod`) ```python >>> df angels degress 0 0 360 1 3 180 2 4 360 >>> df.rmod(2) Traceback (most recent call last): ... : org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 32.0 failed 1 times, most recent failure: Lost task 0.0 in stage 32.0 (TID 165) (172.30.1.44 executor driver): org.apache.spark.SparkArithmeticException: divide by zero. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. ``` - It raises exception when DataFrame for `ps.melt` has not the same column type. ```python >>> df A B C 0 a 1 2 1 b 3 4 2 c 5 6 >>> ps.melt(df) Traceback (most recent call last): ... pyspark.sql.utils.AnalysisException: cannot resolve 'array(struct('A', A), struct('B', B), struct('C', C))' due to data type mismatch: input to function array should all be the same type, but it's [struct, struct, struct] To fix the error, you might need to add explicit type casts. If necessary set spark.sql.ansi.enabled to false to bypass this error.; 'Project [__index_level_0__#223L, A#224, B#225L, C#226L, __natural_order__#231L, explode(array(struct(variable, A, value, A#224), struct(variable, B, value, B#225L), struct(variable, C, value, C#226L))) AS pairs#269] +- Project [__index_level_0__#223L, A#224, B#225L, C#226L, monotonically_increasing_id() AS __natural_order__#231L] +- LogicalRDD [__index_level_0__#223L, A#224, B#225L, C#226L], false ``` - It raises exception when `CategoricalIndex.remove_categories` doesn't remove the entire index ```python >>> idx CategoricalIndex(['a', 'b', 'b', 'c', 'c', 'c'], categories=['a', 'b', 'c'], ordered=False, dtype='category') >>> idx.remove_categories('b') 22/02/14 09:16:14 ERROR Executor: Exception in task 2.0 in stage 41.0 (TID 215) org.apache.spark.SparkNoSuchElementException: Key b does not exist. If necessary set spark.sql.ansi.strictIndexOperator to false to bypass this error. ... ... ``` - It raises exception when `CategoricalIndex.set_categories` doesn't set the entire index ```python >>> idx.set_categories(['b', 'c']) 22/02/14 09:16:14 ERROR Executor: Exception in task 2.0 in stage 41.0 (TID 215) org.apache.spark.SparkNoSuchElementException: Key a does not exist. If necessary set spark.sql.ansi.strictIndexOperator to false to bypass this error. ... ... ``` - It raises exception when `ps.to_numeric` get a non-numeric type ```python >>> psser 0 apple 1 1.0 2 2 3 -3 dtype: object >>> ps.to_numeric(psser) 22/02/14 09:22:36 ERROR Executor: Exception in task 2.0 in stage 63.0 (TID 328) org.apache.spark.SparkNumberFormatException: invalid input syntax for type numeric: apple. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. ... ``` - It raises exception when `strings.StringMethods.rsplit` - also `strings.StringMethods.split` - with `expand=True` returns null columns ```python >>> s 0 this is a regular sentence 1 https://docs.python.org/3/tutorial/index.html 2 None dtype: object >>> s.str.split(n=4, expand=True) 22/02/14 09:26:23 ERROR Executor: Exception in task 5.0 in stage 69.0 (TID 356) org.apache.spark.SparkArrayIndexOutOfBoundsException: Invalid index: 1, numElements: 1. 
If necessary set spark.sql.ansi.strictIndexOperator to false to bypass this error. ``` - It raises exception when `as_type` with `CategoricalDtype`, and the categories of `CategoricalDtype` is not matched with data. ```python >>> psser 0 1994-01-31 1 1994-02-01 2 1994-02-02 dtype: object >>> cat_type CategoricalDtype(categories=['a', 'b', 'c'], ordered=False) >>> psser.astype(cat_type) 22/02/14 09:34:56 ERROR Executor: Exception in task 5.0 in stage 90.0 (TID 468) org.apache.spark.SparkNoSuchElementException: Key 1994-02-01 does not exist. If necessary set spark.sql.ansi.strictIndexOperator to false to bypass this error. ``` Not only for the example cases, if the internal SQL function used to implement the function has different behavior according to ANSI options, an unexpected error may occur. ### Does this PR introduce _any_ user-facing change? This will show warning message as mentioned above, when the session for pandas API on Spark is initialized if "spark.sql.ansi.enabled" is set as True. ### How was this patch tested? The existing tests should be passed. Closes #35488 from itholic/SPARK-38183. Authored-by: itholic Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/utils.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/python/pyspark/pandas/utils.py b/python/pyspark/pandas/utils.py index cd79acc22a225..ac68975f3f855 100644 --- a/python/pyspark/pandas/utils.py +++ b/python/pyspark/pandas/utils.py @@ -467,11 +467,18 @@ def is_testing() -> bool: def default_session() -> SparkSession: spark = SparkSession.getActiveSession() - if spark is not None: - return spark + if spark is None: + spark = SparkSession.builder.appName("pandas-on-Spark").getOrCreate() + + if spark.conf.get("spark.sql.ansi.enabled"): + log_advice( + "The config 'spark.sql.ansi.enabled' is set to True. " + "This can cause unexpected behavior " + "from pandas API on Spark since pandas API on Spark follows " + "the behavior of pandas, not SQL." + ) - builder = SparkSession.builder.appName("pandas-on-Spark") - return builder.getOrCreate() + return spark @contextmanager From 78514e3149bc43b2485e4be0ab982601a842600b Mon Sep 17 00:00:00 2001 From: tianlzhang Date: Tue, 15 Feb 2022 12:52:37 +0900 Subject: [PATCH 233/513] [SPARK-38211][SQL][DOCS] Add SQL migration guide on restoring loose upcast from string to other types ### What changes were proposed in this pull request? Add doc on restoring loose upcast from string to other types (behavior before 2.4.1) to SQL migration guide. ### Why are the changes needed? After [SPARK-24586](https://issues.apache.org/jira/browse/SPARK-24586), loose upcasting from string to other types are not allowed by default. User can still set `spark.sql.legacy.looseUpcast=true` to restore old behavior but it's not documented. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Only doc change. Closes #35519 from manuzhang/spark-38211. Authored-by: tianlzhang Signed-off-by: Hyukjin Kwon --- docs/sql-migration-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 63fc51a5132db..0893f46c89dce 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -420,7 +420,7 @@ license: | need to specify a value with units like "30s" now, to avoid being interpreted as milliseconds; otherwise, the extremely short interval that results will likely cause applications to fail. 
- - When turning a Dataset to another Dataset, Spark will up cast the fields in the original Dataset to the type of corresponding fields in the target DataSet. In version 2.4 and earlier, this up cast is not very strict, e.g. `Seq("str").toDS.as[Int]` fails, but `Seq("str").toDS.as[Boolean]` works and throw NPE during execution. In Spark 3.0, the up cast is stricter and turning String into something else is not allowed, i.e. `Seq("str").toDS.as[Boolean]` will fail during analysis. + - When turning a Dataset to another Dataset, Spark will up cast the fields in the original Dataset to the type of corresponding fields in the target DataSet. In version 2.4 and earlier, this up cast is not very strict, e.g. `Seq("str").toDS.as[Int]` fails, but `Seq("str").toDS.as[Boolean]` works and throw NPE during execution. In Spark 3.0, the up cast is stricter and turning String into something else is not allowed, i.e. `Seq("str").toDS.as[Boolean]` will fail during analysis. To restore the behavior before 2.4.1, set `spark.sql.legacy.looseUpcast` to `true`. ## Upgrading from Spark SQL 2.3 to 2.4 From ca9bbbae75c9dc36d61debd51f16fed9ecdd31ce Mon Sep 17 00:00:00 2001 From: itholic Date: Tue, 15 Feb 2022 14:32:59 +0900 Subject: [PATCH 234/513] [SPARK-38145][PYTHON][TESTS] Make PySpark tests pass when "spark.sql.ansi.enabled" is True by default ### What changes were proposed in this pull request? This PR proposes to fix the tests which are failed when `spark.sql.ansi.enabled` is `True`. ### Why are the changes needed? Tests should always be passed regardless of changing option values. ### Does this PR introduce _any_ user-facing change? No. It's test only. ### How was this patch tested? The existing CI should be passed. Closes #35454 from itholic/SPARK-38145. Authored-by: itholic Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/utils.py | 4 + python/pyspark/sql/dataframe.py | 104 ++++++++++++------------- python/pyspark/sql/functions.py | 22 ++++-- python/pyspark/sql/tests/test_types.py | 20 ++++- 4 files changed, 86 insertions(+), 64 deletions(-) diff --git a/python/pyspark/pandas/utils.py b/python/pyspark/pandas/utils.py index ac68975f3f855..95a7eec2d5ec3 100644 --- a/python/pyspark/pandas/utils.py +++ b/python/pyspark/pandas/utils.py @@ -470,6 +470,10 @@ def default_session() -> SparkSession: if spark is None: spark = SparkSession.builder.appName("pandas-on-Spark").getOrCreate() + # Turn ANSI off when testing the pandas API on Spark since + # the behavior of pandas API on Spark follows pandas, not SQL. + if is_testing(): + spark.conf.set("spark.sql.ansi.enabled", False) # type: ignore[arg-type] if spark.conf.get("spark.sql.ansi.enabled"): log_advice( "The config 'spark.sql.ansi.enabled' is set to True. " diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index b224d7f8d608e..4b778bdc2005a 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1799,26 +1799,31 @@ def describe(self, *cols: Union[str, List[str]]) -> "DataFrame": Examples -------- + >>> df = spark.createDataFrame( + ... [("Bob", 13, 40.3, 150.5), ("Alice", 12, 37.8, 142.3), ("Tom", 11, 44.1, 142.2)], + ... ["name", "age", "weight", "height"], + ... 
) >>> df.describe(['age']).show() - +-------+------------------+ - |summary| age| - +-------+------------------+ - | count| 2| - | mean| 3.5| - | stddev|2.1213203435596424| - | min| 2| - | max| 5| - +-------+------------------+ - >>> df.describe().show() - +-------+------------------+-----+ - |summary| age| name| - +-------+------------------+-----+ - | count| 2| 2| - | mean| 3.5| null| - | stddev|2.1213203435596424| null| - | min| 2|Alice| - | max| 5| Bob| - +-------+------------------+-----+ + +-------+----+ + |summary| age| + +-------+----+ + | count| 3| + | mean|12.0| + | stddev| 1.0| + | min| 11| + | max| 13| + +-------+----+ + + >>> df.describe(['age', 'weight', 'height']).show() + +-------+----+------------------+-----------------+ + |summary| age| weight| height| + +-------+----+------------------+-----------------+ + | count| 3| 3| 3| + | mean|12.0| 40.73333333333333| 145.0| + | stddev| 1.0|3.1722757341273704|4.763402145525822| + | min| 11| 37.8| 142.2| + | max| 13| 44.1| 150.5| + +-------+----+------------------+-----------------+ See Also -------- @@ -1851,39 +1856,34 @@ def summary(self, *statistics: str) -> "DataFrame": Examples -------- - >>> df.summary().show() - +-------+------------------+-----+ - |summary| age| name| - +-------+------------------+-----+ - | count| 2| 2| - | mean| 3.5| null| - | stddev|2.1213203435596424| null| - | min| 2|Alice| - | 25%| 2| null| - | 50%| 2| null| - | 75%| 5| null| - | max| 5| Bob| - +-------+------------------+-----+ - - >>> df.summary("count", "min", "25%", "75%", "max").show() - +-------+---+-----+ - |summary|age| name| - +-------+---+-----+ - | count| 2| 2| - | min| 2|Alice| - | 25%| 2| null| - | 75%| 5| null| - | max| 5| Bob| - +-------+---+-----+ - - To do a summary for specific columns first select them: - - >>> df.select("age", "name").summary("count").show() - +-------+---+----+ - |summary|age|name| - +-------+---+----+ - | count| 2| 2| - +-------+---+----+ + >>> df = spark.createDataFrame( + ... [("Bob", 13, 40.3, 150.5), ("Alice", 12, 37.8, 142.3), ("Tom", 11, 44.1, 142.2)], + ... ["name", "age", "weight", "height"], + ... 
) + >>> df.select("age", "weight", "height").summary().show() + +-------+----+------------------+-----------------+ + |summary| age| weight| height| + +-------+----+------------------+-----------------+ + | count| 3| 3| 3| + | mean|12.0| 40.73333333333333| 145.0| + | stddev| 1.0|3.1722757341273704|4.763402145525822| + | min| 11| 37.8| 142.2| + | 25%| 11| 37.8| 142.2| + | 50%| 12| 40.3| 142.3| + | 75%| 13| 44.1| 150.5| + | max| 13| 44.1| 150.5| + +-------+----+------------------+-----------------+ + + >>> df.select("age", "weight", "height").summary("count", "min", "25%", "75%", "max").show() + +-------+---+------+------+ + |summary|age|weight|height| + +-------+---+------+------+ + | count| 3| 3| 3| + | min| 11| 37.8| 142.2| + | 25%| 11| 37.8| 142.2| + | 75%| 13| 44.1| 150.5| + | max| 13| 44.1| 150.5| + +-------+---+------+------+ See Also -------- diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index d79da47d3a3ce..269374525c3b1 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2039,7 +2039,8 @@ def hour(col: "ColumnOrName") -> Column: Examples -------- - >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts']) + >>> import datetime + >>> df = spark.createDataFrame([(datetime.datetime(2015, 4, 8, 13, 8, 15),)], ['ts']) >>> df.select(hour('ts').alias('hour')).collect() [Row(hour=13)] """ @@ -2054,7 +2055,8 @@ def minute(col: "ColumnOrName") -> Column: Examples -------- - >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts']) + >>> import datetime + >>> df = spark.createDataFrame([(datetime.datetime(2015, 4, 8, 13, 8, 15),)], ['ts']) >>> df.select(minute('ts').alias('minute')).collect() [Row(minute=8)] """ @@ -2069,7 +2071,8 @@ def second(col: "ColumnOrName") -> Column: Examples -------- - >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts']) + >>> import datetime + >>> df = spark.createDataFrame([(datetime.datetime(2015, 4, 8, 13, 8, 15),)], ['ts']) >>> df.select(second('ts').alias('second')).collect() [Row(second=15)] """ @@ -2572,7 +2575,10 @@ def window( Examples -------- - >>> df = spark.createDataFrame([("2016-03-11 09:00:07", 1)]).toDF("date", "val") + >>> import datetime + >>> df = spark.createDataFrame( + ... [(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)], + ... ).toDF("date", "val") >>> w = df.groupBy(window("date", "5 seconds")).agg(sum("val").alias("sum")) >>> w.select(w.window.start.cast("string").alias("start"), ... 
w.window.end.cast("string").alias("end"), "sum").collect() @@ -3661,13 +3667,13 @@ def element_at(col: "ColumnOrName", extraction: Any) -> Column: Examples -------- - >>> df = spark.createDataFrame([(["a", "b", "c"],), ([],)], ['data']) + >>> df = spark.createDataFrame([(["a", "b", "c"],)], ['data']) >>> df.select(element_at(df.data, 1)).collect() - [Row(element_at(data, 1)='a'), Row(element_at(data, 1)=None)] + [Row(element_at(data, 1)='a')] - >>> df = spark.createDataFrame([({"a": 1.0, "b": 2.0},), ({},)], ['data']) + >>> df = spark.createDataFrame([({"a": 1.0, "b": 2.0},)], ['data']) >>> df.select(element_at(df.data, lit("a"))).collect() - [Row(element_at(data, a)=1.0), Row(element_at(data, a)=None)] + [Row(element_at(data, a)=1.0)] """ return _invoke_function_over_columns("element_at", col, lit(extraction)) diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 2502387b8cc50..6aa8b111b4254 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -118,8 +118,14 @@ def test_infer_schema(self): with self.tempView("test"): df.createOrReplaceTempView("test") - result = self.spark.sql("SELECT l[0].a from test where d['key'].d = '2'") - self.assertEqual(1, result.head()[0]) + result = self.spark.sql("SELECT l from test") + self.assertEqual([], result.head()[0]) + # We set `spark.sql.ansi.enabled` to False for this case + # since it occurs an error in ANSI mode if there is a list index + # or key that does not exist. + with self.sql_conf({"spark.sql.ansi.enabled": False}): + result = self.spark.sql("SELECT l[0].a from test where d['key'].d = '2'") + self.assertEqual(1, result.head()[0]) df2 = self.spark.createDataFrame(rdd, samplingRatio=1.0) self.assertEqual(df.schema, df2.schema) @@ -128,8 +134,14 @@ def test_infer_schema(self): with self.tempView("test2"): df2.createOrReplaceTempView("test2") - result = self.spark.sql("SELECT l[0].a from test2 where d['key'].d = '2'") - self.assertEqual(1, result.head()[0]) + result = self.spark.sql("SELECT l from test2") + self.assertEqual([], result.head()[0]) + # We set `spark.sql.ansi.enabled` to False for this case + # since it occurs an error in ANSI mode if there is a list index + # or key that does not exist. + with self.sql_conf({"spark.sql.ansi.enabled": False}): + result = self.spark.sql("SELECT l[0].a from test2 where d['key'].d = '2'") + self.assertEqual(1, result.head()[0]) def test_infer_schema_specification(self): from decimal import Decimal From e2eb6d8e0437e2305a99c897e2e7fe4b544a3573 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 15 Feb 2022 13:37:23 +0800 Subject: [PATCH 235/513] [SPARK-38203][SQL] Fix SQLInsertTestSuite and SchemaPruningSuite under ANSI mode ### What changes were proposed in this pull request? Fix test failures of following tests under ANSI mode: - HiveSQLInsertTestSuite - FileSourceSQLInsertTestSuite - ParquetV1SchemaPruningSuite - ParquetV2SchemaPruningSuite ### Why are the changes needed? To set up a new GA job to run tests with ANSI mode before 3.3.0 release. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual enable ANSI mode and test. Closes #35511 from gengliangwang/fixParquetPruneAnsi. 
Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../apache/spark/sql/SQLInsertTestSuite.scala | 26 ++++++++++++------- .../datasources/SchemaPruningSuite.scala | 4 +++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala index 2f56fbaf7f821..a3f602353a096 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala @@ -286,20 +286,28 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { } else { SQLConf.StoreAssignmentPolicy.values } + + def shouldThrowException(policy: SQLConf.StoreAssignmentPolicy.Value): Boolean = policy match { + case SQLConf.StoreAssignmentPolicy.ANSI | SQLConf.StoreAssignmentPolicy.STRICT => + true + case SQLConf.StoreAssignmentPolicy.LEGACY => + SQLConf.get.ansiEnabled + } + testingPolicies.foreach { policy => withSQLConf( - SQLConf.STORE_ASSIGNMENT_POLICY.key -> policy.toString) { + SQLConf.STORE_ASSIGNMENT_POLICY.key -> policy.toString, + SQLConf.ANSI_ENABLED.key -> "false") { withTable("t") { sql("create table t(a int, b string) using parquet partitioned by (a)") - policy match { - case SQLConf.StoreAssignmentPolicy.ANSI | SQLConf.StoreAssignmentPolicy.STRICT => - val errorMsg = intercept[NumberFormatException] { - sql("insert into t partition(a='ansi') values('ansi')") - }.getMessage - assert(errorMsg.contains("invalid input syntax for type numeric: ansi")) - case SQLConf.StoreAssignmentPolicy.LEGACY => + if (shouldThrowException(policy)) { + val errorMsg = intercept[NumberFormatException] { sql("insert into t partition(a='ansi') values('ansi')") - checkAnswer(sql("select * from t"), Row("ansi", null) :: Nil) + }.getMessage + assert(errorMsg.contains("invalid input syntax for type numeric: ansi")) + } else { + sql("insert into t partition(a='ansi') values('ansi')") + checkAnswer(sql("select * from t"), Row("ansi", null) :: Nil) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala index 6fd966c42a067..fe50e4e7f9d1a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala @@ -21,6 +21,7 @@ import java.io.File import org.scalactic.Equality +import org.apache.spark.SparkConf import org.apache.spark.sql.{DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.SchemaPruningTest import org.apache.spark.sql.catalyst.expressions.Concat @@ -57,6 +58,9 @@ abstract class SchemaPruningSuite contactId: Int, employer: Employer) + override protected def sparkConf: SparkConf = + super.sparkConf.set(SQLConf.ANSI_STRICT_INDEX_OPERATOR.key, "false") + val janeDoe = FullName("Jane", "X.", "Doe") val johnDoe = FullName("John", "Y.", "Doe") val susanSmith = FullName("Susan", "Z.", "Smith") From ea1f922e232b0193927cdeec529083f274b108ac Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 15 Feb 2022 19:06:17 +0900 Subject: [PATCH 236/513] [SPARK-37707][SQL][FOLLOWUP] Allow implicitly casting Date type to AnyTimestampType under ANSI mode ### What changes were proposed in this pull request? 
Followup of https://github.com/apache/spark/pull/34976: allow implicitly casting Date type to AnyTimestampType under ANSI mode

### Why are the changes needed?

AnyTimestampType is a type collection for Timestamp and TimestampNTZ. As Spark allows implicit casting Date as Timestamp/TimestampNTZ under ANSI mode, Date can be cast as AnyTimestampType as well.

### Does this PR introduce _any_ user-facing change?

Yes, allow implicitly casting Date type to AnyTimestampType under ANSI mode

### How was this patch tested?

Unit test

Closes #35522 from gengliangwang/fixMoreAnsiTest.

Authored-by: Gengliang Wang
Signed-off-by: Hyukjin Kwon
---
 .../sql/catalyst/analysis/AnsiTypeCoercion.scala  |  3 +++
 .../sql/catalyst/analysis/TypeCoercionSuite.scala | 11 +++++++++++
 2 files changed, 14 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
index 61142fcb035ad..90f28fbf447b1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
@@ -204,6 +204,9 @@ object AnsiTypeCoercion extends TypeCoercionBase {
       case (StringType, AnyTimestampType) =>
         Some(AnyTimestampType.defaultConcreteType)
 
+      case (DateType, AnyTimestampType) =>
+        Some(AnyTimestampType.defaultConcreteType)
+
       case (_, target: DataType) =>
         if (Cast.canANSIStoreAssign(inType, target)) {
           Some(target)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
index 8ea5886c62eca..63ad84e8a0947 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
@@ -190,6 +190,7 @@ abstract class TypeCoercionSuiteBase extends AnalysisTest {
   test("implicit type cast - DateType") {
     val checkedType = DateType
     checkTypeCasting(checkedType, castableTypes = Seq(checkedType, StringType) ++ datetimeTypes)
+    shouldCast(checkedType, AnyTimestampType, AnyTimestampType.defaultConcreteType)
     shouldNotCast(checkedType, DecimalType)
     shouldNotCast(checkedType, NumericType)
     shouldNotCast(checkedType, IntegralType)
@@ -198,6 +199,16 @@ abstract class TypeCoercionSuiteBase extends AnalysisTest {
   test("implicit type cast - TimestampType") {
     val checkedType = TimestampType
     checkTypeCasting(checkedType, castableTypes = Seq(checkedType, StringType) ++ datetimeTypes)
+    shouldCast(checkedType, AnyTimestampType, checkedType)
+    shouldNotCast(checkedType, DecimalType)
+    shouldNotCast(checkedType, NumericType)
+    shouldNotCast(checkedType, IntegralType)
+  }
+
+  test("implicit type cast - TimestampNTZType") {
+    val checkedType = TimestampNTZType
+    checkTypeCasting(checkedType, castableTypes = Seq(checkedType, StringType) ++ datetimeTypes)
+    shouldCast(checkedType, AnyTimestampType, checkedType)
     shouldNotCast(checkedType, DecimalType)
     shouldNotCast(checkedType, NumericType)
     shouldNotCast(checkedType, IntegralType)

From a9a792b31573733e2972124014f70249cd5413f9 Mon Sep 17 00:00:00 2001
From: yangjie01
Date: Tue, 15 Feb 2022 15:41:00 +0300
Subject: [PATCH 237/513] [SPARK-38199][SQL] Delete the unused `dataType` specified in the definition of `IntervalColumnAccessor`

### What changes were proposed in this pull request?
SPARK-30066 introduced `IntervalColumnAccessor`, which accepts 2 constructor parameters, `buffer` and `dataType`, but the `dataType` was ignored because the parameter passed to `BasicColumnAccessor` is always `CALENDAR_INTERVAL`, so this PR deletes the unused `dataType`.

### Why are the changes needed?

Clarify the class definition of `IntervalColumnAccessor`.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass GA

Closes #35507 from LuciferYang/SPARK-38199.

Authored-by: yangjie01
Signed-off-by: Max Gekk
---
 .../apache/spark/sql/execution/columnar/ColumnAccessor.scala   | 2 +-
 .../spark/sql/execution/columnar/GenerateColumnAccessor.scala  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala
index fa7140be7f326..770b2442e403c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala
@@ -106,7 +106,7 @@ private[columnar] class BinaryColumnAccessor(buffer: ByteBuffer)
   extends BasicColumnAccessor[Array[Byte]](buffer, BINARY)
   with NullableColumnAccessor
 
-private[columnar] class IntervalColumnAccessor(buffer: ByteBuffer, dataType: CalendarIntervalType)
+private[columnar] class IntervalColumnAccessor(buffer: ByteBuffer)
   extends BasicColumnAccessor[CalendarInterval](buffer, CALENDAR_INTERVAL)
   with NullableColumnAccessor
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala
index 6e666d4e1f9fc..33918bcee738b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala
@@ -100,7 +100,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera
       val createCode = dt match {
         case t if CodeGenerator.isPrimitiveType(dt) =>
           s"$accessorName = new $accessorCls(ByteBuffer.wrap(buffers[$index]).order(nativeOrder));"
-        case NullType | StringType | BinaryType =>
+        case NullType | StringType | BinaryType | CalendarIntervalType =>
           s"$accessorName = new $accessorCls(ByteBuffer.wrap(buffers[$index]).order(nativeOrder));"
         case other =>
           s"""$accessorName = new $accessorCls(ByteBuffer.wrap(buffers[$index]).order(nativeOrder),

From c7ee7cd1be59465a8a8a8dc7d04a6df5907c5b09 Mon Sep 17 00:00:00 2001
From: dch nguyen
Date: Tue, 15 Feb 2022 14:13:04 +0100
Subject: [PATCH 238/513] [SPARK-37413][PYTHON] Inline type hints for python/pyspark/ml/tree.py

### What changes were proposed in this pull request?

Inline type hints for python/pyspark/ml/tree.py

### Why are the changes needed?

We can take advantage of static type checking within the functions by inlining the type hints.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Existing tests

Closes #35420 from dchvn/SPARK-37413.
Authored-by: dch nguyen Signed-off-by: zero323 --- python/pyspark/ml/tree.py | 129 +++++++++++++++++++------------------ python/pyspark/ml/tree.pyi | 110 ------------------------------- 2 files changed, 68 insertions(+), 171 deletions(-) delete mode 100644 python/pyspark/ml/tree.pyi diff --git a/python/pyspark/ml/tree.py b/python/pyspark/ml/tree.py index 3d607e1c943ff..6c1622bd699aa 100644 --- a/python/pyspark/ml/tree.py +++ b/python/pyspark/ml/tree.py @@ -14,8 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from typing import cast, List, Sequence, TYPE_CHECKING, TypeVar from pyspark import since +from pyspark.ml.linalg import Vector from pyspark.ml.param import Params from pyspark.ml.param.shared import ( HasCheckpointInterval, @@ -30,35 +32,40 @@ from pyspark.ml.wrapper import JavaPredictionModel from pyspark.ml.common import inherit_doc +if TYPE_CHECKING: + from pyspark.ml._typing import P + +T = TypeVar("T") + @inherit_doc -class _DecisionTreeModel(JavaPredictionModel): +class _DecisionTreeModel(JavaPredictionModel[T]): """ Abstraction for Decision Tree models. .. versionadded:: 1.5.0 """ - @property + @property # type: ignore[misc] @since("1.5.0") - def numNodes(self): + def numNodes(self) -> int: """Return number of nodes of the decision tree.""" return self._call_java("numNodes") - @property + @property # type: ignore[misc] @since("1.5.0") - def depth(self): + def depth(self) -> int: """Return depth of the decision tree.""" return self._call_java("depth") - @property + @property # type: ignore[misc] @since("2.0.0") - def toDebugString(self): + def toDebugString(self) -> str: """Full description of model.""" return self._call_java("toDebugString") @since("3.0.0") - def predictLeaf(self, value): + def predictLeaf(self, value: Vector) -> float: """ Predict the indices of the leaves corresponding to the feature vector. """ @@ -70,7 +77,7 @@ class _DecisionTreeParams(HasCheckpointInterval, HasSeed, HasWeightCol): Mixin for Decision Tree parameters. """ - leafCol = Param( + leafCol: Param[str] = Param( Params._dummy(), "leafCol", "Leaf indices column name. Predicted leaf " @@ -78,7 +85,7 @@ class _DecisionTreeParams(HasCheckpointInterval, HasSeed, HasWeightCol): typeConverter=TypeConverters.toString, ) - maxDepth = Param( + maxDepth: Param[int] = Param( Params._dummy(), "maxDepth", "Maximum depth of the tree. 
(>= 0) E.g., " @@ -87,7 +94,7 @@ class _DecisionTreeParams(HasCheckpointInterval, HasSeed, HasWeightCol): typeConverter=TypeConverters.toInt, ) - maxBins = Param( + maxBins: Param[int] = Param( Params._dummy(), "maxBins", "Max number of bins for discretizing continuous " @@ -96,7 +103,7 @@ class _DecisionTreeParams(HasCheckpointInterval, HasSeed, HasWeightCol): typeConverter=TypeConverters.toInt, ) - minInstancesPerNode = Param( + minInstancesPerNode: Param[int] = Param( Params._dummy(), "minInstancesPerNode", "Minimum number of " @@ -107,7 +114,7 @@ class _DecisionTreeParams(HasCheckpointInterval, HasSeed, HasWeightCol): typeConverter=TypeConverters.toInt, ) - minWeightFractionPerNode = Param( + minWeightFractionPerNode: Param[float] = Param( Params._dummy(), "minWeightFractionPerNode", "Minimum " @@ -119,14 +126,14 @@ class _DecisionTreeParams(HasCheckpointInterval, HasSeed, HasWeightCol): typeConverter=TypeConverters.toFloat, ) - minInfoGain = Param( + minInfoGain: Param[float] = Param( Params._dummy(), "minInfoGain", "Minimum information gain for a split " + "to be considered at a tree node.", typeConverter=TypeConverters.toFloat, ) - maxMemoryInMB = Param( + maxMemoryInMB: Param[int] = Param( Params._dummy(), "maxMemoryInMB", "Maximum memory in MB allocated to " @@ -135,7 +142,7 @@ class _DecisionTreeParams(HasCheckpointInterval, HasSeed, HasWeightCol): typeConverter=TypeConverters.toInt, ) - cacheNodeIds = Param( + cacheNodeIds: Param[bool] = Param( Params._dummy(), "cacheNodeIds", "If false, the algorithm will pass " @@ -146,58 +153,58 @@ class _DecisionTreeParams(HasCheckpointInterval, HasSeed, HasWeightCol): typeConverter=TypeConverters.toBoolean, ) - def __init__(self): + def __init__(self) -> None: super(_DecisionTreeParams, self).__init__() - def setLeafCol(self, value): + def setLeafCol(self: "P", value: str) -> "P": """ Sets the value of :py:attr:`leafCol`. """ return self._set(leafCol=value) - def getLeafCol(self): + def getLeafCol(self) -> str: """ Gets the value of leafCol or its default value. """ return self.getOrDefault(self.leafCol) - def getMaxDepth(self): + def getMaxDepth(self) -> int: """ Gets the value of maxDepth or its default value. """ return self.getOrDefault(self.maxDepth) - def getMaxBins(self): + def getMaxBins(self) -> int: """ Gets the value of maxBins or its default value. """ return self.getOrDefault(self.maxBins) - def getMinInstancesPerNode(self): + def getMinInstancesPerNode(self) -> int: """ Gets the value of minInstancesPerNode or its default value. """ return self.getOrDefault(self.minInstancesPerNode) - def getMinWeightFractionPerNode(self): + def getMinWeightFractionPerNode(self) -> float: """ Gets the value of minWeightFractionPerNode or its default value. """ return self.getOrDefault(self.minWeightFractionPerNode) - def getMinInfoGain(self): + def getMinInfoGain(self) -> float: """ Gets the value of minInfoGain or its default value. """ return self.getOrDefault(self.minInfoGain) - def getMaxMemoryInMB(self): + def getMaxMemoryInMB(self) -> int: """ Gets the value of maxMemoryInMB or its default value. """ return self.getOrDefault(self.maxMemoryInMB) - def getCacheNodeIds(self): + def getCacheNodeIds(self) -> bool: """ Gets the value of cacheNodeIds or its default value. """ @@ -205,44 +212,44 @@ def getCacheNodeIds(self): @inherit_doc -class _TreeEnsembleModel(JavaPredictionModel): +class _TreeEnsembleModel(JavaPredictionModel[T]): """ (private abstraction) Represents a tree ensemble model. 
""" - @property + @property # type: ignore[misc] @since("2.0.0") - def trees(self): + def trees(self) -> Sequence["_DecisionTreeModel"]: """Trees in this ensemble. Warning: These have null parent Estimators.""" return [_DecisionTreeModel(m) for m in list(self._call_java("trees"))] - @property + @property # type: ignore[misc] @since("2.0.0") - def getNumTrees(self): + def getNumTrees(self) -> int: """Number of trees in ensemble.""" return self._call_java("getNumTrees") - @property + @property # type: ignore[misc] @since("1.5.0") - def treeWeights(self): + def treeWeights(self) -> List[float]: """Return the weights for each tree""" return list(self._call_java("javaTreeWeights")) - @property + @property # type: ignore[misc] @since("2.0.0") - def totalNumNodes(self): + def totalNumNodes(self) -> int: """Total number of nodes, summed over all trees in the ensemble.""" return self._call_java("totalNumNodes") - @property + @property # type: ignore[misc] @since("2.0.0") - def toDebugString(self): + def toDebugString(self) -> str: """Full description of model.""" return self._call_java("toDebugString") @since("3.0.0") - def predictLeaf(self, value): + def predictLeaf(self, value: Vector) -> float: """ Predict the indices of the leaves corresponding to the feature vector. """ @@ -254,16 +261,16 @@ class _TreeEnsembleParams(_DecisionTreeParams): Mixin for Decision Tree-based ensemble algorithms parameters. """ - subsamplingRate = Param( + subsamplingRate: Param[float] = Param( Params._dummy(), "subsamplingRate", "Fraction of the training data " + "used for learning each decision tree, in range (0, 1].", typeConverter=TypeConverters.toFloat, ) - supportedFeatureSubsetStrategies = ["auto", "all", "onethird", "sqrt", "log2"] + supportedFeatureSubsetStrategies: List[str] = ["auto", "all", "onethird", "sqrt", "log2"] - featureSubsetStrategy = Param( + featureSubsetStrategy: Param[str] = Param( Params._dummy(), "featureSubsetStrategy", "The number of features to consider for splits at each tree node. Supported " @@ -277,18 +284,18 @@ class _TreeEnsembleParams(_DecisionTreeParams): typeConverter=TypeConverters.toString, ) - def __init__(self): + def __init__(self) -> None: super(_TreeEnsembleParams, self).__init__() @since("1.4.0") - def getSubsamplingRate(self): + def getSubsamplingRate(self) -> float: """ Gets the value of subsamplingRate or its default value. """ return self.getOrDefault(self.subsamplingRate) @since("1.4.0") - def getFeatureSubsetStrategy(self): + def getFeatureSubsetStrategy(self) -> str: """ Gets the value of featureSubsetStrategy or its default value. """ @@ -300,36 +307,36 @@ class _RandomForestParams(_TreeEnsembleParams): Private class to track supported random forest parameters. """ - numTrees = Param( + numTrees: Param[int] = Param( Params._dummy(), "numTrees", "Number of trees to train (>= 1).", typeConverter=TypeConverters.toInt, ) - bootstrap = Param( + bootstrap: Param[bool] = Param( Params._dummy(), "bootstrap", "Whether bootstrap samples are used " "when building trees.", typeConverter=TypeConverters.toBoolean, ) - def __init__(self): + def __init__(self) -> None: super(_RandomForestParams, self).__init__() @since("1.4.0") - def getNumTrees(self): + def getNumTrees(self) -> int: """ Gets the value of numTrees or its default value. """ return self.getOrDefault(self.numTrees) @since("3.0.0") - def getBootstrap(self): + def getBootstrap(self) -> bool: """ Gets the value of bootstrap or its default value. 
""" - return self.getOrDefault(self.bootstrap) + return cast(bool, self.getOrDefault(self.bootstrap)) class _GBTParams(_TreeEnsembleParams, HasMaxIter, HasStepSize, HasValidationIndicatorCol): @@ -337,7 +344,7 @@ class _GBTParams(_TreeEnsembleParams, HasMaxIter, HasStepSize, HasValidationIndi Private class to track supported GBT params. """ - stepSize = Param( + stepSize: Param[float] = Param( Params._dummy(), "stepSize", "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking " @@ -345,7 +352,7 @@ class _GBTParams(_TreeEnsembleParams, HasMaxIter, HasStepSize, HasValidationIndi typeConverter=TypeConverters.toFloat, ) - validationTol = Param( + validationTol: Param[float] = Param( Params._dummy(), "validationTol", "Threshold for stopping early when fit with validation is used. " @@ -356,7 +363,7 @@ class _GBTParams(_TreeEnsembleParams, HasMaxIter, HasStepSize, HasValidationIndi ) @since("3.0.0") - def getValidationTol(self): + def getValidationTol(self) -> float: """ Gets the value of validationTol or its default value. """ @@ -368,9 +375,9 @@ class _HasVarianceImpurity(Params): Private class to track supported impurity measures. """ - supportedImpurities = ["variance"] + supportedImpurities: List[str] = ["variance"] - impurity = Param( + impurity: Param[str] = Param( Params._dummy(), "impurity", "Criterion used for information gain calculation (case-insensitive). " @@ -379,11 +386,11 @@ class _HasVarianceImpurity(Params): typeConverter=TypeConverters.toString, ) - def __init__(self): + def __init__(self) -> None: super(_HasVarianceImpurity, self).__init__() @since("1.4.0") - def getImpurity(self): + def getImpurity(self) -> str: """ Gets the value of impurity or its default value. """ @@ -397,9 +404,9 @@ class _TreeClassifierParams(Params): .. versionadded:: 1.4.0 """ - supportedImpurities = ["entropy", "gini"] + supportedImpurities: List[str] = ["entropy", "gini"] - impurity = Param( + impurity: Param[str] = Param( Params._dummy(), "impurity", "Criterion used for information gain calculation (case-insensitive). " @@ -408,11 +415,11 @@ class _TreeClassifierParams(Params): typeConverter=TypeConverters.toString, ) - def __init__(self): + def __init__(self) -> None: super(_TreeClassifierParams, self).__init__() @since("1.6.0") - def getImpurity(self): + def getImpurity(self) -> str: """ Gets the value of impurity or its default value. """ diff --git a/python/pyspark/ml/tree.pyi b/python/pyspark/ml/tree.pyi deleted file mode 100644 index 5a9b70ed7e3bc..0000000000000 --- a/python/pyspark/ml/tree.pyi +++ /dev/null @@ -1,110 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from typing import List, Sequence -from pyspark.ml._typing import P, T - -from pyspark.ml.linalg import Vector -from pyspark import since as since # noqa: F401 -from pyspark.ml.common import inherit_doc as inherit_doc # noqa: F401 -from pyspark.ml.param import Param, Params as Params -from pyspark.ml.param.shared import ( # noqa: F401 - HasCheckpointInterval as HasCheckpointInterval, - HasMaxIter as HasMaxIter, - HasSeed as HasSeed, - HasStepSize as HasStepSize, - HasValidationIndicatorCol as HasValidationIndicatorCol, - HasWeightCol as HasWeightCol, - Param as Param, - TypeConverters as TypeConverters, -) -from pyspark.ml.wrapper import JavaPredictionModel as JavaPredictionModel - -class _DecisionTreeModel(JavaPredictionModel[T]): - @property - def numNodes(self) -> int: ... - @property - def depth(self) -> int: ... - @property - def toDebugString(self) -> str: ... - def predictLeaf(self, value: Vector) -> float: ... - -class _DecisionTreeParams(HasCheckpointInterval, HasSeed, HasWeightCol): - leafCol: Param[str] - maxDepth: Param[int] - maxBins: Param[int] - minInstancesPerNode: Param[int] - minWeightFractionPerNode: Param[float] - minInfoGain: Param[float] - maxMemoryInMB: Param[int] - cacheNodeIds: Param[bool] - def __init__(self) -> None: ... - def setLeafCol(self: P, value: str) -> P: ... - def getLeafCol(self) -> str: ... - def getMaxDepth(self) -> int: ... - def getMaxBins(self) -> int: ... - def getMinInstancesPerNode(self) -> int: ... - def getMinInfoGain(self) -> float: ... - def getMaxMemoryInMB(self) -> int: ... - def getCacheNodeIds(self) -> bool: ... - -class _TreeEnsembleModel(JavaPredictionModel[T]): - @property - def trees(self) -> Sequence[_DecisionTreeModel]: ... - @property - def getNumTrees(self) -> int: ... - @property - def treeWeights(self) -> List[float]: ... - @property - def totalNumNodes(self) -> int: ... - @property - def toDebugString(self) -> str: ... - -class _TreeEnsembleParams(_DecisionTreeParams): - subsamplingRate: Param[float] - supportedFeatureSubsetStrategies: List[str] - featureSubsetStrategy: Param[str] - def __init__(self) -> None: ... - def getSubsamplingRate(self) -> float: ... - def getFeatureSubsetStrategy(self) -> str: ... - -class _RandomForestParams(_TreeEnsembleParams): - numTrees: Param[int] - bootstrap: Param[bool] - def __init__(self) -> None: ... - def getNumTrees(self) -> int: ... - def getBootstrap(self) -> bool: ... - -class _GBTParams(_TreeEnsembleParams, HasMaxIter, HasStepSize, HasValidationIndicatorCol): - stepSize: Param[float] - validationTol: Param[float] - def getValidationTol(self) -> float: ... - -class _HasVarianceImpurity(Params): - supportedImpurities: List[str] - impurity: Param[str] - def __init__(self) -> None: ... - def getImpurity(self) -> str: ... - -class _TreeClassifierParams(Params): - supportedImpurities: List[str] - impurity: Param[str] - def __init__(self) -> None: ... - def getImpurity(self) -> str: ... - -class _TreeRegressorParams(_HasVarianceImpurity): ... From 2a5ef001ae578529d7b4b0b40f8df0479f3e382d Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 15 Feb 2022 09:27:15 -0800 Subject: [PATCH 239/513] [SPARK-38121][PYTHON][SQL][FOLLOW-UP] Set 'spark.sql.catalogImplementation' to 'hive' in HiveContext ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/35410 to fix a mistake. `HiveContext` should set `spark.sql.catalogImplementation` to `hive` instead of `in-memory`. 
This PR also includes several changes: - Make `HiveContext.getOrCreate` works identically as `SQLContext.getOrCreate` - Match the signature of `HiveContext.__init__` and `SQLContext.__init__` (both are not supported to be directly called by users though). ### Why are the changes needed? See https://github.com/apache/spark/pull/35410#discussion_r806358814 ### Does this PR introduce _any_ user-facing change? No to end users because this change has not been released out yet. It creates a non-Hive supported `SparkSession` if there isn't an existing SparkSession running. See also https://github.com/apache/spark/pull/35410#discussion_r806358814. Nobody uses `HiveContext` directly anymore but it's better to keep the behaviours unchanged. ### How was this patch tested? Manually tested (in PySpark shell): ```python spark.stop() from pyspark import SparkContext from pyspark.sql import HiveContext HiveContext.getOrCreate(SparkContext.getOrCreate()).getConf("spark.sql.catalogImplementation") ``` **Before:** ```pyspark 'in-memory' ``` **After:** ```pyspark 'hive' ``` Closes #35517 from HyukjinKwon/SPARK-38121-followup. Authored-by: Hyukjin Kwon Signed-off-by: Liang-Chi Hsieh --- python/pyspark/sql/context.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 6f94e9a3b8153..c6eb6c326fd28 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -161,7 +161,9 @@ def getOrCreate(cls: Type["SQLContext"], sc: SparkContext) -> "SQLContext": return cls._get_or_create(sc) @classmethod - def _get_or_create(cls: Type["SQLContext"], sc: SparkContext) -> "SQLContext": + def _get_or_create( + cls: Type["SQLContext"], sc: SparkContext, **static_conf: Any + ) -> "SQLContext": if ( cls._instantiatedContext is None @@ -170,7 +172,7 @@ def _get_or_create(cls: Type["SQLContext"], sc: SparkContext) -> "SQLContext": assert sc._jvm is not None # There can be only one running Spark context. That will automatically # be used in the Spark session internally. - session = SparkSession._getActiveSessionOrCreate() + session = SparkSession._getActiveSessionOrCreate(**static_conf) cls(sc, session, session._jsparkSession.sqlContext()) return cast(SQLContext, cls._instantiatedContext) @@ -705,7 +707,14 @@ class HiveContext(SQLContext): """ - def __init__(self, sparkContext: SparkContext, jhiveContext: Optional[JavaObject] = None): + _static_conf = {"spark.sql.catalogImplementation": "hive"} + + def __init__( + self, + sparkContext: SparkContext, + sparkSession: Optional[SparkSession] = None, + jhiveContext: Optional[JavaObject] = None, + ): warnings.warn( "HiveContext is deprecated in Spark 2.0.0. Please use " + "SparkSession.builder.enableHiveSupport().getOrCreate() instead.", @@ -713,11 +722,18 @@ def __init__(self, sparkContext: SparkContext, jhiveContext: Optional[JavaObject ) static_conf = {} if jhiveContext is None: - static_conf = {"spark.sql.catalogImplementation": "in-memory"} + static_conf = HiveContext._static_conf # There can be only one running Spark context. That will automatically # be used in the Spark session internally. 
- session = SparkSession._getActiveSessionOrCreate(**static_conf) - SQLContext.__init__(self, sparkContext, session, jhiveContext) + if sparkSession is not None: + sparkSession = SparkSession._getActiveSessionOrCreate(**static_conf) + SQLContext.__init__(self, sparkContext, sparkSession, jhiveContext) + + @classmethod + def _get_or_create( + cls: Type["SQLContext"], sc: SparkContext, **static_conf: Any + ) -> "SQLContext": + return SQLContext._get_or_create(sc, **HiveContext._static_conf) @classmethod def _createForTesting(cls, sparkContext: SparkContext) -> "HiveContext": From ece34f06b7837340d6727d9512ae13ede437e43f Mon Sep 17 00:00:00 2001 From: Steven Aerts Date: Wed, 16 Feb 2022 01:42:51 +0800 Subject: [PATCH 240/513] [SPARK-38130][SQL] Remove array_sort orderable entries check The check in array_sort to check for orderable items can be removed. As it is possible to pass a lambda to make the items orderable: Seq((Array[Map[String, Int]](Map("a" -> 1), Map()), "x")).toDF("a", "b") .selectExpr("array_sort(a, (x,y) -> cardinality(x) - cardinality(y))") And for the cases where it is relevant the check is never hit as the LessThan operator has a similar check which is evaluated first: > due to data type mismatch: LessThan does not support ordering on type map ### What changes were proposed in this pull request? Remove a check which prevents you to sort an array with non-orderable items, with a lambda which makes everything orderable. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Extra test was added. Closes #35426 from steven-aerts/SPARK-38130. Authored-by: Steven Aerts Signed-off-by: Wenchen Fan --- .../catalyst/expressions/higherOrderFunctions.scala | 9 +++------ .../org/apache/spark/sql/DataFrameFunctionsSuite.scala | 10 ++++++++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index 0c45f495097aa..f9b2ade9a6029 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -388,18 +388,13 @@ case class ArraySort( checkArgumentDataTypes() match { case TypeCheckResult.TypeCheckSuccess => argument.dataType match { - case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + case ArrayType(_, _) => if (function.dataType == IntegerType) { TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure("Return type of the given function has to be " + "IntegerType") } - case ArrayType(dt, _) => - val dtSimple = dt.catalogString - TypeCheckResult.TypeCheckFailure( - s"$prettyName does not support sorting array of type $dtSimple which is not " + - "orderable") case _ => TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") } @@ -452,6 +447,8 @@ object ArraySort { If(LessThan(left, right), litm1, If(GreaterThan(left, right), lit1, lit0))))) } + // Default Comparator only works for orderable types. 
+ // This is validated by the underlying LessTan and GreaterThan val defaultComparator: LambdaFunction = { val left = UnresolvedNamedLambdaVariable(Seq("left")) val right = UnresolvedNamedLambdaVariable(Seq("right")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 1ddb238d1db2f..3999ae8620331 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -481,6 +481,16 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { spark.sql("drop temporary function fStringLength") } + test("SPARK-38130: array_sort with lambda of non-orderable items") { + val df6 = Seq((Array[Map[String, Int]](Map("a" -> 1), Map("b" -> 2, "c" -> 3), + Map()), "x")).toDF("a", "b") + checkAnswer( + df6.selectExpr("array_sort(a, (x, y) -> cardinality(x) - cardinality(y))"), + Seq( + Row(Seq[Map[String, Int]](Map(), Map("a" -> 1), Map("b" -> 2, "c" -> 3)))) + ) + } + test("sort_array/array_sort functions") { val df = Seq( (Array[Int](2, 1, 3), Array("b", "c", "a")), From 39166eda67c2645566b1f7aa83a0519c1d3214ed Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Wed, 16 Feb 2022 08:03:38 +0900 Subject: [PATCH 241/513] [SPARK-38124][SS][FOLLOWUP] Add test to harden assumption of SS partitioning requirement ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/35419#discussion_r801354342, to add unit test to harden the assumption of SS partitioning and distribution requirement: * Check the `HashPartitioning.partitionIdExpression` to be exactly expected format * Check all different kinds of `Partitioning` against `StatefulOpClusteredDistribution`. Also add a minor comment for `StatefulOpClusteredDistribution`, as `SinglePartition` can also satisfy the distribution. ### Why are the changes needed? Document our assumption of SS in code as unit test. So next time when we introduce intrusive code change, the unit test can save us by failing loudly. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The added unit test itself. Closes #35529 from c21/partition-test. Authored-by: Cheng Su Signed-off-by: Jungtaek Lim --- .../plans/physical/partitioning.scala | 2 + .../sql/catalyst/DistributionSuite.scala | 78 +++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index 5342c8ee6d672..040f1bfab65b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -101,6 +101,8 @@ case class ClusteredDistribution( * Since this distribution relies on [[HashPartitioning]] on the physical partitioning of the * stateful operator, only [[HashPartitioning]] (and HashPartitioning in * [[PartitioningCollection]]) can satisfy this distribution. + * When `_requiredNumPartitions` is 1, [[SinglePartition]] is essentially same as + * [[HashPartitioning]], so it can satisfy this distribution as well. * * NOTE: This is applied only to stream-stream join as of now. 
For other stateful operators, we * have been using ClusteredDistribution, which could construct the physical partitioning of the diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/DistributionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/DistributionSuite.scala index 5d3f960c3bfac..e047d4c070bec 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/DistributionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/DistributionSuite.scala @@ -20,7 +20,9 @@ package org.apache.spark.sql.catalyst import org.apache.spark.SparkFunSuite /* Implicit conversions */ import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.{Literal, Murmur3Hash, Pmod} import org.apache.spark.sql.catalyst.plans.physical._ +import org.apache.spark.sql.types.IntegerType class DistributionSuite extends SparkFunSuite { @@ -265,4 +267,80 @@ class DistributionSuite extends SparkFunSuite { ClusteredDistribution(Seq($"a", $"b", $"c"), Some(5)), false) } + + test("Structured Streaming output partitioning and distribution") { + // Validate HashPartitioning.partitionIdExpression to be exactly expected format, because + // Structured Streaming state store requires it to be consistent across Spark versions. + val expressions = Seq($"a", $"b", $"c") + val hashPartitioning = HashPartitioning(expressions, 10) + hashPartitioning.partitionIdExpression match { + case Pmod(Murmur3Hash(es, 42), Literal(10, IntegerType), _) => + assert(es.length == expressions.length && es.zip(expressions).forall { + case (l, r) => l.semanticEquals(r) + }) + case x => fail(s"Unexpected partitionIdExpression $x for $hashPartitioning") + } + + // Validate only HashPartitioning (and HashPartitioning in PartitioningCollection) can satisfy + // StatefulOpClusteredDistribution. SinglePartition can also satisfy this distribution when + // `_requiredNumPartitions` is 1. 
+ checkSatisfied( + HashPartitioning(Seq($"a", $"b", $"c"), 10), + StatefulOpClusteredDistribution(Seq($"a", $"b", $"c"), 10), + true) + + checkSatisfied( + PartitioningCollection(Seq( + HashPartitioning(Seq($"a", $"b", $"c"), 10), + RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10))), + StatefulOpClusteredDistribution(Seq($"a", $"b", $"c"), 10), + true) + + checkSatisfied( + SinglePartition, + StatefulOpClusteredDistribution(Seq($"a", $"b", $"c"), 1), + true) + + checkSatisfied( + PartitioningCollection(Seq( + HashPartitioning(Seq($"a", $"b"), 1), + SinglePartition)), + StatefulOpClusteredDistribution(Seq($"a", $"b", $"c"), 1), + true) + + checkSatisfied( + HashPartitioning(Seq($"a", $"b"), 10), + StatefulOpClusteredDistribution(Seq($"a", $"b", $"c"), 10), + false) + + checkSatisfied( + HashPartitioning(Seq($"a", $"b", $"c"), 5), + StatefulOpClusteredDistribution(Seq($"a", $"b", $"c"), 10), + false) + + checkSatisfied( + RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10), + StatefulOpClusteredDistribution(Seq($"a", $"b", $"c"), 10), + false) + + checkSatisfied( + SinglePartition, + StatefulOpClusteredDistribution(Seq($"a", $"b", $"c"), 10), + false) + + checkSatisfied( + BroadcastPartitioning(IdentityBroadcastMode), + StatefulOpClusteredDistribution(Seq($"a", $"b", $"c"), 10), + false) + + checkSatisfied( + RoundRobinPartitioning(10), + StatefulOpClusteredDistribution(Seq($"a", $"b", $"c"), 10), + false) + + checkSatisfied( + UnknownPartitioning(10), + StatefulOpClusteredDistribution(Seq($"a", $"b", $"c"), 10), + false) + } } From 66c83dfb7e443bedc7f9a895f6589f40153b652f Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 16 Feb 2022 09:03:04 +0900 Subject: [PATCH 242/513] [SPARK-38220][BUILD] Upgrade `commons-math3` to 3.6.1 ### What changes were proposed in this pull request? This PR aims to upgrade `commons-math3` to 3.6.1. ### Why are the changes needed? `3.6.1` is the latest and popular than `3.4.1`. - https://commons.apache.org/proper/commons-math/download_math.cgi - https://mvnrepository.com/artifact/org.apache.commons/commons-math3 ### Does this PR introduce _any_ user-facing change? Although this is a dependency change, there is no breaking change. ### How was this patch tested? Pass the CIs. Closes #35535 from dongjoon-hyun/SPARK-38220. 
Authored-by: Dongjoon Hyun Signed-off-by: Hyukjin Kwon --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 50480e4ab6930..26c5439f7d612 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -50,7 +50,7 @@ commons-io/2.4//commons-io-2.4.jar commons-lang/2.6//commons-lang-2.6.jar commons-lang3/3.12.0//commons-lang3-3.12.0.jar commons-logging/1.1.3//commons-logging-1.1.3.jar -commons-math3/3.4.1//commons-math3-3.4.1.jar +commons-math3/3.6.1//commons-math3-3.6.1.jar commons-net/3.1//commons-net-3.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar commons-text/1.6//commons-text-1.6.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 13b23c06cf647..dd95710016340 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -48,7 +48,7 @@ commons-io/2.11.0//commons-io-2.11.0.jar commons-lang/2.6//commons-lang-2.6.jar commons-lang3/3.12.0//commons-lang3-3.12.0.jar commons-logging/1.1.3//commons-logging-1.1.3.jar -commons-math3/3.4.1//commons-math3-3.4.1.jar +commons-math3/3.6.1//commons-math3-3.6.1.jar commons-net/3.1//commons-net-3.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar commons-text/1.6//commons-text-1.6.jar diff --git a/pom.xml b/pom.xml index 04feb2aca1cca..b0791f7300ab8 100644 --- a/pom.xml +++ b/pom.xml @@ -157,7 +157,7 @@ 4.5.13 4.4.14 - 3.4.1 + 3.6.1 4.4 2.12.15 From bb757b5bdb686be80c5ddca4e9abef71a55eb746 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Tue, 15 Feb 2022 16:58:02 -0800 Subject: [PATCH 243/513] [SPARK-37145][K8S][FOLLOWUP] Add note for `KubernetesCustom[Driver/Executor]FeatureConfigStep` ### What changes were proposed in this pull request? Add note for developers to show how to use `KubernetesDriverCustomFeatureConfigStep` and `KubernetesExecutorCustomFeatureConfigStep` (https://github.com/apache/spark/pull/35345). ### Why are the changes needed? Give an example to show how to use it. ### Does this PR introduce _any_ user-facing change? No, doc only ### How was this patch tested? ci passed Closes #35496 from Yikun/SPARK-37145-followup. Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- ...ernetesDriverCustomFeatureConfigStep.scala | 39 +++++++++++++++++++ ...netesExecutorCustomFeatureConfigStep.scala | 39 +++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesDriverCustomFeatureConfigStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesDriverCustomFeatureConfigStep.scala index bbd05e9f67c51..0edd94d3370ab 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesDriverCustomFeatureConfigStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesDriverCustomFeatureConfigStep.scala @@ -25,6 +25,45 @@ import org.apache.spark.deploy.k8s.KubernetesDriverConf * A base interface to help user extend custom feature step in driver side. * Note: If your custom feature step would be used only in driver or both in driver and executor, * please use this. 
+ * + * Example of driver feature step: + * + * {{{ + * class DriverExampleFeatureStep extends KubernetesDriverCustomFeatureConfigStep { + * private var driverConf: KubernetesDriverConf = _ + * + * override def init(conf: KubernetesDriverConf): Unit = { + * driverConf = conf + * } + * + * // Implements methods of `KubernetesFeatureConfigStep`, such as `configurePod` + * override def configurePod(pod: SparkPod): SparkPod = { + * // Apply modifications on the given pod in accordance to this feature. + * } + * } + * }}} + * + * Example of feature step for both driver and executor: + * + * {{{ + * class DriverAndExecutorExampleFeatureStep extends KubernetesDriverCustomFeatureConfigStep + * with KubernetesExecutorCustomFeatureConfigStep { + * private var kubernetesConf: KubernetesConf = _ + * + * override def init(conf: KubernetesDriverConf): Unit = { + * kubernetesConf = conf + * } + * + * override def init(conf: KubernetesExecutorConf): Unit = { + * kubernetesConf = conf + * } + * + * // Implements methods of `KubernetesFeatureConfigStep`, such as `configurePod` + * override def configurePod(pod: SparkPod): SparkPod = { + * // Apply modifications on the given pod in accordance to this feature. + * } + * } + * }}} */ @Unstable @DeveloperApi diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesExecutorCustomFeatureConfigStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesExecutorCustomFeatureConfigStep.scala index 062fa7dbf1413..dfb1c768c990e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesExecutorCustomFeatureConfigStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesExecutorCustomFeatureConfigStep.scala @@ -25,6 +25,45 @@ import org.apache.spark.deploy.k8s.KubernetesExecutorConf * A base interface to help user extend custom feature step in executor side. * Note: If your custom feature step would be used only in driver or both in driver and executor, * please use this. + * + * Example of executor feature step: + * + * {{{ + * class ExecutorExampleFeatureStep extends KubernetesExecutorCustomFeatureConfigStep { + * private var executorConf: KubernetesExecutorConf = _ + * + * override def init(conf: KubernetesExecutorConf): Unit = { + * executorConf = conf + * } + * + * // Implements methods of `KubernetesFeatureConfigStep`, such as `configurePod` + * override def configurePod(pod: SparkPod): SparkPod = { + * // Apply modifications on the given pod in accordance to this feature. + * } + * } + * }}} + * + * Example of feature step for both driver and executor: + * + * {{{ + * class DriverAndExecutorExampleFeatureStep extends KubernetesDriverCustomFeatureConfigStep + * with KubernetesExecutorCustomFeatureConfigStep { + * private var kubernetesConf: KubernetesConf = _ + * + * override def init(conf: KubernetesDriverConf): Unit = { + * kubernetesConf = conf + * } + * + * override def init(conf: KubernetesExecutorConf): Unit = { + * kubernetesConf = conf + * } + * + * // Implements methods of `KubernetesFeatureConfigStep`, such as `configurePod` + * override def configurePod(pod: SparkPod): SparkPod = { + * // Apply modifications on the given pod in accordance to this feature. 
+ * } + * } + * }}} */ @Unstable @DeveloperApi From ad2bc7d82296527582dfa469aad33123afdf6736 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Wed, 16 Feb 2022 11:12:33 +0900 Subject: [PATCH 244/513] [SPARK-38221][SQL] Eagerly iterate over groupingExpressions when moving complex grouping expressions out of an Aggregate node ### What changes were proposed in this pull request? Change `PullOutGroupingExpressions` to eagerly iterate over `groupingExpressions` when building `complexGroupingExpressionMap`. ### Why are the changes needed? Consider this query: ``` Seq(1).toDF("id").groupBy(Stream($"id" + 1, $"id" + 2): _*).sum("id").show(false) ``` It fails with ``` java.lang.IllegalStateException: Couldn't find _groupingexpression#24 in [id#4,_groupingexpression#23] at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:80) at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:73) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481) at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:83) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457) at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:425) at org.apache.spark.sql.catalyst.expressions.BindReferences$.bindReference(BoundAttribute.scala:73) at org.apache.spark.sql.catalyst.expressions.BindReferences$.$anonfun$bindReferences$1(BoundAttribute.scala:94) at scala.collection.immutable.Stream.$anonfun$map$1(Stream.scala:418) at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1173) at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1163) at scala.collection.immutable.Stream.$anonfun$map$1(Stream.scala:418) at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1173) at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1163) at scala.collection.immutable.Stream.foreach(Stream.scala:534) at scala.collection.TraversableOnce.count(TraversableOnce.scala:152) at scala.collection.TraversableOnce.count$(TraversableOnce.scala:145) at scala.collection.AbstractTraversable.count(Traversable.scala:108) at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.createCode(GenerateUnsafeProjection.scala:293) at org.apache.spark.sql.execution.aggregate.HashAggregateExec.doConsumeWithKeys(HashAggregateExec.scala:623) ... etc ... ``` When `HashAggregateExec` attempts to bind the references in the group-by expressions, attribute _groupingexpression#24 is missing from the child `ProjectExec`'s output. This is due to the way `PullOutGroupingExpressions`, when determining which grouping expressions to shift from the `Aggregate` node to a `Project` node, populates `complexGroupingExpressionMap`. `PullOutGroupingExpressions` uses a map operation to iterate over `groupingExpressions` and updates `complexGroupingExpressionMap` in the closure passed to `map()`. However, if `groupingExpressions` is a `Stream`, the map operation is evaluated lazily, and isn't fully completed until `ComputeCurrentTime` calls `transformAllExpressionsWithPruning`, which is long after `PullOutGroupingExpressions` completes. Therefore, at the time `PullOutGroupingExpressions` is ready to create the `Project` node, `complexGroupingExpressionMap` is not fully populated. 
As a result, the `Project` node is missing all but the first complex grouping expression. ### Does this PR introduce _any_ user-facing change? No, other than the above query now works. ### How was this patch tested? New unit test. Closes #35537 from bersprockets/groupby_stream_issue. Authored-by: Bruce Robbins Signed-off-by: Hyukjin Kwon --- .../sql/catalyst/optimizer/PullOutGroupingExpressions.scala | 2 +- .../scala/org/apache/spark/sql/DataFrameAggregateSuite.scala | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PullOutGroupingExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PullOutGroupingExpressions.scala index 859a73a4842f0..1bd186d89a07d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PullOutGroupingExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PullOutGroupingExpressions.scala @@ -50,7 +50,7 @@ object PullOutGroupingExpressions extends Rule[LogicalPlan] { plan.transformWithPruning(_.containsPattern(AGGREGATE)) { case a: Aggregate if a.resolved => val complexGroupingExpressionMap = mutable.LinkedHashMap.empty[Expression, NamedExpression] - val newGroupingExpressions = a.groupingExpressions.map { + val newGroupingExpressions = a.groupingExpressions.toIndexedSeq.map { case e if !e.foldable && e.children.nonEmpty => complexGroupingExpressionMap .getOrElseUpdate(e.canonicalized, Alias(e, s"_groupingexpression")())
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 43c0162692e49..215d38d8b1677 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -1448,6 +1448,11 @@ class DataFrameAggregateSuite extends QueryTest val emptyAgg = Map.empty[String, String] assert(spark.range(2).where("id > 2").agg(emptyAgg).limit(1).count == 1) } + + test("SPARK-38221: group by stream of complex expressions should not fail") { + val df = Seq(1).toDF("id").groupBy(Stream($"id" + 1, $"id" + 2): _*).sum("id") + checkAnswer(df, Row(2, 3, 1)) + } } case class B(c: Option[Double])
From 69859f81e5b13952b6e37fa4d51b1b4dbe19e5bc Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 15 Feb 2022 19:49:39 -0800 Subject: [PATCH 245/513] [SPARK-38201][K8S] Fix `uploadFileToHadoopCompatibleFS` to use `delSrc` and `overwrite` parameters ### What changes were proposed in this pull request? `KubernetesUtils#uploadFileToHadoopCompatibleFS` defines the input parameters `delSrc` and `overwrite`, but the constants `false` and `true` are used when invoking `FileSystem.copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst)`. This PR changes the call to use the passed-in `delSrc` and `overwrite` when invoking the `copyFromLocalFile` method. ### Why are the changes needed? Bug fix ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA and add a new test case Closes #35509 from LuciferYang/SPARK-38201.
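A condensed sketch of the pattern being fixed (simplified wrapper, not the actual `KubernetesUtils` code) may help show why hard-coded flags are a bug: the caller's arguments were silently ignored.

```scala
// Simplified sketch, not the actual Spark code.
import org.apache.hadoop.fs.{FileSystem, Path}

object UploadSketch {
  def upload(src: Path, dest: Path, fs: FileSystem,
      delSrc: Boolean = false, overwrite: Boolean = true): Unit = {
    // Before the fix the wrapper effectively called fs.copyFromLocalFile(false, true, src, dest),
    // so delSrc = true never deleted the source and overwrite = false never protected an
    // existing destination. The fix forwards the parameters that were actually passed in:
    fs.copyFromLocalFile(delSrc, overwrite, src, dest)
  }
}
```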
Lead-authored-by: yangjie01 Co-authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/deploy/k8s/KubernetesUtils.scala | 2 +- .../deploy/k8s/KubernetesUtilsSuite.scala | 66 ++++++++++++++++++- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala index 0c8d9646a2b4e..a05d07adcc825 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala @@ -344,7 +344,7 @@ object KubernetesUtils extends Logging { delSrc : Boolean = false, overwrite: Boolean = true): Unit = { try { - fs.copyFromLocalFile(false, true, src, dest) + fs.copyFromLocalFile(delSrc, overwrite, src, dest) } catch { case e: IOException => throw new SparkException(s"Error uploading file ${src.getName}", e) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala index ef57a4b861508..5498238307d1c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala @@ -17,13 +17,20 @@ package org.apache.spark.deploy.k8s +import java.io.File +import java.nio.charset.StandardCharsets + import scala.collection.JavaConverters._ import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder} +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.scalatest.PrivateMethodTester -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkException, SparkFunSuite} -class KubernetesUtilsSuite extends SparkFunSuite { +class KubernetesUtilsSuite extends SparkFunSuite with PrivateMethodTester { private val HOST = "test-host" private val POD = new PodBuilder() .withNewSpec() @@ -65,4 +72,59 @@ class KubernetesUtilsSuite extends SparkFunSuite { assert(sparkPodWithNoContainerName.pod.getSpec.getHostname == HOST) assert(sparkPodWithNoContainerName.container.getName == null) } + + test("SPARK-38201: check uploadFileToHadoopCompatibleFS with different delSrc and overwrite") { + withTempDir { srcDir => + withTempDir { destDir => + val upload = PrivateMethod[Unit](Symbol("uploadFileToHadoopCompatibleFS")) + val fileName = "test.txt" + val srcFile = new File(srcDir, fileName) + val src = new Path(srcFile.getAbsolutePath) + val dest = new Path(destDir.getAbsolutePath, fileName) + val fs = src.getFileSystem(new Configuration()) + + def checkUploadException(delSrc: Boolean, overwrite: Boolean): Unit = { + val message = intercept[SparkException] { + KubernetesUtils.invokePrivate(upload(src, dest, fs, delSrc, overwrite)) + }.getMessage + assert(message.contains("Error uploading file")) + } + + def appendFileAndUpload(content: String, delSrc: Boolean, overwrite: Boolean): Unit = { + FileUtils.write(srcFile, content, StandardCharsets.UTF_8, true) + KubernetesUtils.invokePrivate(upload(src, dest, fs, delSrc, overwrite)) + } + + // Write a new file, upload file with delSrc = false and overwrite = true. + // Upload successful and record the `fileLength`. 
+ appendFileAndUpload("init-content", delSrc = false, overwrite = true) + val firstLength = fs.getFileStatus(dest).getLen + + // Append the file, upload file with delSrc = false and overwrite = true. + // Upload succeeded but `fileLength` changed. + appendFileAndUpload("append-content", delSrc = false, overwrite = true) + val secondLength = fs.getFileStatus(dest).getLen + assert(firstLength < secondLength) + + // Upload file with delSrc = false and overwrite = false. + // Upload failed because dest exists and not changed. + checkUploadException(delSrc = false, overwrite = false) + assert(fs.exists(dest)) + assert(fs.getFileStatus(dest).getLen == secondLength) + + // Append the file again, upload file delSrc = true and overwrite = true. + // Upload succeeded, `fileLength` changed and src not exists. + appendFileAndUpload("append-content", delSrc = true, overwrite = true) + val thirdLength = fs.getFileStatus(dest).getLen + assert(secondLength < thirdLength) + assert(!fs.exists(src)) + + // Rewrite a new file, upload file with delSrc = true and overwrite = false. + // Upload failed because dest exists, src still exists. + FileUtils.write(srcFile, "re-init-content", StandardCharsets.UTF_8, true) + checkUploadException(delSrc = true, overwrite = false) + assert(fs.exists(src)) + } + } + } } From 1ef5638177dcf06ebca4e9b0bc88401e0fce2ae8 Mon Sep 17 00:00:00 2001 From: TongWeii Date: Wed, 16 Feb 2022 12:40:59 +0800 Subject: [PATCH 246/513] =?UTF-8?q?[SPARK-38173][SQL]=20Quoted=20column=20?= =?UTF-8?q?cannot=20be=20recognized=20correctly=20when=20quotedRegexColumn?= =?UTF-8?q?Na=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? bug fix ### Why are the changes needed? 
When spark.sql.parser.quotedRegexColumnNames=true ``` SELECT `(C3)?+.+`,`C1` * C2 FROM (SELECT 3 AS C1,2 AS C2,1 AS C3) T; ``` The above query will throw an exception ``` Error: org.apache.hive.service.cli.HiveSQLException: Error running query: org.apache.spark.sql.AnalysisException: Invalid usage of '*' in expression 'multiply' at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:370) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:266) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties(SparkOperation.scala:78) at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties$(SparkOperation.scala:62) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:44) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:266) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:261) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:275) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: org.apache.spark.sql.AnalysisException: Invalid usage of '*' in expression 'multiply' at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.failAnalysis(CheckAnalysis.scala:50) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.failAnalysis$(CheckAnalysis.scala:49) at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:155) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$expandStarExpression$1.applyOrElse(Analyzer.scala:1700) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$expandStarExpression$1.applyOrElse(Analyzer.scala:1671) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformUp$2(TreeNode.scala:342) at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:74) at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:342) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformUp$1(TreeNode.scala:339) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:408) at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:244) at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:406) at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:359) at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:339) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.expandStarExpression(Analyzer.scala:1671) at 
org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.$anonfun$buildExpandedProjectList$1(Analyzer.scala:1656) ``` It works fine in hive, because hive treats a pattern with all alphabets/digits and "_" as a normal string ``` /** * Returns whether the pattern is a regex expression (instead of a normal * string). Normal string is a string with all alphabets/digits and "_". */ static boolean isRegex(String pattern, HiveConf conf) { String qIdSupport = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT); if ( "column".equals(qIdSupport)) { return false; } for (int i = 0; i < pattern.length(); i++) { if (!Character.isLetterOrDigit(pattern.charAt(i)) && pattern.charAt(i) != '_') { return true; } } return false; } ``` ``` 0: jdbc:hive2://hiveserver-inc.> set hive.support.quoted.identifiers=none; No rows affected (0.003 seconds) 0: jdbc:hive2://hiveserver-inc.> SELECT `(C3)?+.+`,`C1` * C2 FROM (SELECT 3 AS C1,2 AS C2,1 AS C3) T; 22/02/10 19:01:43 INFO ql.Driver: OK +-------+-------+------+ | t.c1 | t.c2 | _c1 | +-------+-------+------+ | 3 | 2 | 6 | +-------+-------+------+ 1 row selected (0.136 seconds) ``` In this pr, we add the `isRegex` method to check whether the pattern is a regex expression ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT Closes #35476 from TongWei1105/SPARK-38173. Lead-authored-by: TongWeii Co-authored-by: TongWei <68682646+TongWei1105@users.noreply.github.com> Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/AstBuilder.scala | 14 ++++++++++++-- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 13 +++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index bd43cffc98dd0..773d32d01916d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2091,6 +2091,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg return false } + /** + * Returns whether the pattern is a regex expression (instead of a normal + * string). Normal string is a string with all alphabets/digits and "_". + */ + private def isRegex(pattern: String): Boolean = { + pattern.exists(p => !Character.isLetterOrDigit(p) && p != '_') + } + /** * Create a dereference expression. The return type depends on the type of the parent. 
* If the parent is an [[UnresolvedAttribute]], it can be a [[UnresolvedAttribute]] or @@ -2103,7 +2111,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case unresolved_attr @ UnresolvedAttribute(nameParts) => ctx.fieldName.getStart.getText match { case escapedIdentifier(columnNameRegex) - if conf.supportQuotedRegexColumnName && canApplyRegex(ctx) => + if conf.supportQuotedRegexColumnName && + isRegex(columnNameRegex) && canApplyRegex(ctx) => UnresolvedRegex(columnNameRegex, Some(unresolved_attr.name), conf.caseSensitiveAnalysis) case _ => @@ -2121,7 +2130,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg override def visitColumnReference(ctx: ColumnReferenceContext): Expression = withOrigin(ctx) { ctx.getStart.getText match { case escapedIdentifier(columnNameRegex) - if conf.supportQuotedRegexColumnName && canApplyRegex(ctx) => + if conf.supportQuotedRegexColumnName && + isRegex(columnNameRegex) && canApplyRegex(ctx) => UnresolvedRegex(columnNameRegex, None, conf.caseSensitiveAnalysis) case _ => UnresolvedAttribute.quoted(ctx.getText) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index ffc3db31c90dc..974e489dcc0ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4281,6 +4281,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark Row(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22) :: Nil) } } + + test("SPARK-38173: Quoted column cannot be recognized correctly " + + "when quotedRegexColumnNames is true") { + withSQLConf(SQLConf.SUPPORT_QUOTED_REGEX_COLUMN_NAME.key -> "true") { + checkAnswer( + sql( + """ + |SELECT `(C3)?+.+`,T.`C1` * `C2` AS CC + |FROM (SELECT 3 AS C1,2 AS C2,1 AS C3) T + |""".stripMargin), + Row(3, 2, 6) :: Nil) + } + } } case class Foo(bar: Option[String]) From 302cb2257b66642cd3de0f61a700293b8ac7b000 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Wed, 16 Feb 2022 00:48:54 -0800 Subject: [PATCH 247/513] [SPARK-36059][K8S][FOLLOWUP] Support `spark.kubernetes.scheduler.name` ### What changes were proposed in this pull request? Add `spark.kubernetes.scheduler.name` to support specify driver and executor scheduler togerther. ### Why are the changes needed? Before this patch, we have to specify two configuration for driver and executor: ``` spark.kubernetes.executor.scheduler.name='volcano' spark.kubernetes.driver.scheduler.name='volcano' ``` After this patch, we can specify executor and driver scheduler name by one configuration ``` spark.kubernetes.scheduler.name='volcano' ``` ### Does this PR introduce _any_ user-facing change? Yes, a configuration added. ### How was this patch tested? UT Closes #35499 from Yikun/sheduler_name. 
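To illustrate the intended precedence when both levels are set (scheduler names below are arbitrary examples, not recommendations):

```
# shared setting only: driver and executor pods both use volcano
spark.kubernetes.scheduler.name='volcano'

# role-specific setting overrides the shared one for that role only
spark.kubernetes.scheduler.name='volcano'
spark.kubernetes.driver.scheduler.name='my-custom-scheduler'
# -> driver pod: my-custom-scheduler, executor pods: volcano
```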
Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 13 ++++++++-- .../org/apache/spark/deploy/k8s/Config.scala | 9 +++++++ .../spark/deploy/k8s/KubernetesConf.scala | 10 +++++--- .../k8s/features/BasicDriverFeatureStep.scala | 2 +- .../features/BasicExecutorFeatureStep.scala | 2 +- .../deploy/k8s/KubernetesConfSuite.scala | 24 ++++++++++++++----- .../spark/deploy/k8s/PodBuilderSuite.scala | 16 +++++++++++++ .../submit/KubernetesDriverBuilderSuite.scala | 4 ++++ .../k8s/KubernetesExecutorBuilderSuite.scala | 4 ++++ 9 files changed, 71 insertions(+), 13 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 7cb90d8d20ccf..375de57474c79 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1332,7 +1332,7 @@ See the [configuration page](configuration.html) for information on Spark config 3.3.0 - spark.kubernetes.executor.scheduler.name + spark.kubernetes.executor.scheduler.name (none) Specify the scheduler name for each executor pod. @@ -1340,13 +1340,22 @@ See the [configuration page](configuration.html) for information on Spark config 3.0.0 - spark.kubernetes.driver.scheduler.name + spark.kubernetes.driver.scheduler.name (none) Specify the scheduler name for driver pod. 3.3.0 + + spark.kubernetes.scheduler.name + (none) + + Specify the scheduler name for driver and executor pods. If `spark.kubernetes.driver.scheduler.name` or + `spark.kubernetes.executor.scheduler.name` is set, will override this. + + 3.3.0 + spark.kubernetes.configMap.maxSize 1572864 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index bd6bd93ca3da4..91bbb410dca7f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -283,6 +283,15 @@ private[spark] object Config extends Logging { .stringConf .createOptional + val KUBERNETES_SCHEDULER_NAME = + ConfigBuilder("spark.kubernetes.scheduler.name") + .doc("Specify the scheduler name for driver and executor pods. 
If " + + s"`${KUBERNETES_DRIVER_SCHEDULER_NAME.key}` or " + + s"`${KUBERNETES_EXECUTOR_SCHEDULER_NAME.key}` is set, will override this.") + .version("3.3.0") + .stringConf + .createOptional + val KUBERNETES_EXECUTOR_REQUEST_CORES = ConfigBuilder("spark.kubernetes.executor.request.cores") .doc("Specify the cpu request for each executor pod") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala index 46086fac02021..118f4e5a61d3f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala @@ -42,7 +42,7 @@ private[spark] abstract class KubernetesConf(val sparkConf: SparkConf) { def secretEnvNamesToKeyRefs: Map[String, String] def secretNamesToMountPaths: Map[String, String] def volumes: Seq[KubernetesVolumeSpec] - def schedulerName: String + def schedulerName: Option[String] def appId: String def appName: String = get("spark.app.name", "spark") @@ -136,7 +136,9 @@ private[spark] class KubernetesDriverConf( KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, KUBERNETES_DRIVER_VOLUMES_PREFIX) } - override def schedulerName: String = get(KUBERNETES_DRIVER_SCHEDULER_NAME).getOrElse("") + override def schedulerName: Option[String] = { + Option(get(KUBERNETES_DRIVER_SCHEDULER_NAME).getOrElse(get(KUBERNETES_SCHEDULER_NAME).orNull)) + } } private[spark] class KubernetesExecutorConf( @@ -195,7 +197,9 @@ private[spark] class KubernetesExecutorConf( KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, KUBERNETES_EXECUTOR_VOLUMES_PREFIX) } - override def schedulerName: String = get(KUBERNETES_EXECUTOR_SCHEDULER_NAME).getOrElse("") + override def schedulerName: Option[String] = { + Option(get(KUBERNETES_EXECUTOR_SCHEDULER_NAME).getOrElse(get(KUBERNETES_SCHEDULER_NAME).orNull)) + } private def checkExecutorEnvKey(key: String): Boolean = { // Pattern for matching an executorEnv key, which meets certain naming rules. 
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala index 925f9dc93a26d..f2104d433ad49 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -152,7 +152,7 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) .endSpec() .build() - conf.get(KUBERNETES_DRIVER_SCHEDULER_NAME) + conf.schedulerName .foreach(driverPod.getSpec.setSchedulerName) SparkPod(driverPod, driverContainer) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index 6a339efcf3f85..c6084720c56fe 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -299,7 +299,7 @@ private[spark] class BasicExecutorFeatureStep( .endSpec() .build() } - kubernetesConf.get(KUBERNETES_EXECUTOR_SCHEDULER_NAME) + kubernetesConf.schedulerName .foreach(executorPod.getSpec.setSchedulerName) SparkPod(executorPod, containerWithLifecycle) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala index d0a222df40bc1..eecaff262bf66 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala @@ -206,16 +206,28 @@ class KubernetesConfSuite extends SparkFunSuite { test("SPARK-36059: Set driver.scheduler and executor.scheduler") { val sparkConf = new SparkConf(false) val execUnsetConf = KubernetesTestConf.createExecutorConf(sparkConf) - val driverUnsetConf = KubernetesTestConf.createExecutorConf(sparkConf) - assert(execUnsetConf.schedulerName === "") - assert(driverUnsetConf.schedulerName === "") - + val driverUnsetConf = KubernetesTestConf.createDriverConf(sparkConf) + assert(execUnsetConf.schedulerName === None) + assert(driverUnsetConf.schedulerName === None) + + sparkConf.set(KUBERNETES_SCHEDULER_NAME, "sameScheduler") + // Use KUBERNETES_SCHEDULER_NAME when is NOT set + assert(KubernetesTestConf.createDriverConf(sparkConf).schedulerName === Some("sameScheduler")) + assert(KubernetesTestConf.createExecutorConf(sparkConf).schedulerName === Some("sameScheduler")) + + // Override by driver/executor side scheduler when "" + sparkConf.set(KUBERNETES_DRIVER_SCHEDULER_NAME, "") + sparkConf.set(KUBERNETES_EXECUTOR_SCHEDULER_NAME, "") + assert(KubernetesTestConf.createDriverConf(sparkConf).schedulerName === Some("")) + assert(KubernetesTestConf.createExecutorConf(sparkConf).schedulerName === Some("")) + + // Override by driver/executor side scheduler when set sparkConf.set(KUBERNETES_DRIVER_SCHEDULER_NAME, "driverScheduler") sparkConf.set(KUBERNETES_EXECUTOR_SCHEDULER_NAME, "executorScheduler") val execConf = 
KubernetesTestConf.createExecutorConf(sparkConf) - assert(execConf.schedulerName === "executorScheduler") + assert(execConf.schedulerName === Some("executorScheduler")) val driverConf = KubernetesTestConf.createDriverConf(sparkConf) - assert(driverConf.schedulerName === "driverScheduler") + assert(driverConf.schedulerName === Some("driverScheduler")) } test("SPARK-37735: access appId in KubernetesConf") { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala index c076f22c7b141..642c18db541e1 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala @@ -36,6 +36,8 @@ abstract class PodBuilderSuite extends SparkFunSuite { protected def templateFileConf: ConfigEntry[_] + protected def roleSpecificSchedulerNameConf: ConfigEntry[_] + protected def userFeatureStepsConf: ConfigEntry[_] protected def userFeatureStepWithExpectedAnnotation: (String, String) @@ -53,6 +55,20 @@ abstract class PodBuilderSuite extends SparkFunSuite { verify(client, never()).pods() } + test("SPARK-36059: set custom scheduler") { + val client = mockKubernetesClient() + val conf1 = baseConf.clone().set(templateFileConf.key, "template-file.yaml") + .set(Config.KUBERNETES_SCHEDULER_NAME.key, "custom") + val pod1 = buildPod(conf1, client) + assert(pod1.pod.getSpec.getSchedulerName === "custom") + + val conf2 = baseConf.clone().set(templateFileConf.key, "template-file.yaml") + .set(Config.KUBERNETES_SCHEDULER_NAME.key, "custom") + .set(roleSpecificSchedulerNameConf.key, "rolescheduler") + val pod2 = buildPod(conf2, client) + assert(pod2.pod.getSpec.getSchedulerName === "rolescheduler") + } + test("load pod template if specified") { val client = mockKubernetesClient() val sparkConf = baseConf.clone().set(templateFileConf.key, "template-file.yaml") diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala index 5389a880d1b4a..861b8e0fff943 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala @@ -34,6 +34,10 @@ class KubernetesDriverBuilderSuite extends PodBuilderSuite { Config.KUBERNETES_DRIVER_PODTEMPLATE_FILE } + override protected def roleSpecificSchedulerNameConf: ConfigEntry[_] = { + Config.KUBERNETES_DRIVER_SCHEDULER_NAME + } + override protected def userFeatureStepsConf: ConfigEntry[_] = { Config.KUBERNETES_DRIVER_POD_FEATURE_STEPS } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala index adbb5b296c9dc..97f7f4876ec12 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala @@ -33,6 +33,10 @@ class 
KubernetesExecutorBuilderSuite extends PodBuilderSuite { Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE } + override protected def roleSpecificSchedulerNameConf: ConfigEntry[_] = { + Config.KUBERNETES_EXECUTOR_SCHEDULER_NAME + } + override protected def userFeatureStepsConf: ConfigEntry[_] = { Config.KUBERNETES_EXECUTOR_POD_FEATURE_STEPS } From c4104086d9a858ea812589c06331cfeb921a9f32 Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Wed, 16 Feb 2022 00:51:13 -0800 Subject: [PATCH 248/513] [SPARK-38226][SQL][TESTS] Fix HiveCompatibilitySuite under ANSI mode ### What changes were proposed in this pull request? Fix sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala under ANSI mode. ### Why are the changes needed? To set up a new GA job to run tests with ANSI mode before 3.3.0 release. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual test by enabling ANSI on or off. Closes #35538 from anchovYu/SPARK-38226-fix-HiveCompatibilitySuite-ansi. Authored-by: Xinyi Yu Signed-off-by: Dongjoon Hyun --- .../spark/sql/hive/execution/HiveCompatibilitySuite.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 37efb2d1ba49e..bd323dc4b24e1 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -40,6 +40,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { private val originalInMemoryPartitionPruning = TestHive.conf.inMemoryPartitionPruning private val originalCrossJoinEnabled = TestHive.conf.crossJoinEnabled private val originalSessionLocalTimeZone = TestHive.conf.sessionLocalTimeZone + private val originalAnsiMode = TestHive.conf.getConf(SQLConf.ANSI_ENABLED) private val originalCreateHiveTable = TestHive.conf.getConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT) @@ -56,6 +57,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true) // Ensures that cross joins are enabled so that we can test them TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, true) + // Hive doesn't follow ANSI Standard. 
+ TestHive.setConf(SQLConf.ANSI_ENABLED, false) // Ensures that the table insertion behavior is consistent with Hive TestHive.setConf(SQLConf.STORE_ASSIGNMENT_POLICY, StoreAssignmentPolicy.LEGACY.toString) // Fix session local timezone to America/Los_Angeles for those timezone sensitive tests @@ -72,6 +75,7 @@ TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning) TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled) TestHive.setConf(SQLConf.SESSION_LOCAL_TIMEZONE, originalSessionLocalTimeZone) + TestHive.setConf(SQLConf.ANSI_ENABLED, originalAnsiMode) TestHive.setConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT, originalCreateHiveTable) // For debugging dump some statistics about how much time was spent in various optimizer rules From 598461498192ab71233c03183d47b609bf375f9a Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Wed, 16 Feb 2022 01:00:24 -0800 Subject: [PATCH 249/513] [SPARK-36061][K8S] Add `volcano` module and feature step ### What changes were proposed in this pull request? This patch adds a Volcano feature step to help users integrate Spark with the Volcano scheduler. - Add a VolcanoFeatureStep that can be used on both the driver and executor side. After this patch, users can enable this feature step when submitting a job by using ```shell --conf spark.kubernetes.driver.scheduler.name=volcano \ --conf spark.kubernetes.driver.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep ``` A PodGroup will be created before the driver starts, and an annotation will be set on the driver pod to add it to this pod group. Then, the Volcano scheduler will schedule the driver pod instead of the default Kubernetes scheduler. ### Why are the changes needed? This PR helps users integrate Spark with the Volcano scheduler. See also: [SPARK-36057](https://issues.apache.org/jira/browse/SPARK-36057) ### Does this PR introduce _any_ user-facing change? Yes, a user feature step is introduced. It is used by `VolcanoFeatureStep`, and will also be used by `YunikornFeatureStep` in the future. ### How was this patch tested? - UT - Integration test: Test without -Pvolcano (make sure existing integration tests pass) ```bash # 1. Test without -Pvolcano (make sure existing integration tests pass) # SBT build/sbt -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube,r "kubernetes-integration-tests/test" # Maven resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh --exclude-tags minikube,r ``` - Integration test: Test all VolcanoSuite (all kubernetes test with volcano + a new podgroup test) and KubernetesSuite ```bash # Deploy Volcano (x86) kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/master/installer/volcano-development.yaml # Deploy Volcano (arm64) kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/master/installer/volcano-development-arm64.yaml # Test all VolcanoSuite (all kubernetes test with volcano + a new podgroup test) and KubernetesSuite build/sbt -Pvolcano -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube,r "kubernetes-integration-tests/test" ``` Closes #35422 from Yikun/SPARK-36061-vc-step.
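For context, a user-defined feature step plugs into the same extension points that `VolcanoFeatureStep` uses below. The sketch that follows is a hypothetical step, not part of this patch: the class name and annotation key are invented, and it assumes the `KubernetesDriverCustomFeatureConfigStep` trait referenced in the new Volcano code is visible to user code.

```scala
import io.fabric8.kubernetes.api.model.PodBuilder

import org.apache.spark.deploy.k8s.{KubernetesDriverConf, SparkPod}
import org.apache.spark.deploy.k8s.features.KubernetesDriverCustomFeatureConfigStep

// Hypothetical user step: annotate the driver pod with the Spark application id.
class AppIdAnnotationFeatureStep extends KubernetesDriverCustomFeatureConfigStep {
  private var conf: KubernetesDriverConf = _

  override def init(config: KubernetesDriverConf): Unit = {
    conf = config
  }

  override def configurePod(pod: SparkPod): SparkPod = {
    val annotated = new PodBuilder(pod.pod)
      .editOrNewMetadata()
        .addToAnnotations("example.com/spark-app-id", conf.appId) // illustrative key
      .endMetadata()
      .build()
    SparkPod(annotated, pod.container)
  }
}
```

Such a step would be enabled the same way as the Volcano step above, by listing its fully qualified class name in `spark.kubernetes.driver.pod.featureSteps`.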
Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 4 +- dev/scalastyle | 2 +- project/SparkBuild.scala | 12 ++++ resource-managers/kubernetes/core/pom.xml | 30 ++++++++ .../k8s/features/VolcanoFeatureStep.scala | 60 ++++++++++++++++ .../features/VolcanoFeatureStepSuite.scala | 49 +++++++++++++ .../kubernetes/integration-tests/pom.xml | 33 +++++++++ .../k8s/integrationtest/KubernetesSuite.scala | 6 +- .../k8s/integrationtest/VolcanoSuite.scala | 33 +++++++++ .../integrationtest/VolcanoTestsSuite.scala | 70 +++++++++++++++++++ 10 files changed, 295 insertions(+), 4 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoSuite.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ae35f50d1d2a8..060adc487606e 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -614,7 +614,7 @@ jobs: export MAVEN_CLI_OPTS="--no-transfer-progress" export JAVA_VERSION=${{ matrix.java }} # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414. - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install + ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install rm -rf ~/.m2/repository/org/apache/spark scala-213: @@ -660,7 +660,7 @@ jobs: - name: Build with SBT run: | ./dev/change-scala-version.sh 2.13 - ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile + ./build/sbt -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile tpcds-1g: needs: [configure-jobs, precondition] diff --git a/dev/scalastyle b/dev/scalastyle index 212ef900eb9b4..5f958b8fb0a7b 100755 --- a/dev/scalastyle +++ b/dev/scalastyle @@ -17,7 +17,7 @@ # limitations under the License. # -SPARK_PROFILES=${1:-"-Pmesos -Pkubernetes -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"} +SPARK_PROFILES=${1:-"-Pmesos -Pkubernetes -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive -Pvolcano"} # NOTE: echo "q" is needed because SBT prompts the user for input on encountering a build file # with failure (either resolution or compilation); the "q" makes SBT quit. 
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 3d3a65f3d2333..1b8b258af2776 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -421,6 +421,11 @@ object SparkBuild extends PomBuild { // SPARK-14738 - Remove docker tests from main Spark build // enable(DockerIntegrationTests.settings)(dockerIntegrationTests) + if (!profiles.contains("volcano")) { + enable(Volcano.settings)(kubernetes) + enable(Volcano.settings)(kubernetesIntegrationTests) + } + enable(KubernetesIntegrationTests.settings)(kubernetesIntegrationTests) enable(YARN.settings)(yarn) @@ -956,6 +961,13 @@ object SparkR { ) } +object Volcano { + // Exclude all volcano file for Compile and Test + lazy val settings = Seq( + unmanagedSources / excludeFilter := HiddenFileFilter || "*Volcano*.scala" + ) +} + object Unidoc { import BuildCommons._ diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 0cb5e115906a5..6eb357ef2490c 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -29,8 +29,25 @@ Spark Project Kubernetes kubernetes + **/*Volcano*.scala + + + volcano + + + + + + io.fabric8 + volcano-model-v1beta1 + ${kubernetes-client.version} + + + + + org.apache.spark @@ -103,6 +120,19 @@ + + + + net.alchim31.maven + scala-maven-plugin + + + ${volcano.exclude} + + + + + target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala new file mode 100644 index 0000000000000..1c936848db67f --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.features + +import io.fabric8.kubernetes.api.model._ +import io.fabric8.volcano.scheduling.v1beta1.PodGroupBuilder + +import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverConf, KubernetesExecutorConf, SparkPod} + +private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureConfigStep + with KubernetesExecutorCustomFeatureConfigStep { + + private var kubernetesConf: KubernetesConf = _ + + private val POD_GROUP_ANNOTATION = "scheduling.k8s.io/group-name" + + private lazy val podGroupName = s"${kubernetesConf.appId}-podgroup" + private lazy val namespace = kubernetesConf.namespace + + override def init(config: KubernetesDriverConf): Unit = { + kubernetesConf = config + } + + override def init(config: KubernetesExecutorConf): Unit = { + kubernetesConf = config + } + + override def getAdditionalPreKubernetesResources(): Seq[HasMetadata] = { + val podGroup = new PodGroupBuilder() + .editOrNewMetadata() + .withName(podGroupName) + .withNamespace(namespace) + .endMetadata() + .build() + Seq(podGroup) + } + + override def configurePod(pod: SparkPod): SparkPod = { + val k8sPodBuilder = new PodBuilder(pod.pod) + .editMetadata() + .addToAnnotations(POD_GROUP_ANNOTATION, podGroupName) + .endMetadata() + val k8sPod = k8sPodBuilder.build() + SparkPod(k8sPod, pod.container) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala new file mode 100644 index 0000000000000..cf337f99cab97 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.features + +import io.fabric8.volcano.scheduling.v1beta1.PodGroup + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.k8s._ + +class VolcanoFeatureStepSuite extends SparkFunSuite { + + test("SPARK-36061: Driver Pod with Volcano PodGroup") { + val sparkConf = new SparkConf() + val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf) + val step = new VolcanoFeatureStep() + step.init(kubernetesConf) + val configuredPod = step.configurePod(SparkPod.initialPod()) + + val annotations = configuredPod.pod.getMetadata.getAnnotations + + assert(annotations.get("scheduling.k8s.io/group-name") === s"${kubernetesConf.appId}-podgroup") + val podGroup = step.getAdditionalPreKubernetesResources().head.asInstanceOf[PodGroup] + assert(podGroup.getMetadata.getName === s"${kubernetesConf.appId}-podgroup") + } + + test("SPARK-36061: Executor Pod with Volcano PodGroup") { + val sparkConf = new SparkConf() + val kubernetesConf = KubernetesTestConf.createExecutorConf(sparkConf) + val step = new VolcanoFeatureStep() + step.init(kubernetesConf) + val configuredPod = step.configurePod(SparkPod.initialPod()) + val annotations = configuredPod.pod.getMetadata.getAnnotations + assert(annotations.get("scheduling.k8s.io/group-name") === s"${kubernetesConf.appId}-podgroup") + } +} diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index a44cedb9e1e25..0bc8508cbf86c 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -47,6 +47,7 @@ + **/*Volcano*.scala jar Spark Project Kubernetes Integration Tests @@ -74,9 +75,28 @@ spark-tags_${scala.binary.version} test-jar + + org.apache.spark + spark-kubernetes_${scala.binary.version} + ${project.version} + test + + + + + net.alchim31.maven + scala-maven-plugin + + + ${volcano.exclude} + + + + + org.codehaus.mojo @@ -209,5 +229,18 @@ + + volcano + + + + + + io.fabric8 + volcano-client + ${kubernetes-client.version} + + + diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index c1237e3eb9df4..69b736951301e 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -181,7 +181,7 @@ class KubernetesSuite extends SparkFunSuite } } - before { + protected def setUpTest(): Unit = { appLocator = UUID.randomUUID().toString.replaceAll("-", "") driverPodName = "spark-test-app-" + UUID.randomUUID().toString.replaceAll("-", "") sparkAppConf = kubernetesTestComponents.newSparkAppConf() @@ -195,6 +195,10 @@ class KubernetesSuite extends SparkFunSuite } } + before { + setUpTest() + } + after { if (!kubernetesTestComponents.hasUserSpecifiedNamespace) { kubernetesTestComponents.deleteNamespace() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoSuite.scala new file mode 100644 index 0000000000000..ed7371718f9a5 --- /dev/null +++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoSuite.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.integrationtest + +import org.scalatest.Tag + +class VolcanoSuite extends KubernetesSuite with VolcanoTestsSuite { + + override protected def setUpTest(): Unit = { + super.setUpTest() + sparkAppConf + .set("spark.kubernetes.driver.scheduler.name", "volcano") + .set("spark.kubernetes.executor.scheduler.name", "volcano") + } +} + +private[spark] object VolcanoSuite { + val volcanoTag = Tag("volcano") +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala new file mode 100644 index 0000000000000..377a1b8167984 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.integrationtest + +import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.volcano.client.VolcanoClient + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.k8s.features.VolcanoFeatureStep +import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.k8sTestTag +import org.apache.spark.deploy.k8s.integrationtest.VolcanoSuite.volcanoTag + +private[spark] trait VolcanoTestsSuite { k8sSuite: KubernetesSuite => + import VolcanoTestsSuite._ + + protected def checkScheduler(pod: Pod): Unit = { + assert(pod.getSpec.getSchedulerName === "volcano") + } + + protected def checkAnnotaion(pod: Pod): Unit = { + val appId = pod.getMetadata.getLabels.get("spark-app-selector") + val annotations = pod.getMetadata.getAnnotations + assert(annotations.get("scheduling.k8s.io/group-name") === s"$appId-podgroup") + } + + protected def checkPodGroup(pod: Pod): Unit = { + val appId = pod.getMetadata.getLabels.get("spark-app-selector") + val podGroupName = s"$appId-podgroup" + val volcanoClient = kubernetesTestComponents.kubernetesClient.adapt(classOf[VolcanoClient]) + val podGroup = volcanoClient.podGroups().withName(podGroupName).get() + assert(podGroup.getMetadata.getOwnerReferences.get(0).getName === pod.getMetadata.getName) + } + + test("Run SparkPi with volcano scheduler", k8sTestTag, volcanoTag) { + sparkAppConf + .set("spark.kubernetes.driver.pod.featureSteps", VOLCANO_FEATURE_STEP) + .set("spark.kubernetes.executor.pod.featureSteps", VOLCANO_FEATURE_STEP) + runSparkPiAndVerifyCompletion( + driverPodChecker = (driverPod: Pod) => { + doBasicDriverPodCheck(driverPod) + checkScheduler(driverPod) + checkAnnotaion(driverPod) + checkPodGroup(driverPod) + }, + executorPodChecker = (executorPod: Pod) => { + doBasicExecutorPodCheck(executorPod) + checkScheduler(executorPod) + checkAnnotaion(executorPod) + } + ) + } +} + +private[spark] object VolcanoTestsSuite extends SparkFunSuite { + val VOLCANO_FEATURE_STEP = classOf[VolcanoFeatureStep].getName +} From 70f5bfd665b449fb3d7223c81fbd5a53d7985b9d Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 16 Feb 2022 08:15:48 -0800 Subject: [PATCH 250/513] [SPARK-38231][BUILD] Upgrade commons-text to 1.9 ### What changes were proposed in this pull request? This PR aims to upgrade commons-text to 1.9. ### Why are the changes needed? 1.9 is the latest release and is more popular than 1.6. - https://commons.apache.org/proper/commons-text/changes-report.html#a1.9 - https://mvnrepository.com/artifact/org.apache.commons/commons-text ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35542 from LuciferYang/upgrade-common-text.
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 26c5439f7d612..b4fd14b30a4dd 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -53,7 +53,7 @@ commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.6.1//commons-math3-3.6.1.jar commons-net/3.1//commons-net-3.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar -commons-text/1.6//commons-text-1.6.jar +commons-text/1.9//commons-text-1.9.jar compress-lzf/1.0.3//compress-lzf-1.0.3.jar core/1.1.2//core-1.1.2.jar curator-client/2.7.1//curator-client-2.7.1.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index dd95710016340..96bd2663df60a 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -51,7 +51,7 @@ commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.6.1//commons-math3-3.6.1.jar commons-net/3.1//commons-net-3.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar -commons-text/1.6//commons-text-1.6.jar +commons-text/1.9//commons-text-1.9.jar compress-lzf/1.0.3//compress-lzf-1.0.3.jar core/1.1.2//core-1.1.2.jar cos_api-bundle/5.6.19//cos_api-bundle-5.6.19.jar diff --git a/pom.xml b/pom.xml index b0791f7300ab8..7165cb5229821 100644 --- a/pom.xml +++ b/pom.xml @@ -584,7 +584,7 @@ org.apache.commons commons-text - 1.6 + 1.9 commons-lang From bbfd058b551935a9916410e820fef18ca67c95c7 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 17 Feb 2022 00:50:13 +0800 Subject: [PATCH 251/513] [SPARK-38228][SQL] Legacy store assignment should not fail on error under ANSI mode ### What changes were proposed in this pull request? When using legacy store assignment for table insertion, Spark SQL forces the use of the default cast, which doesn't fail on error. ### Why are the changes needed? As the ANSI store assignment policy forces the use of the ANSI cast, the legacy store assignment should force the use of the default cast, which doesn't fail on error. See discussions in https://github.com/apache/spark/pull/35511#discussion_r807529783 as well. ### Does this PR introduce _any_ user-facing change? Yes, when using legacy store assignment for table insertion, Spark SQL forces the use of the default cast, which doesn't fail on error. ### How was this patch tested? Unit test Closes #35539 from gengliangwang/legacyStoreAssignment.
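As a rough standalone illustration of the guaranteed behavior (a sketch only: the local session setup, object name, and table name are assumptions; the values mirror the new test added below), with the LEGACY policy the malformed string is stored as NULL even when ANSI mode is on, instead of failing the insert:

```scala
import org.apache.spark.sql.SparkSession

object LegacyStoreAssignmentSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .config("spark.sql.storeAssignmentPolicy", "LEGACY")
      .config("spark.sql.ansi.enabled", "true")
      .getOrCreate()

    spark.sql("CREATE TABLE t(a INT) USING parquet")
    // The legacy policy uses the non-ANSI cast, so the bad value becomes NULL
    // rather than raising a runtime error, even though ANSI mode is enabled.
    spark.sql("INSERT INTO t VALUES('ansi')")
    spark.sql("SELECT * FROM t").show()  // one row containing NULL

    spark.stop()
  }
}
```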
Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../analysis/TableOutputResolver.scala | 3 +++ .../datasources/DataSourceStrategy.scala | 7 ++++-- .../apache/spark/sql/SQLInsertTestSuite.scala | 23 +++++++++++++++---- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala index d471d754e7f8b..2cd069e5858da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala @@ -236,6 +236,9 @@ object TableOutputResolver { val casted = storeAssignmentPolicy match { case StoreAssignmentPolicy.ANSI => AnsiCast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone)) + case StoreAssignmentPolicy.LEGACY => + Cast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone), + ansiEnabled = false) case _ => Cast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 5dce3f29deef0..a1602a3aa4880 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -61,7 +61,7 @@ import org.apache.spark.unsafe.types.UTF8String * Note that, this rule must be run after `PreprocessTableCreation` and * `PreprocessTableInsertion`. */ -object DataSourceAnalysis extends Rule[LogicalPlan] with CastSupport { +object DataSourceAnalysis extends Rule[LogicalPlan] { def resolver: Resolver = conf.resolver @@ -115,7 +115,10 @@ object DataSourceAnalysis extends Rule[LogicalPlan] with CastSupport { Some(Alias(AnsiCast(Literal(partValue), field.dataType, Option(conf.sessionLocalTimeZone)), field.name)()) case _ => - Some(Alias(cast(Literal(partValue), field.dataType), field.name)()) + val castExpression = + Cast(Literal(partValue), field.dataType, Option(conf.sessionLocalTimeZone), + ansiEnabled = false) + Some(Alias(castExpression, field.name)()) } } else { throw QueryCompilationErrors.multiplePartitionColumnValuesSpecifiedError( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala index a3f602353a096..fad01db82ca0e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala @@ -291,13 +291,11 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { case SQLConf.StoreAssignmentPolicy.ANSI | SQLConf.StoreAssignmentPolicy.STRICT => true case SQLConf.StoreAssignmentPolicy.LEGACY => - SQLConf.get.ansiEnabled + false } testingPolicies.foreach { policy => - withSQLConf( - SQLConf.STORE_ASSIGNMENT_POLICY.key -> policy.toString, - SQLConf.ANSI_ENABLED.key -> "false") { + withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> policy.toString) { withTable("t") { sql("create table t(a int, b string) using parquet partitioned by (a)") if (shouldThrowException(policy)) { @@ -313,6 +311,23 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { } } } + + test("SPARK-38228: legacy store assignment should not fail on 
error under ANSI mode") { + // DS v2 doesn't support the legacy policy + if (format != "foo") { + Seq(true, false).foreach { ansiEnabled => + withSQLConf( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> SQLConf.StoreAssignmentPolicy.LEGACY.toString, + SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + withTable("t") { + sql("create table t(a int) using parquet") + sql("insert into t values('ansi')") + checkAnswer(spark.table("t"), Row(null)) + } + } + } + } + } } class FileSourceSQLInsertTestSuite extends SQLInsertTestSuite with SharedSparkSession { From 4342b630f86dcfc6760d91558dfae7036363130c Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 17 Feb 2022 01:48:11 +0100 Subject: [PATCH 252/513] [SPARK-37411][PYTHON][ML] Inline hints for pyspark.ml.regression ### What changes were proposed in this pull request? This PR migrates type `pyspark.ml.regression` annotations from stub file to inline type hints. ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35427 from zero323/SPARK-37411. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/ml/regression.py | 1180 ++++++++++++++++-------------- python/pyspark/ml/regression.pyi | 827 --------------------- 2 files changed, 633 insertions(+), 1374 deletions(-) delete mode 100644 python/pyspark/ml/regression.pyi diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 0faca85354f58..f6190c46c38f1 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -17,6 +17,8 @@ import sys +from typing import Any, Dict, Generic, List, Optional, TypeVar, TYPE_CHECKING + from abc import ABCMeta from pyspark import keyword_only, since @@ -52,6 +54,8 @@ _GBTParams, _TreeRegressorParams, ) +from pyspark.ml.base import Transformer +from pyspark.ml.linalg import Vector, Matrix from pyspark.ml.util import ( JavaMLWritable, JavaMLReadable, @@ -63,11 +67,19 @@ JavaModel, JavaPredictor, JavaPredictionModel, + JavaTransformer, JavaWrapper, ) from pyspark.ml.common import inherit_doc from pyspark.sql import DataFrame +if TYPE_CHECKING: + from py4j.java_gateway import JavaObject # type: ignore[import] + +T = TypeVar("T") +M = TypeVar("M", bound=Transformer) +JM = TypeVar("JM", bound=JavaTransformer) + __all__ = [ "AFTSurvivalRegression", @@ -93,7 +105,7 @@ ] -class Regressor(Predictor, _PredictorParams, metaclass=ABCMeta): +class Regressor(Predictor[M], _PredictorParams, Generic[M], metaclass=ABCMeta): """ Regressor for regression tasks. @@ -103,7 +115,7 @@ class Regressor(Predictor, _PredictorParams, metaclass=ABCMeta): pass -class RegressionModel(PredictionModel, _PredictorParams, metaclass=ABCMeta): +class RegressionModel(PredictionModel[T], _PredictorParams, metaclass=ABCMeta): """ Model produced by a ``Regressor``. @@ -113,7 +125,7 @@ class RegressionModel(PredictionModel, _PredictorParams, metaclass=ABCMeta): pass -class _JavaRegressor(Regressor, JavaPredictor, metaclass=ABCMeta): +class _JavaRegressor(Regressor, JavaPredictor[JM], Generic[JM], metaclass=ABCMeta): """ Java Regressor for regression tasks. @@ -123,7 +135,7 @@ class _JavaRegressor(Regressor, JavaPredictor, metaclass=ABCMeta): pass -class _JavaRegressionModel(RegressionModel, JavaPredictionModel, metaclass=ABCMeta): +class _JavaRegressionModel(RegressionModel, JavaPredictionModel[T], metaclass=ABCMeta): """ Java Model produced by a ``_JavaRegressor``. 
To be mixed in with :class:`pyspark.ml.JavaModel` @@ -154,21 +166,21 @@ class _LinearRegressionParams( .. versionadded:: 3.0.0 """ - solver = Param( + solver: Param[str] = Param( Params._dummy(), "solver", "The solver algorithm for optimization. Supported " + "options: auto, normal, l-bfgs.", typeConverter=TypeConverters.toString, ) - loss = Param( + loss: Param[str] = Param( Params._dummy(), "loss", "The loss function to be optimized. Supported " + "options: squaredError, huber.", typeConverter=TypeConverters.toString, ) - epsilon = Param( + epsilon: Param[float] = Param( Params._dummy(), "epsilon", "The shape parameter to control the amount of " @@ -176,7 +188,7 @@ class _LinearRegressionParams( typeConverter=TypeConverters.toFloat, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_LinearRegressionParams, self).__init__(*args) self._setDefault( maxIter=100, @@ -188,7 +200,7 @@ def __init__(self, *args): ) @since("2.3.0") - def getEpsilon(self): + def getEpsilon(self) -> float: """ Gets the value of epsilon or its default value. """ @@ -196,7 +208,12 @@ def getEpsilon(self): @inherit_doc -class LinearRegression(_JavaRegressor, _LinearRegressionParams, JavaMLWritable, JavaMLReadable): +class LinearRegression( + _JavaRegressor["LinearRegressionModel"], + _LinearRegressionParams, + JavaMLWritable, + JavaMLReadable["LinearRegression"], +): """ Linear regression. @@ -279,25 +296,27 @@ class LinearRegression(_JavaRegressor, _LinearRegressionParams, JavaMLWritable, >>> model.write().format("pmml").save(model_path + "_2") """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxIter=100, - regParam=0.0, - elasticNetParam=0.0, - tol=1e-6, - fitIntercept=True, - standardization=True, - solver="auto", - weightCol=None, - aggregationDepth=2, - loss="squaredError", - epsilon=1.35, - maxBlockSizeInMB=0.0, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxIter: int = 100, + regParam: float = 0.0, + elasticNetParam: float = 0.0, + tol: float = 1e-6, + fitIntercept: bool = True, + standardization: bool = True, + solver: str = "auto", + weightCol: Optional[str] = None, + aggregationDepth: int = 2, + loss: str = "squaredError", + epsilon: float = 1.35, + maxBlockSizeInMB: float = 0.0, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -317,22 +336,22 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxIter=100, - regParam=0.0, - elasticNetParam=0.0, - tol=1e-6, - fitIntercept=True, - standardization=True, - solver="auto", - weightCol=None, - aggregationDepth=2, - loss="squaredError", - epsilon=1.35, - maxBlockSizeInMB=0.0, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxIter: int = 100, + regParam: float = 0.0, + elasticNetParam: float = 0.0, + tol: float = 1e-6, + fitIntercept: bool = True, + standardization: bool = True, + solver: str = "auto", + weightCol: Optional[str] = None, + aggregationDepth: int = 2, + loss: str = "squaredError", + epsilon: float = 1.35, + maxBlockSizeInMB: float = 0.0, + ) -> "LinearRegression": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ @@ -343,78 +362,78 @@ def setParams( kwargs = self._input_kwargs 
return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "LinearRegressionModel": return LinearRegressionModel(java_model) @since("2.3.0") - def setEpsilon(self, value): + def setEpsilon(self, value: float) -> "LinearRegression": """ Sets the value of :py:attr:`epsilon`. """ return self._set(epsilon=value) - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "LinearRegression": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) - def setRegParam(self, value): + def setRegParam(self, value: float) -> "LinearRegression": """ Sets the value of :py:attr:`regParam`. """ return self._set(regParam=value) - def setTol(self, value): + def setTol(self, value: float) -> "LinearRegression": """ Sets the value of :py:attr:`tol`. """ return self._set(tol=value) - def setElasticNetParam(self, value): + def setElasticNetParam(self, value: float) -> "LinearRegression": """ Sets the value of :py:attr:`elasticNetParam`. """ return self._set(elasticNetParam=value) - def setFitIntercept(self, value): + def setFitIntercept(self, value: bool) -> "LinearRegression": """ Sets the value of :py:attr:`fitIntercept`. """ return self._set(fitIntercept=value) - def setStandardization(self, value): + def setStandardization(self, value: bool) -> "LinearRegression": """ Sets the value of :py:attr:`standardization`. """ return self._set(standardization=value) - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "LinearRegression": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) - def setSolver(self, value): + def setSolver(self, value: str) -> "LinearRegression": """ Sets the value of :py:attr:`solver`. """ return self._set(solver=value) - def setAggregationDepth(self, value): + def setAggregationDepth(self, value: int) -> "LinearRegression": """ Sets the value of :py:attr:`aggregationDepth`. """ return self._set(aggregationDepth=value) - def setLoss(self, value): + def setLoss(self, value: str) -> "LinearRegression": """ Sets the value of :py:attr:`loss`. """ return self._set(lossType=value) @since("3.1.0") - def setMaxBlockSizeInMB(self, value): + def setMaxBlockSizeInMB(self, value: float) -> "LinearRegression": """ Sets the value of :py:attr:`maxBlockSizeInMB`. """ @@ -425,8 +444,8 @@ class LinearRegressionModel( _JavaRegressionModel, _LinearRegressionParams, GeneralJavaMLWritable, - JavaMLReadable, - HasTrainingSummary, + JavaMLReadable["LinearRegressionModel"], + HasTrainingSummary["LinearRegressionSummary"], ): """ Model fitted by :class:`LinearRegression`. @@ -434,33 +453,33 @@ class LinearRegressionModel( .. versionadded:: 1.4.0 """ - @property + @property # type: ignore[misc] @since("2.0.0") - def coefficients(self): + def coefficients(self) -> Vector: """ Model coefficients. """ return self._call_java("coefficients") - @property + @property # type: ignore[misc] @since("1.4.0") - def intercept(self): + def intercept(self) -> float: """ Model intercept. """ return self._call_java("intercept") - @property + @property # type: ignore[misc] @since("2.3.0") - def scale(self): + def scale(self) -> float: r""" The value by which :math:`\|y - X'w\|` is scaled down when loss is "huber", otherwise 1.0. """ return self._call_java("scale") - @property + @property # type: ignore[misc] @since("2.0.0") - def summary(self): + def summary(self) -> "LinearRegressionTrainingSummary": """ Gets summary (residuals, MSE, r-squared ) of model on training set. 
An exception is thrown if @@ -473,7 +492,7 @@ def summary(self): "No training summary available for this %s" % self.__class__.__name__ ) - def evaluate(self, dataset): + def evaluate(self, dataset: DataFrame) -> "LinearRegressionSummary": """ Evaluates the model on a test dataset. @@ -498,44 +517,44 @@ class LinearRegressionSummary(JavaWrapper): .. versionadded:: 2.0.0 """ - @property + @property # type: ignore[misc] @since("2.0.0") - def predictions(self): + def predictions(self) -> DataFrame: """ Dataframe outputted by the model's `transform` method. """ return self._call_java("predictions") - @property + @property # type: ignore[misc] @since("2.0.0") - def predictionCol(self): + def predictionCol(self) -> str: """ Field in "predictions" which gives the predicted value of the label at each instance. """ return self._call_java("predictionCol") - @property + @property # type: ignore[misc] @since("2.0.0") - def labelCol(self): + def labelCol(self) -> str: """ Field in "predictions" which gives the true label of each instance. """ return self._call_java("labelCol") - @property + @property # type: ignore[misc] @since("2.0.0") - def featuresCol(self): + def featuresCol(self) -> str: """ Field in "predictions" which gives the features of each instance as a vector. """ return self._call_java("featuresCol") - @property + @property # type: ignore[misc] @since("2.0.0") - def explainedVariance(self): + def explainedVariance(self) -> float: r""" Returns the explained variance regression score. explainedVariance = :math:`1 - \frac{variance(y - \hat{y})}{variance(y)}` @@ -552,9 +571,9 @@ def explainedVariance(self): """ return self._call_java("explainedVariance") - @property + @property # type: ignore[misc] @since("2.0.0") - def meanAbsoluteError(self): + def meanAbsoluteError(self) -> float: """ Returns the mean absolute error, which is a risk function corresponding to the expected value of the absolute error @@ -568,9 +587,9 @@ def meanAbsoluteError(self): """ return self._call_java("meanAbsoluteError") - @property + @property # type: ignore[misc] @since("2.0.0") - def meanSquaredError(self): + def meanSquaredError(self) -> float: """ Returns the mean squared error, which is a risk function corresponding to the expected value of the squared error @@ -584,9 +603,9 @@ def meanSquaredError(self): """ return self._call_java("meanSquaredError") - @property + @property # type: ignore[misc] @since("2.0.0") - def rootMeanSquaredError(self): + def rootMeanSquaredError(self) -> float: """ Returns the root mean squared error, which is defined as the square root of the mean squared error. @@ -599,9 +618,9 @@ def rootMeanSquaredError(self): """ return self._call_java("rootMeanSquaredError") - @property + @property # type: ignore[misc] @since("2.0.0") - def r2(self): + def r2(self) -> float: """ Returns R^2, the coefficient of determination. @@ -616,9 +635,9 @@ def r2(self): """ return self._call_java("r2") - @property + @property # type: ignore[misc] @since("2.4.0") - def r2adj(self): + def r2adj(self) -> float: """ Returns Adjusted R^2, the adjusted coefficient of determination. 
@@ -632,41 +651,41 @@ def r2adj(self): """ return self._call_java("r2adj") - @property + @property # type: ignore[misc] @since("2.0.0") - def residuals(self): + def residuals(self) -> DataFrame: """ Residuals (label - predicted value) """ return self._call_java("residuals") - @property + @property # type: ignore[misc] @since("2.0.0") - def numInstances(self): + def numInstances(self) -> int: """ Number of instances in DataFrame predictions """ return self._call_java("numInstances") - @property + @property # type: ignore[misc] @since("2.2.0") - def degreesOfFreedom(self): + def degreesOfFreedom(self) -> int: """ Degrees of freedom. """ return self._call_java("degreesOfFreedom") - @property + @property # type: ignore[misc] @since("2.0.0") - def devianceResiduals(self): + def devianceResiduals(self) -> List[float]: """ The weighted residuals, the usual residuals rescaled by the square root of the instance weights. """ return self._call_java("devianceResiduals") - @property - def coefficientStandardErrors(self): + @property # type: ignore[misc] + def coefficientStandardErrors(self) -> List[float]: """ Standard error of estimated coefficients and intercept. This value is only available when using the "normal" solver. @@ -682,8 +701,8 @@ def coefficientStandardErrors(self): """ return self._call_java("coefficientStandardErrors") - @property - def tValues(self): + @property # type: ignore[misc] + def tValues(self) -> List[float]: """ T-statistic of estimated coefficients and intercept. This value is only available when using the "normal" solver. @@ -699,8 +718,8 @@ def tValues(self): """ return self._call_java("tValues") - @property - def pValues(self): + @property # type: ignore[misc] + def pValues(self) -> List[float]: """ Two-sided p-value of estimated coefficients and intercept. This value is only available when using the "normal" solver. @@ -726,8 +745,8 @@ class LinearRegressionTrainingSummary(LinearRegressionSummary): .. versionadded:: 2.0.0 """ - @property - def objectiveHistory(self): + @property # type: ignore[misc] + def objectiveHistory(self) -> List[float]: """ Objective function (scaled loss + regularization) at each iteration. @@ -741,8 +760,8 @@ def objectiveHistory(self): """ return self._call_java("objectiveHistory") - @property - def totalIterations(self): + @property # type: ignore[misc] + def totalIterations(self) -> int: """ Number of training iterations until termination. This value is only available when using the "l-bfgs" solver. @@ -763,31 +782,31 @@ class _IsotonicRegressionParams(HasFeaturesCol, HasLabelCol, HasPredictionCol, H .. versionadded:: 3.0.0 """ - isotonic = Param( + isotonic: Param[bool] = Param( Params._dummy(), "isotonic", "whether the output sequence should be isotonic/increasing (true) or" + "antitonic/decreasing (false).", typeConverter=TypeConverters.toBoolean, ) - featureIndex = Param( + featureIndex: Param[int] = Param( Params._dummy(), "featureIndex", "The index of the feature if featuresCol is a vector column, no effect otherwise.", typeConverter=TypeConverters.toInt, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_IsotonicRegressionParams, self).__init__(*args) self._setDefault(isotonic=True, featureIndex=0) - def getIsotonic(self): + def getIsotonic(self) -> bool: """ Gets the value of isotonic or its default value. """ return self.getOrDefault(self.isotonic) - def getFeatureIndex(self): + def getFeatureIndex(self) -> int: """ Gets the value of featureIndex or its default value. 
""" @@ -839,16 +858,18 @@ class IsotonicRegression( True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - weightCol=None, - isotonic=True, - featureIndex=0, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + weightCol: Optional[str] = None, + isotonic: bool = True, + featureIndex: int = 0, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -865,13 +886,13 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - weightCol=None, - isotonic=True, - featureIndex=0, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + weightCol: Optional[str] = None, + isotonic: bool = True, + featureIndex: int = 0, + ) -> "IsotonicRegression": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ weightCol=None, isotonic=True, featureIndex=0): @@ -880,51 +901,56 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "IsotonicRegressionModel": return IsotonicRegressionModel(java_model) - def setIsotonic(self, value): + def setIsotonic(self, value: bool) -> "IsotonicRegression": """ Sets the value of :py:attr:`isotonic`. """ return self._set(isotonic=value) - def setFeatureIndex(self, value): + def setFeatureIndex(self, value: int) -> "IsotonicRegression": """ Sets the value of :py:attr:`featureIndex`. """ return self._set(featureIndex=value) @since("1.6.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "IsotonicRegression": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("1.6.0") - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "IsotonicRegression": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) @since("1.6.0") - def setLabelCol(self, value): + def setLabelCol(self, value: str) -> "IsotonicRegression": """ Sets the value of :py:attr:`labelCol`. """ return self._set(labelCol=value) @since("1.6.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "IsotonicRegression": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) -class IsotonicRegressionModel(JavaModel, _IsotonicRegressionParams, JavaMLWritable, JavaMLReadable): +class IsotonicRegressionModel( + JavaModel, + _IsotonicRegressionParams, + JavaMLWritable, + JavaMLReadable["IsotonicRegressionModel"], +): """ Model fitted by :class:`IsotonicRegression`. @@ -932,52 +958,52 @@ class IsotonicRegressionModel(JavaModel, _IsotonicRegressionParams, JavaMLWritab """ @since("3.0.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "IsotonicRegressionModel": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("3.0.0") - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "IsotonicRegressionModel": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) - def setFeatureIndex(self, value): + def setFeatureIndex(self, value: int) -> "IsotonicRegressionModel": """ Sets the value of :py:attr:`featureIndex`. 
""" return self._set(featureIndex=value) - @property + @property # type: ignore[misc] @since("1.6.0") - def boundaries(self): + def boundaries(self) -> Vector: """ Boundaries in increasing order for which predictions are known. """ return self._call_java("boundaries") - @property + @property # type: ignore[misc] @since("1.6.0") - def predictions(self): + def predictions(self) -> Vector: """ Predictions associated with the boundaries at the same index, monotone because of isotonic regression. """ return self._call_java("predictions") - @property + @property # type: ignore[misc] @since("3.0.0") - def numFeatures(self): + def numFeatures(self) -> int: """ Returns the number of features the model was trained on. If unknown, returns -1 """ return self._call_java("numFeatures") @since("3.0.0") - def predict(self, value): + def predict(self, value: float) -> float: """ Predict label for the given features. """ @@ -991,7 +1017,7 @@ class _DecisionTreeRegressorParams(_DecisionTreeParams, _TreeRegressorParams, Ha .. versionadded:: 3.0.0 """ - def __init__(self, *args): + def __init__(self, *args: Any): super(_DecisionTreeRegressorParams, self).__init__(*args) self._setDefault( maxDepth=5, @@ -1009,7 +1035,10 @@ def __init__(self, *args): @inherit_doc class DecisionTreeRegressor( - _JavaRegressor, _DecisionTreeRegressorParams, JavaMLWritable, JavaMLReadable + _JavaRegressor["DecisionTreeRegressionModel"], + _DecisionTreeRegressorParams, + JavaMLWritable, + JavaMLReadable["DecisionTreeRegressor"], ): """ `Decision tree `_ @@ -1079,26 +1108,28 @@ class DecisionTreeRegressor( DecisionTreeRegressionModel...depth=1, numNodes=3... """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - maxMemoryInMB=256, - cacheNodeIds=False, - checkpointInterval=10, - impurity="variance", - seed=None, - varianceCol=None, - weightCol=None, - leafCol="", - minWeightFractionPerNode=0.0, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + maxMemoryInMB: int = 256, + cacheNodeIds: bool = False, + checkpointInterval: int = 10, + impurity: str = "variance", + seed: Optional[int] = None, + varianceCol: Optional[str] = None, + weightCol: Optional[str] = None, + leafCol: str = "", + minWeightFractionPerNode: float = 0.0, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -1119,23 +1150,23 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - maxMemoryInMB=256, - cacheNodeIds=False, - checkpointInterval=10, - impurity="variance", - seed=None, - varianceCol=None, - weightCol=None, - leafCol="", - minWeightFractionPerNode=0.0, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + maxMemoryInMB: int = 256, + cacheNodeIds: bool = False, + checkpointInterval: int = 10, + impurity: str = "variance", + seed: Optional[int] = None, + varianceCol: Optional[str] = None, + weightCol: Optional[str] = None, + leafCol: str = "", + minWeightFractionPerNode: float = 0.0, + ) -> "DecisionTreeRegressor": 
""" setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ @@ -1147,87 +1178,87 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "DecisionTreeRegressionModel": return DecisionTreeRegressionModel(java_model) @since("1.4.0") - def setMaxDepth(self, value): + def setMaxDepth(self, value: int) -> "DecisionTreeRegressor": """ Sets the value of :py:attr:`maxDepth`. """ return self._set(maxDepth=value) @since("1.4.0") - def setMaxBins(self, value): + def setMaxBins(self, value: int) -> "DecisionTreeRegressor": """ Sets the value of :py:attr:`maxBins`. """ return self._set(maxBins=value) @since("1.4.0") - def setMinInstancesPerNode(self, value): + def setMinInstancesPerNode(self, value: int) -> "DecisionTreeRegressor": """ Sets the value of :py:attr:`minInstancesPerNode`. """ return self._set(minInstancesPerNode=value) @since("3.0.0") - def setMinWeightFractionPerNode(self, value): + def setMinWeightFractionPerNode(self, value: float) -> "DecisionTreeRegressor": """ Sets the value of :py:attr:`minWeightFractionPerNode`. """ return self._set(minWeightFractionPerNode=value) @since("1.4.0") - def setMinInfoGain(self, value): + def setMinInfoGain(self, value: float) -> "DecisionTreeRegressor": """ Sets the value of :py:attr:`minInfoGain`. """ return self._set(minInfoGain=value) @since("1.4.0") - def setMaxMemoryInMB(self, value): + def setMaxMemoryInMB(self, value: int) -> "DecisionTreeRegressor": """ Sets the value of :py:attr:`maxMemoryInMB`. """ return self._set(maxMemoryInMB=value) @since("1.4.0") - def setCacheNodeIds(self, value): + def setCacheNodeIds(self, value: bool) -> "DecisionTreeRegressor": """ Sets the value of :py:attr:`cacheNodeIds`. """ return self._set(cacheNodeIds=value) @since("1.4.0") - def setImpurity(self, value): + def setImpurity(self, value: str) -> "DecisionTreeRegressor": """ Sets the value of :py:attr:`impurity`. """ return self._set(impurity=value) @since("1.4.0") - def setCheckpointInterval(self, value): + def setCheckpointInterval(self, value: int) -> "DecisionTreeRegressor": """ Sets the value of :py:attr:`checkpointInterval`. """ return self._set(checkpointInterval=value) - def setSeed(self, value): + def setSeed(self, value: int) -> "DecisionTreeRegressor": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) @since("3.0.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "DecisionTreeRegressor": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) @since("2.0.0") - def setVarianceCol(self, value): + def setVarianceCol(self, value: str) -> "DecisionTreeRegressor": """ Sets the value of :py:attr:`varianceCol`. """ @@ -1240,7 +1271,7 @@ class DecisionTreeRegressionModel( _DecisionTreeModel, _DecisionTreeRegressorParams, JavaMLWritable, - JavaMLReadable, + JavaMLReadable["DecisionTreeRegressionModel"], ): """ Model fitted by :class:`DecisionTreeRegressor`. @@ -1249,14 +1280,14 @@ class DecisionTreeRegressionModel( """ @since("3.0.0") - def setVarianceCol(self, value): + def setVarianceCol(self, value: str) -> "DecisionTreeRegressionModel": """ Sets the value of :py:attr:`varianceCol`. 
""" return self._set(varianceCol=value) - @property - def featureImportances(self): + @property # type: ignore[misc] + def featureImportances(self) -> Vector: """ Estimate of the importance of each feature. @@ -1287,7 +1318,7 @@ class _RandomForestRegressorParams(_RandomForestParams, _TreeRegressorParams): .. versionadded:: 3.0.0 """ - def __init__(self, *args): + def __init__(self, *args: Any): super(_RandomForestRegressorParams, self).__init__(*args) self._setDefault( maxDepth=5, @@ -1309,7 +1340,10 @@ def __init__(self, *args): @inherit_doc class RandomForestRegressor( - _JavaRegressor, _RandomForestRegressorParams, JavaMLWritable, JavaMLReadable + _JavaRegressor["RandomForestRegressionModel"], + _RandomForestRegressorParams, + JavaMLWritable, + JavaMLReadable["RandomForestRegressor"], ): """ `Random Forest `_ @@ -1374,29 +1408,31 @@ class RandomForestRegressor( True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - maxMemoryInMB=256, - cacheNodeIds=False, - checkpointInterval=10, - impurity="variance", - subsamplingRate=1.0, - seed=None, - numTrees=20, - featureSubsetStrategy="auto", - leafCol="", - minWeightFractionPerNode=0.0, - weightCol=None, - bootstrap=True, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + maxMemoryInMB: int = 256, + cacheNodeIds: bool = False, + checkpointInterval: int = 10, + impurity: str = "variance", + subsamplingRate: float = 1.0, + seed: Optional[int] = None, + numTrees: int = 20, + featureSubsetStrategy: str = "auto", + leafCol: str = "", + minWeightFractionPerNode: float = 0.0, + weightCol: Optional[str] = None, + bootstrap: Optional[bool] = True, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -1418,26 +1454,26 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - maxMemoryInMB=256, - cacheNodeIds=False, - checkpointInterval=10, - impurity="variance", - subsamplingRate=1.0, - seed=None, - numTrees=20, - featureSubsetStrategy="auto", - leafCol="", - minWeightFractionPerNode=0.0, - weightCol=None, - bootstrap=True, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + maxMemoryInMB: int = 256, + cacheNodeIds: bool = False, + checkpointInterval: int = 10, + impurity: str = "variance", + subsamplingRate: float = 1.0, + seed: Optional[int] = None, + numTrees: int = 20, + featureSubsetStrategy: str = "auto", + leafCol: str = "", + minWeightFractionPerNode: float = 0.0, + weightCol: Optional[str] = None, + bootstrap: Optional[bool] = True, + ) -> "RandomForestRegressor": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ @@ -1450,101 +1486,101 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "RandomForestRegressionModel": return RandomForestRegressionModel(java_model) - def 
setMaxDepth(self, value): + def setMaxDepth(self, value: int) -> "RandomForestRegressor": """ Sets the value of :py:attr:`maxDepth`. """ return self._set(maxDepth=value) - def setMaxBins(self, value): + def setMaxBins(self, value: int) -> "RandomForestRegressor": """ Sets the value of :py:attr:`maxBins`. """ return self._set(maxBins=value) - def setMinInstancesPerNode(self, value): + def setMinInstancesPerNode(self, value: int) -> "RandomForestRegressor": """ Sets the value of :py:attr:`minInstancesPerNode`. """ return self._set(minInstancesPerNode=value) - def setMinInfoGain(self, value): + def setMinInfoGain(self, value: float) -> "RandomForestRegressor": """ Sets the value of :py:attr:`minInfoGain`. """ return self._set(minInfoGain=value) - def setMaxMemoryInMB(self, value): + def setMaxMemoryInMB(self, value: int) -> "RandomForestRegressor": """ Sets the value of :py:attr:`maxMemoryInMB`. """ return self._set(maxMemoryInMB=value) - def setCacheNodeIds(self, value): + def setCacheNodeIds(self, value: bool) -> "RandomForestRegressor": """ Sets the value of :py:attr:`cacheNodeIds`. """ return self._set(cacheNodeIds=value) @since("1.4.0") - def setImpurity(self, value): + def setImpurity(self, value: str) -> "RandomForestRegressor": """ Sets the value of :py:attr:`impurity`. """ return self._set(impurity=value) @since("1.4.0") - def setNumTrees(self, value): + def setNumTrees(self, value: int) -> "RandomForestRegressor": """ Sets the value of :py:attr:`numTrees`. """ return self._set(numTrees=value) @since("3.0.0") - def setBootstrap(self, value): + def setBootstrap(self, value: bool) -> "RandomForestRegressor": """ Sets the value of :py:attr:`bootstrap`. """ return self._set(bootstrap=value) @since("1.4.0") - def setSubsamplingRate(self, value): + def setSubsamplingRate(self, value: float) -> "RandomForestRegressor": """ Sets the value of :py:attr:`subsamplingRate`. """ return self._set(subsamplingRate=value) @since("2.4.0") - def setFeatureSubsetStrategy(self, value): + def setFeatureSubsetStrategy(self, value: str) -> "RandomForestRegressor": """ Sets the value of :py:attr:`featureSubsetStrategy`. """ return self._set(featureSubsetStrategy=value) - def setCheckpointInterval(self, value): + def setCheckpointInterval(self, value: int) -> "RandomForestRegressor": """ Sets the value of :py:attr:`checkpointInterval`. """ return self._set(checkpointInterval=value) - def setSeed(self, value): + def setSeed(self, value: int) -> "RandomForestRegressor": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) @since("3.0.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "RandomForestRegressor": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) @since("3.0.0") - def setMinWeightFractionPerNode(self, value): + def setMinWeightFractionPerNode(self, value: float) -> "RandomForestRegressor": """ Sets the value of :py:attr:`minWeightFractionPerNode`. """ @@ -1552,11 +1588,11 @@ def setMinWeightFractionPerNode(self, value): class RandomForestRegressionModel( - _JavaRegressionModel, + _JavaRegressionModel[Vector], _TreeEnsembleModel, _RandomForestRegressorParams, JavaMLWritable, - JavaMLReadable, + JavaMLReadable["RandomForestRegressionModel"], ): """ Model fitted by :class:`RandomForestRegressor`. @@ -1564,14 +1600,14 @@ class RandomForestRegressionModel( .. 
versionadded:: 1.4.0 """ - @property + @property # type: ignore[misc] @since("2.0.0") - def trees(self): + def trees(self) -> List[DecisionTreeRegressionModel]: """Trees in this ensemble. Warning: These have null parent Estimators.""" return [DecisionTreeRegressionModel(m) for m in list(self._call_java("trees"))] - @property - def featureImportances(self): + @property # type: ignore[misc] + def featureImportances(self) -> Vector: """ Estimate of the importance of each feature. @@ -1596,9 +1632,9 @@ class _GBTRegressorParams(_GBTParams, _TreeRegressorParams): .. versionadded:: 3.0.0 """ - supportedLossTypes = ["squared", "absolute"] + supportedLossTypes: List[str] = ["squared", "absolute"] - lossType = Param( + lossType: Param[str] = Param( Params._dummy(), "lossType", "Loss function which GBT tries to minimize (case-insensitive). " @@ -1607,7 +1643,7 @@ class _GBTRegressorParams(_GBTParams, _TreeRegressorParams): typeConverter=TypeConverters.toString, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_GBTRegressorParams, self).__init__(*args) self._setDefault( maxDepth=5, @@ -1629,7 +1665,7 @@ def __init__(self, *args): ) @since("1.4.0") - def getLossType(self): + def getLossType(self) -> str: """ Gets the value of lossType or its default value. """ @@ -1637,7 +1673,12 @@ def getLossType(self): @inherit_doc -class GBTRegressor(_JavaRegressor, _GBTRegressorParams, JavaMLWritable, JavaMLReadable): +class GBTRegressor( + _JavaRegressor["GBTRegressionModel"], + _GBTRegressorParams, + JavaMLWritable, + JavaMLReadable["GBTRegressor"], +): """ `Gradient-Boosted Trees (GBTs) `_ learning algorithm for regression. @@ -1710,32 +1751,34 @@ class GBTRegressor(_JavaRegressor, _GBTRegressorParams, JavaMLWritable, JavaMLRe 0.01 """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - maxMemoryInMB=256, - cacheNodeIds=False, - subsamplingRate=1.0, - checkpointInterval=10, - lossType="squared", - maxIter=20, - stepSize=0.1, - seed=None, - impurity="variance", - featureSubsetStrategy="all", - validationTol=0.01, - validationIndicatorCol=None, - leafCol="", - minWeightFractionPerNode=0.0, - weightCol=None, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + maxMemoryInMB: int = 256, + cacheNodeIds: bool = False, + subsamplingRate: float = 1.0, + checkpointInterval: int = 10, + lossType: str = "squared", + maxIter: int = 20, + stepSize: float = 0.1, + seed: Optional[int] = None, + impurity: str = "variance", + featureSubsetStrategy: str = "all", + validationTol: float = 0.1, + validationIndicatorCol: Optional[str] = None, + leafCol: str = "", + minWeightFractionPerNode: float = 0.0, + weightCol: Optional[str] = None, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -1756,29 +1799,29 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - maxMemoryInMB=256, - cacheNodeIds=False, - subsamplingRate=1.0, - checkpointInterval=10, - lossType="squared", - maxIter=20, - stepSize=0.1, - seed=None, - impurity="variance", - featureSubsetStrategy="all", - validationTol=0.01, - validationIndicatorCol=None, 
- leafCol="", - minWeightFractionPerNode=0.0, - weightCol=None, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + maxMemoryInMB: int = 256, + cacheNodeIds: bool = False, + subsamplingRate: float = 1.0, + checkpointInterval: int = 10, + lossType: str = "squared", + maxIter: int = 20, + stepSize: float = 0.1, + seed: Optional[int] = None, + impurity: str = "variance", + featureSubsetStrategy: str = "all", + validationTol: float = 0.1, + validationIndicatorCol: Optional[str] = None, + leafCol: str = "", + minWeightFractionPerNode: float = 0.0, + weightCol: Optional[str] = None, + ) -> "GBTRegressor": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ @@ -1792,123 +1835,123 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "GBTRegressionModel": return GBTRegressionModel(java_model) @since("1.4.0") - def setMaxDepth(self, value): + def setMaxDepth(self, value: int) -> "GBTRegressor": """ Sets the value of :py:attr:`maxDepth`. """ return self._set(maxDepth=value) @since("1.4.0") - def setMaxBins(self, value): + def setMaxBins(self, value: int) -> "GBTRegressor": """ Sets the value of :py:attr:`maxBins`. """ return self._set(maxBins=value) @since("1.4.0") - def setMinInstancesPerNode(self, value): + def setMinInstancesPerNode(self, value: int) -> "GBTRegressor": """ Sets the value of :py:attr:`minInstancesPerNode`. """ return self._set(minInstancesPerNode=value) @since("1.4.0") - def setMinInfoGain(self, value): + def setMinInfoGain(self, value: float) -> "GBTRegressor": """ Sets the value of :py:attr:`minInfoGain`. """ return self._set(minInfoGain=value) @since("1.4.0") - def setMaxMemoryInMB(self, value): + def setMaxMemoryInMB(self, value: int) -> "GBTRegressor": """ Sets the value of :py:attr:`maxMemoryInMB`. """ return self._set(maxMemoryInMB=value) @since("1.4.0") - def setCacheNodeIds(self, value): + def setCacheNodeIds(self, value: bool) -> "GBTRegressor": """ Sets the value of :py:attr:`cacheNodeIds`. """ return self._set(cacheNodeIds=value) @since("1.4.0") - def setImpurity(self, value): + def setImpurity(self, value: str) -> "GBTRegressor": """ Sets the value of :py:attr:`impurity`. """ return self._set(impurity=value) @since("1.4.0") - def setLossType(self, value): + def setLossType(self, value: str) -> "GBTRegressor": """ Sets the value of :py:attr:`lossType`. """ return self._set(lossType=value) @since("1.4.0") - def setSubsamplingRate(self, value): + def setSubsamplingRate(self, value: float) -> "GBTRegressor": """ Sets the value of :py:attr:`subsamplingRate`. """ return self._set(subsamplingRate=value) @since("2.4.0") - def setFeatureSubsetStrategy(self, value): + def setFeatureSubsetStrategy(self, value: str) -> "GBTRegressor": """ Sets the value of :py:attr:`featureSubsetStrategy`. """ return self._set(featureSubsetStrategy=value) @since("3.0.0") - def setValidationIndicatorCol(self, value): + def setValidationIndicatorCol(self, value: str) -> "GBTRegressor": """ Sets the value of :py:attr:`validationIndicatorCol`. 
""" return self._set(validationIndicatorCol=value) @since("1.4.0") - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "GBTRegressor": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) @since("1.4.0") - def setCheckpointInterval(self, value): + def setCheckpointInterval(self, value: int) -> "GBTRegressor": """ Sets the value of :py:attr:`checkpointInterval`. """ return self._set(checkpointInterval=value) @since("1.4.0") - def setSeed(self, value): + def setSeed(self, value: int) -> "GBTRegressor": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) @since("1.4.0") - def setStepSize(self, value): + def setStepSize(self, value: float) -> "GBTRegressor": """ Sets the value of :py:attr:`stepSize`. """ return self._set(stepSize=value) @since("3.0.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "GBTRegressor": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) @since("3.0.0") - def setMinWeightFractionPerNode(self, value): + def setMinWeightFractionPerNode(self, value: float) -> "GBTRegressor": """ Sets the value of :py:attr:`minWeightFractionPerNode`. """ @@ -1916,7 +1959,11 @@ def setMinWeightFractionPerNode(self, value): class GBTRegressionModel( - _JavaRegressionModel, _TreeEnsembleModel, _GBTRegressorParams, JavaMLWritable, JavaMLReadable + _JavaRegressionModel[Vector], + _TreeEnsembleModel, + _GBTRegressorParams, + JavaMLWritable, + JavaMLReadable["GBTRegressionModel"], ): """ Model fitted by :class:`GBTRegressor`. @@ -1924,8 +1971,8 @@ class GBTRegressionModel( .. versionadded:: 1.4.0 """ - @property - def featureImportances(self): + @property # type: ignore[misc] + def featureImportances(self) -> Vector: """ Estimate of the importance of each feature. @@ -1942,13 +1989,13 @@ def featureImportances(self): """ return self._call_java("featureImportances") - @property + @property # type: ignore[misc] @since("2.0.0") - def trees(self): + def trees(self) -> List[DecisionTreeRegressionModel]: """Trees in this ensemble. Warning: These have null parent Estimators.""" return [DecisionTreeRegressionModel(m) for m in list(self._call_java("trees"))] - def evaluateEachIteration(self, dataset, loss): + def evaluateEachIteration(self, dataset: DataFrame, loss: str) -> List[float]: """ Method to compute error or loss for every iteration of gradient boosting. @@ -1975,7 +2022,7 @@ class _AFTSurvivalRegressionParams( .. versionadded:: 3.0.0 """ - censorCol = Param( + censorCol: Param[str] = Param( Params._dummy(), "censorCol", "censor column name. The value of this column could be 0 or 1. " @@ -1983,14 +2030,14 @@ class _AFTSurvivalRegressionParams( + "uncensored; otherwise censored.", typeConverter=TypeConverters.toString, ) - quantileProbabilities = Param( + quantileProbabilities: Param[List[float]] = Param( Params._dummy(), "quantileProbabilities", "quantile probabilities array. Values of the quantile probabilities array " + "should be in the range (0, 1) and the array should be non-empty.", typeConverter=TypeConverters.toListFloat, ) - quantilesCol = Param( + quantilesCol: Param[str] = Param( Params._dummy(), "quantilesCol", "quantiles column name. 
This column will output quantiles of " @@ -1998,7 +2045,7 @@ class _AFTSurvivalRegressionParams( typeConverter=TypeConverters.toString, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_AFTSurvivalRegressionParams, self).__init__(*args) self._setDefault( censorCol="censor", @@ -2009,21 +2056,21 @@ def __init__(self, *args): ) @since("1.6.0") - def getCensorCol(self): + def getCensorCol(self) -> str: """ Gets the value of censorCol or its default value. """ return self.getOrDefault(self.censorCol) @since("1.6.0") - def getQuantileProbabilities(self): + def getQuantileProbabilities(self) -> List[float]: """ Gets the value of quantileProbabilities or its default value. """ return self.getOrDefault(self.quantileProbabilities) @since("1.6.0") - def getQuantilesCol(self): + def getQuantilesCol(self) -> str: """ Gets the value of quantilesCol or its default value. """ @@ -2032,7 +2079,10 @@ def getQuantilesCol(self): @inherit_doc class AFTSurvivalRegression( - _JavaRegressor, _AFTSurvivalRegressionParams, JavaMLWritable, JavaMLReadable + _JavaRegressor["AFTSurvivalRegressionModel"], + _AFTSurvivalRegressionParams, + JavaMLWritable, + JavaMLReadable["AFTSurvivalRegression"], ): """ Accelerated Failure Time (AFT) Model Survival Regression @@ -2095,23 +2145,33 @@ class AFTSurvivalRegression( .. versionadded:: 1.6.0 """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - fitIntercept=True, - maxIter=100, - tol=1e-6, - censorCol="censor", - quantileProbabilities=list( - [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99] - ), # noqa: B005 - quantilesCol=None, - aggregationDepth=2, - maxBlockSizeInMB=0.0, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + fitIntercept: bool = True, + maxIter: int = 100, + tol: float = 1e-6, + censorCol: str = "censor", + quantileProbabilities: List[float] = [ + 0.01, + 0.05, + 0.1, + 0.25, + 0.5, + 0.75, + 0.9, + 0.95, + 0.99, + ], # noqa: B005 + quantilesCol: Optional[str] = None, + aggregationDepth: int = 2, + maxBlockSizeInMB: float = 0.0, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -2131,20 +2191,28 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - fitIntercept=True, - maxIter=100, - tol=1e-6, - censorCol="censor", - quantileProbabilities=list( - [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99] - ), # noqa: B005 - quantilesCol=None, - aggregationDepth=2, - maxBlockSizeInMB=0.0, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + fitIntercept: bool = True, + maxIter: int = 100, + tol: float = 1e-6, + censorCol: str = "censor", + quantileProbabilities: List[float] = [ + 0.01, + 0.05, + 0.1, + 0.25, + 0.5, + 0.75, + 0.9, + 0.95, + 0.99, + ], # noqa: B005 + quantilesCol: Optional[str] = None, + aggregationDepth: int = 2, + maxBlockSizeInMB: float = 0.0, + ) -> "AFTSurvivalRegression": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor", \ @@ -2154,60 +2222,60 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "AFTSurvivalRegressionModel": return AFTSurvivalRegressionModel(java_model) @since("1.6.0") - def 
setCensorCol(self, value): + def setCensorCol(self, value: str) -> "AFTSurvivalRegression": """ Sets the value of :py:attr:`censorCol`. """ return self._set(censorCol=value) @since("1.6.0") - def setQuantileProbabilities(self, value): + def setQuantileProbabilities(self, value: List[float]) -> "AFTSurvivalRegression": """ Sets the value of :py:attr:`quantileProbabilities`. """ return self._set(quantileProbabilities=value) @since("1.6.0") - def setQuantilesCol(self, value): + def setQuantilesCol(self, value: str) -> "AFTSurvivalRegression": """ Sets the value of :py:attr:`quantilesCol`. """ return self._set(quantilesCol=value) @since("1.6.0") - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "AFTSurvivalRegression": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) @since("1.6.0") - def setTol(self, value): + def setTol(self, value: float) -> "AFTSurvivalRegression": """ Sets the value of :py:attr:`tol`. """ return self._set(tol=value) @since("1.6.0") - def setFitIntercept(self, value): + def setFitIntercept(self, value: bool) -> "AFTSurvivalRegression": """ Sets the value of :py:attr:`fitIntercept`. """ return self._set(fitIntercept=value) @since("2.1.0") - def setAggregationDepth(self, value): + def setAggregationDepth(self, value: int) -> "AFTSurvivalRegression": """ Sets the value of :py:attr:`aggregationDepth`. """ return self._set(aggregationDepth=value) @since("3.1.0") - def setMaxBlockSizeInMB(self, value): + def setMaxBlockSizeInMB(self, value: int) -> "AFTSurvivalRegression": """ Sets the value of :py:attr:`maxBlockSizeInMB`. """ @@ -2215,7 +2283,10 @@ def setMaxBlockSizeInMB(self, value): class AFTSurvivalRegressionModel( - _JavaRegressionModel, _AFTSurvivalRegressionParams, JavaMLWritable, JavaMLReadable + _JavaRegressionModel[Vector], + _AFTSurvivalRegressionParams, + JavaMLWritable, + JavaMLReadable["AFTSurvivalRegressionModel"], ): """ Model fitted by :class:`AFTSurvivalRegression`. @@ -2224,45 +2295,45 @@ class AFTSurvivalRegressionModel( """ @since("3.0.0") - def setQuantileProbabilities(self, value): + def setQuantileProbabilities(self, value: List[float]) -> "AFTSurvivalRegressionModel": """ Sets the value of :py:attr:`quantileProbabilities`. """ return self._set(quantileProbabilities=value) @since("3.0.0") - def setQuantilesCol(self, value): + def setQuantilesCol(self, value: str) -> "AFTSurvivalRegressionModel": """ Sets the value of :py:attr:`quantilesCol`. """ return self._set(quantilesCol=value) - @property + @property # type: ignore[misc] @since("2.0.0") - def coefficients(self): + def coefficients(self) -> Vector: """ Model coefficients. """ return self._call_java("coefficients") - @property + @property # type: ignore[misc] @since("1.6.0") - def intercept(self): + def intercept(self) -> float: """ Model intercept. """ return self._call_java("intercept") - @property + @property # type: ignore[misc] @since("1.6.0") - def scale(self): + def scale(self) -> float: """ Model scale parameter. """ return self._call_java("scale") @since("2.0.0") - def predictQuantiles(self, features): + def predictQuantiles(self, features: Vector) -> Vector: """ Predicted Quantiles """ @@ -2286,7 +2357,7 @@ class _GeneralizedLinearRegressionParams( .. 
versionadded:: 3.0.0 """ - family = Param( + family: Param[str] = Param( Params._dummy(), "family", "The name of family which is a description of " @@ -2294,7 +2365,7 @@ class _GeneralizedLinearRegressionParams( + "gaussian (default), binomial, poisson, gamma and tweedie.", typeConverter=TypeConverters.toString, ) - link = Param( + link: Param[str] = Param( Params._dummy(), "link", "The name of link function which provides the " @@ -2303,13 +2374,13 @@ class _GeneralizedLinearRegressionParams( + "and sqrt.", typeConverter=TypeConverters.toString, ) - linkPredictionCol = Param( + linkPredictionCol: Param[str] = Param( Params._dummy(), "linkPredictionCol", "link prediction (linear " + "predictor) column name", typeConverter=TypeConverters.toString, ) - variancePower = Param( + variancePower: Param[float] = Param( Params._dummy(), "variancePower", "The power in the variance function " @@ -2318,19 +2389,19 @@ class _GeneralizedLinearRegressionParams( + "for the Tweedie family. Supported values: 0 and [1, Inf).", typeConverter=TypeConverters.toFloat, ) - linkPower = Param( + linkPower: Param[float] = Param( Params._dummy(), "linkPower", "The index in the power link function. " + "Only applicable to the Tweedie family.", typeConverter=TypeConverters.toFloat, ) - solver = Param( + solver: Param[str] = Param( Params._dummy(), "solver", "The solver algorithm for optimization. Supported " + "options: irls.", typeConverter=TypeConverters.toString, ) - offsetCol = Param( + offsetCol: Param[str] = Param( Params._dummy(), "offsetCol", "The offset column name. If this is not set " @@ -2338,7 +2409,7 @@ class _GeneralizedLinearRegressionParams( typeConverter=TypeConverters.toString, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_GeneralizedLinearRegressionParams, self).__init__(*args) self._setDefault( family="gaussian", @@ -2351,42 +2422,42 @@ def __init__(self, *args): ) @since("2.0.0") - def getFamily(self): + def getFamily(self) -> str: """ Gets the value of family or its default value. """ return self.getOrDefault(self.family) @since("2.0.0") - def getLinkPredictionCol(self): + def getLinkPredictionCol(self) -> str: """ Gets the value of linkPredictionCol or its default value. """ return self.getOrDefault(self.linkPredictionCol) @since("2.0.0") - def getLink(self): + def getLink(self) -> str: """ Gets the value of link or its default value. """ return self.getOrDefault(self.link) @since("2.2.0") - def getVariancePower(self): + def getVariancePower(self) -> float: """ Gets the value of variancePower or its default value. """ return self.getOrDefault(self.variancePower) @since("2.2.0") - def getLinkPower(self): + def getLinkPower(self) -> float: """ Gets the value of linkPower or its default value. """ return self.getOrDefault(self.linkPower) @since("2.3.0") - def getOffsetCol(self): + def getOffsetCol(self) -> str: """ Gets the value of offsetCol or its default value. """ @@ -2395,7 +2466,10 @@ def getOffsetCol(self): @inherit_doc class GeneralizedLinearRegression( - _JavaRegressor, _GeneralizedLinearRegressionParams, JavaMLWritable, JavaMLReadable + _JavaRegressor["GeneralizedLinearRegressionModel"], + _GeneralizedLinearRegressionParams, + JavaMLWritable, + JavaMLReadable["GeneralizedLinearRegression"], ): """ Generalized Linear Regression. 
@@ -2476,26 +2550,28 @@ class GeneralizedLinearRegression( True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - labelCol="label", - featuresCol="features", - predictionCol="prediction", - family="gaussian", - link=None, - fitIntercept=True, - maxIter=25, - tol=1e-6, - regParam=0.0, - weightCol=None, - solver="irls", - linkPredictionCol=None, - variancePower=0.0, - linkPower=None, - offsetCol=None, - aggregationDepth=2, + labelCol: str = "label", + featuresCol: str = "features", + predictionCol: str = "prediction", + family: str = "gaussian", + link: Optional[str] = None, + fitIntercept: bool = True, + maxIter: int = 25, + tol: float = 1e-6, + regParam: float = 0.0, + weightCol: Optional[str] = None, + solver: str = "irls", + linkPredictionCol: Optional[str] = None, + variancePower: float = 0.0, + linkPower: Optional[float] = None, + offsetCol: Optional[str] = None, + aggregationDepth: int = 2, ): """ __init__(self, \\*, labelCol="label", featuresCol="features", predictionCol="prediction", \ @@ -2516,23 +2592,23 @@ def __init__( def setParams( self, *, - labelCol="label", - featuresCol="features", - predictionCol="prediction", - family="gaussian", - link=None, - fitIntercept=True, - maxIter=25, - tol=1e-6, - regParam=0.0, - weightCol=None, - solver="irls", - linkPredictionCol=None, - variancePower=0.0, - linkPower=None, - offsetCol=None, - aggregationDepth=2, - ): + labelCol: str = "label", + featuresCol: str = "features", + predictionCol: str = "prediction", + family: str = "gaussian", + link: Optional[str] = None, + fitIntercept: bool = True, + maxIter: int = 25, + tol: float = 1e-6, + regParam: float = 0.0, + weightCol: Optional[str] = None, + solver: str = "irls", + linkPredictionCol: Optional[str] = None, + variancePower: float = 0.0, + linkPower: Optional[float] = None, + offsetCol: Optional[str] = None, + aggregationDepth: int = 2, + ) -> "GeneralizedLinearRegression": """ setParams(self, \\*, labelCol="label", featuresCol="features", predictionCol="prediction", \ family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6, \ @@ -2543,95 +2619,95 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "GeneralizedLinearRegressionModel": return GeneralizedLinearRegressionModel(java_model) @since("2.0.0") - def setFamily(self, value): + def setFamily(self, value: str) -> "GeneralizedLinearRegression": """ Sets the value of :py:attr:`family`. """ return self._set(family=value) @since("2.0.0") - def setLinkPredictionCol(self, value): + def setLinkPredictionCol(self, value: str) -> "GeneralizedLinearRegression": """ Sets the value of :py:attr:`linkPredictionCol`. """ return self._set(linkPredictionCol=value) @since("2.0.0") - def setLink(self, value): + def setLink(self, value: str) -> "GeneralizedLinearRegression": """ Sets the value of :py:attr:`link`. """ return self._set(link=value) @since("2.2.0") - def setVariancePower(self, value): + def setVariancePower(self, value: float) -> "GeneralizedLinearRegression": """ Sets the value of :py:attr:`variancePower`. """ return self._set(variancePower=value) @since("2.2.0") - def setLinkPower(self, value): + def setLinkPower(self, value: float) -> "GeneralizedLinearRegression": """ Sets the value of :py:attr:`linkPower`. 
""" return self._set(linkPower=value) @since("2.3.0") - def setOffsetCol(self, value): + def setOffsetCol(self, value: str) -> "GeneralizedLinearRegression": """ Sets the value of :py:attr:`offsetCol`. """ return self._set(offsetCol=value) @since("2.0.0") - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "GeneralizedLinearRegression": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) @since("2.0.0") - def setRegParam(self, value): + def setRegParam(self, value: float) -> "GeneralizedLinearRegression": """ Sets the value of :py:attr:`regParam`. """ return self._set(regParam=value) @since("2.0.0") - def setTol(self, value): + def setTol(self, value: float) -> "GeneralizedLinearRegression": """ Sets the value of :py:attr:`tol`. """ return self._set(tol=value) @since("2.0.0") - def setFitIntercept(self, value): + def setFitIntercept(self, value: bool) -> "GeneralizedLinearRegression": """ Sets the value of :py:attr:`fitIntercept`. """ return self._set(fitIntercept=value) @since("2.0.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "GeneralizedLinearRegression": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) @since("2.0.0") - def setSolver(self, value): + def setSolver(self, value: str) -> "GeneralizedLinearRegression": """ Sets the value of :py:attr:`solver`. """ return self._set(solver=value) @since("3.0.0") - def setAggregationDepth(self, value): + def setAggregationDepth(self, value: int) -> "GeneralizedLinearRegression": """ Sets the value of :py:attr:`aggregationDepth`. """ @@ -2639,11 +2715,11 @@ def setAggregationDepth(self, value): class GeneralizedLinearRegressionModel( - _JavaRegressionModel, + _JavaRegressionModel[Vector], _GeneralizedLinearRegressionParams, JavaMLWritable, - JavaMLReadable, - HasTrainingSummary, + JavaMLReadable["GeneralizedLinearRegressionModel"], + HasTrainingSummary["GeneralizedLinearRegressionTrainingSummary"], ): """ Model fitted by :class:`GeneralizedLinearRegression`. @@ -2652,31 +2728,31 @@ class GeneralizedLinearRegressionModel( """ @since("3.0.0") - def setLinkPredictionCol(self, value): + def setLinkPredictionCol(self, value: str) -> "GeneralizedLinearRegressionModel": """ Sets the value of :py:attr:`linkPredictionCol`. """ return self._set(linkPredictionCol=value) - @property + @property # type: ignore[misc] @since("2.0.0") - def coefficients(self): + def coefficients(self) -> Vector: """ Model coefficients. """ return self._call_java("coefficients") - @property + @property # type: ignore[misc] @since("2.0.0") - def intercept(self): + def intercept(self) -> float: """ Model intercept. """ return self._call_java("intercept") - @property + @property # type: ignore[misc] @since("2.0.0") - def summary(self): + def summary(self) -> "GeneralizedLinearRegressionTrainingSummary": """ Gets summary (residuals, deviance, p-values) of model on training set. An exception is thrown if @@ -2691,7 +2767,7 @@ def summary(self): "No training summary available for this %s" % self.__class__.__name__ ) - def evaluate(self, dataset): + def evaluate(self, dataset: DataFrame) -> "GeneralizedLinearRegressionSummary": """ Evaluates the model on a test dataset. @@ -2716,64 +2792,64 @@ class GeneralizedLinearRegressionSummary(JavaWrapper): .. versionadded:: 2.0.0 """ - @property + @property # type: ignore[misc] @since("2.0.0") - def predictions(self): + def predictions(self) -> DataFrame: """ Predictions output by the model's `transform` method. 
""" return self._call_java("predictions") - @property + @property # type: ignore[misc] @since("2.0.0") - def predictionCol(self): + def predictionCol(self) -> str: """ Field in :py:attr:`predictions` which gives the predicted value of each instance. This is set to a new column name if the original model's `predictionCol` is not set. """ return self._call_java("predictionCol") - @property + @property # type: ignore[misc] @since("2.2.0") - def numInstances(self): + def numInstances(self) -> int: """ Number of instances in DataFrame predictions. """ return self._call_java("numInstances") - @property + @property # type: ignore[misc] @since("2.0.0") - def rank(self): + def rank(self) -> int: """ The numeric rank of the fitted linear model. """ return self._call_java("rank") - @property + @property # type: ignore[misc] @since("2.0.0") - def degreesOfFreedom(self): + def degreesOfFreedom(self) -> int: """ Degrees of freedom. """ return self._call_java("degreesOfFreedom") - @property + @property # type: ignore[misc] @since("2.0.0") - def residualDegreeOfFreedom(self): + def residualDegreeOfFreedom(self) -> int: """ The residual degrees of freedom. """ return self._call_java("residualDegreeOfFreedom") - @property + @property # type: ignore[misc] @since("2.0.0") - def residualDegreeOfFreedomNull(self): + def residualDegreeOfFreedomNull(self) -> int: """ The residual degrees of freedom for the null model. """ return self._call_java("residualDegreeOfFreedomNull") - def residuals(self, residualsType="deviance"): + def residuals(self, residualsType: str = "deviance") -> DataFrame: """ Get the residuals of the fitted model by type. @@ -2787,25 +2863,25 @@ def residuals(self, residualsType="deviance"): """ return self._call_java("residuals", residualsType) - @property + @property # type: ignore[misc] @since("2.0.0") - def nullDeviance(self): + def nullDeviance(self) -> float: """ The deviance for the null model. """ return self._call_java("nullDeviance") - @property + @property # type: ignore[misc] @since("2.0.0") - def deviance(self): + def deviance(self) -> float: """ The deviance for the fitted model. """ return self._call_java("deviance") - @property + @property # type: ignore[misc] @since("2.0.0") - def dispersion(self): + def dispersion(self) -> float: """ The dispersion of the fitted model. It is taken as 1.0 for the "binomial" and "poisson" families, and otherwise @@ -2814,9 +2890,9 @@ def dispersion(self): """ return self._call_java("dispersion") - @property + @property # type: ignore[misc] @since("2.0.0") - def aic(self): + def aic(self) -> float: """ Akaike's "An Information Criterion"(AIC) for the fitted model. """ @@ -2831,25 +2907,25 @@ class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSumm .. versionadded:: 2.0.0 """ - @property + @property # type: ignore[misc] @since("2.0.0") - def numIterations(self): + def numIterations(self) -> int: """ Number of training iterations. """ return self._call_java("numIterations") - @property + @property # type: ignore[misc] @since("2.0.0") - def solver(self): + def solver(self) -> str: """ The numeric solver used for training. """ return self._call_java("solver") - @property + @property # type: ignore[misc] @since("2.0.0") - def coefficientStandardErrors(self): + def coefficientStandardErrors(self) -> List[float]: """ Standard error of estimated coefficients and intercept. 
@@ -2858,9 +2934,9 @@ def coefficientStandardErrors(self): """ return self._call_java("coefficientStandardErrors") - @property + @property # type: ignore[misc] @since("2.0.0") - def tValues(self): + def tValues(self) -> List[float]: """ T-statistic of estimated coefficients and intercept. @@ -2869,9 +2945,9 @@ def tValues(self): """ return self._call_java("tValues") - @property + @property # type: ignore[misc] @since("2.0.0") - def pValues(self): + def pValues(self) -> List[float]: """ Two-sided p-value of estimated coefficients and intercept. @@ -2880,7 +2956,7 @@ def pValues(self): """ return self._call_java("pValues") - def __repr__(self): + def __repr__(self) -> str: return self._call_java("toString") @@ -2902,7 +2978,7 @@ class _FactorizationMachinesParams( .. versionadded:: 3.0.0 """ - factorSize = Param( + factorSize: Param[int] = Param( Params._dummy(), "factorSize", "Dimensionality of the factor vectors, " @@ -2910,14 +2986,14 @@ class _FactorizationMachinesParams( typeConverter=TypeConverters.toInt, ) - fitLinear = Param( + fitLinear: Param[bool] = Param( Params._dummy(), "fitLinear", "whether to fit linear term (aka 1-way term)", typeConverter=TypeConverters.toBoolean, ) - miniBatchFraction = Param( + miniBatchFraction: Param[float] = Param( Params._dummy(), "miniBatchFraction", "fraction of the input data " @@ -2925,7 +3001,7 @@ class _FactorizationMachinesParams( typeConverter=TypeConverters.toFloat, ) - initStd = Param( + initStd: Param[float] = Param( Params._dummy(), "initStd", "standard deviation of initial coefficients", @@ -2939,7 +3015,7 @@ class _FactorizationMachinesParams( typeConverter=TypeConverters.toString, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_FactorizationMachinesParams, self).__init__(*args) self._setDefault( factorSize=8, @@ -2955,28 +3031,28 @@ def __init__(self, *args): ) @since("3.0.0") - def getFactorSize(self): + def getFactorSize(self) -> int: """ Gets the value of factorSize or its default value. """ return self.getOrDefault(self.factorSize) @since("3.0.0") - def getFitLinear(self): + def getFitLinear(self) -> bool: """ Gets the value of fitLinear or its default value. """ return self.getOrDefault(self.fitLinear) @since("3.0.0") - def getMiniBatchFraction(self): + def getMiniBatchFraction(self) -> float: """ Gets the value of miniBatchFraction or its default value. """ return self.getOrDefault(self.miniBatchFraction) @since("3.0.0") - def getInitStd(self): + def getInitStd(self) -> float: """ Gets the value of initStd or its default value. """ @@ -2984,7 +3060,12 @@ def getInitStd(self): @inherit_doc -class FMRegressor(_JavaRegressor, _FactorizationMachinesParams, JavaMLWritable, JavaMLReadable): +class FMRegressor( + _JavaRegressor["FMRegressionModel"], + _FactorizationMachinesParams, + JavaMLWritable, + JavaMLReadable["FMRegressor"], +): """ Factorization Machines learning algorithm for regression. 
@@ -3044,24 +3125,26 @@ class FMRegressor(_JavaRegressor, _FactorizationMachinesParams, JavaMLWritable, True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - factorSize=8, - fitIntercept=True, - fitLinear=True, - regParam=0.0, - miniBatchFraction=1.0, - initStd=0.01, - maxIter=100, - stepSize=1.0, - tol=1e-6, - solver="adamW", - seed=None, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + factorSize: int = 8, + fitIntercept: bool = True, + fitLinear: bool = True, + regParam: float = 0.0, + miniBatchFraction: float = 1.0, + initStd: float = 0.01, + maxIter: int = 100, + stepSize: float = 1.0, + tol: float = 1e-6, + solver: str = "adamW", + seed: Optional[int] = None, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -3079,21 +3162,21 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - factorSize=8, - fitIntercept=True, - fitLinear=True, - regParam=0.0, - miniBatchFraction=1.0, - initStd=0.01, - maxIter=100, - stepSize=1.0, - tol=1e-6, - solver="adamW", - seed=None, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + factorSize: int = 8, + fitIntercept: bool = True, + fitLinear: bool = True, + regParam: float = 0.0, + miniBatchFraction: float = 1.0, + initStd: float = 0.01, + maxIter: int = 100, + stepSize: float = 1.0, + tol: float = 1e-6, + solver: str = "adamW", + seed: Optional[int] = None, + ) -> "FMRegressor": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ factorSize=8, fitIntercept=True, fitLinear=True, regParam=0.0, \ @@ -3104,81 +3187,81 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "FMRegressionModel": return FMRegressionModel(java_model) @since("3.0.0") - def setFactorSize(self, value): + def setFactorSize(self, value: int) -> "FMRegressor": """ Sets the value of :py:attr:`factorSize`. """ return self._set(factorSize=value) @since("3.0.0") - def setFitLinear(self, value): + def setFitLinear(self, value: bool) -> "FMRegressor": """ Sets the value of :py:attr:`fitLinear`. """ return self._set(fitLinear=value) @since("3.0.0") - def setMiniBatchFraction(self, value): + def setMiniBatchFraction(self, value: float) -> "FMRegressor": """ Sets the value of :py:attr:`miniBatchFraction`. """ return self._set(miniBatchFraction=value) @since("3.0.0") - def setInitStd(self, value): + def setInitStd(self, value: float) -> "FMRegressor": """ Sets the value of :py:attr:`initStd`. """ return self._set(initStd=value) @since("3.0.0") - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "FMRegressor": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) @since("3.0.0") - def setStepSize(self, value): + def setStepSize(self, value: float) -> "FMRegressor": """ Sets the value of :py:attr:`stepSize`. """ return self._set(stepSize=value) @since("3.0.0") - def setTol(self, value): + def setTol(self, value: float) -> "FMRegressor": """ Sets the value of :py:attr:`tol`. """ return self._set(tol=value) @since("3.0.0") - def setSolver(self, value): + def setSolver(self, value: str) -> "FMRegressor": """ Sets the value of :py:attr:`solver`. 
""" return self._set(solver=value) @since("3.0.0") - def setSeed(self, value): + def setSeed(self, value: int) -> "FMRegressor": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) @since("3.0.0") - def setFitIntercept(self, value): + def setFitIntercept(self, value: bool) -> "FMRegressor": """ Sets the value of :py:attr:`fitIntercept`. """ return self._set(fitIntercept=value) @since("3.0.0") - def setRegParam(self, value): + def setRegParam(self, value: float) -> "FMRegressor": """ Sets the value of :py:attr:`regParam`. """ @@ -3186,7 +3269,10 @@ def setRegParam(self, value): class FMRegressionModel( - _JavaRegressionModel, _FactorizationMachinesParams, JavaMLWritable, JavaMLReadable + _JavaRegressionModel, + _FactorizationMachinesParams, + JavaMLWritable, + JavaMLReadable["FMRegressionModel"], ): """ Model fitted by :class:`FMRegressor`. @@ -3194,25 +3280,25 @@ class FMRegressionModel( .. versionadded:: 3.0.0 """ - @property + @property # type: ignore[misc] @since("3.0.0") - def intercept(self): + def intercept(self) -> float: """ Model intercept. """ return self._call_java("intercept") - @property + @property # type: ignore[misc] @since("3.0.0") - def linear(self): + def linear(self) -> Vector: """ Model linear term. """ return self._call_java("linear") - @property + @property # type: ignore[misc] @since("3.0.0") - def factors(self): + def factors(self) -> Matrix: """ Model factor term. """ diff --git a/python/pyspark/ml/regression.pyi b/python/pyspark/ml/regression.pyi deleted file mode 100644 index 750e4c7223b84..0000000000000 --- a/python/pyspark/ml/regression.pyi +++ /dev/null @@ -1,827 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from typing import Any, Generic, List, Optional -from pyspark.ml._typing import JM, M, T - -import abc -from pyspark.ml import PredictionModel, Predictor -from pyspark.ml.base import _PredictorParams -from pyspark.ml.param.shared import ( - HasAggregationDepth, - HasMaxBlockSizeInMB, - HasElasticNetParam, - HasFeaturesCol, - HasFitIntercept, - HasLabelCol, - HasLoss, - HasMaxIter, - HasPredictionCol, - HasRegParam, - HasSeed, - HasSolver, - HasStandardization, - HasStepSize, - HasTol, - HasVarianceCol, - HasWeightCol, -) -from pyspark.ml.tree import ( - _DecisionTreeModel, - _DecisionTreeParams, - _GBTParams, - _RandomForestParams, - _TreeEnsembleModel, - _TreeRegressorParams, -) -from pyspark.ml.util import ( - GeneralJavaMLWritable, - HasTrainingSummary, - JavaMLReadable, - JavaMLWritable, -) -from pyspark.ml.wrapper import ( - JavaEstimator, - JavaModel, - JavaPredictionModel, - JavaPredictor, - JavaWrapper, -) - -from pyspark.ml.linalg import Matrix, Vector -from pyspark.ml.param import Param -from pyspark.sql.dataframe import DataFrame - -from py4j.java_gateway import JavaObject # type: ignore[import] - -class Regressor(Predictor[M], _PredictorParams, metaclass=abc.ABCMeta): ... -class RegressionModel(PredictionModel[T], _PredictorParams, metaclass=abc.ABCMeta): ... -class _JavaRegressor(Regressor, JavaPredictor[JM], Generic[JM], metaclass=abc.ABCMeta): ... -class _JavaRegressionModel(RegressionModel, JavaPredictionModel[T], metaclass=abc.ABCMeta): ... - -class _LinearRegressionParams( - _PredictorParams, - HasRegParam, - HasElasticNetParam, - HasMaxIter, - HasTol, - HasFitIntercept, - HasStandardization, - HasWeightCol, - HasSolver, - HasAggregationDepth, - HasLoss, - HasMaxBlockSizeInMB, -): - solver: Param[str] - loss: Param[str] - epsilon: Param[float] - def __init__(self, *args: Any): ... - def getEpsilon(self) -> float: ... - -class LinearRegression( - _JavaRegressor[LinearRegressionModel], - _LinearRegressionParams, - JavaMLWritable, - JavaMLReadable[LinearRegression], -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxIter: int = ..., - regParam: float = ..., - elasticNetParam: float = ..., - tol: float = ..., - fitIntercept: bool = ..., - standardization: bool = ..., - solver: str = ..., - weightCol: Optional[str] = ..., - aggregationDepth: int = ..., - epsilon: float = ..., - maxBlockSizeInMB: float = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxIter: int = ..., - regParam: float = ..., - elasticNetParam: float = ..., - tol: float = ..., - fitIntercept: bool = ..., - standardization: bool = ..., - solver: str = ..., - weightCol: Optional[str] = ..., - aggregationDepth: int = ..., - epsilon: float = ..., - maxBlockSizeInMB: float = ..., - ) -> LinearRegression: ... - def setEpsilon(self, value: float) -> LinearRegression: ... - def setMaxIter(self, value: int) -> LinearRegression: ... - def setRegParam(self, value: float) -> LinearRegression: ... - def setTol(self, value: float) -> LinearRegression: ... - def setElasticNetParam(self, value: float) -> LinearRegression: ... - def setFitIntercept(self, value: bool) -> LinearRegression: ... - def setStandardization(self, value: bool) -> LinearRegression: ... - def setWeightCol(self, value: str) -> LinearRegression: ... - def setSolver(self, value: str) -> LinearRegression: ... - def setAggregationDepth(self, value: int) -> LinearRegression: ... 
- def setLoss(self, value: str) -> LinearRegression: ... - def setMaxBlockSizeInMB(self, value: float) -> LinearRegression: ... - def _create_model(self, java_model: JavaObject) -> LinearRegressionModel: ... - -class LinearRegressionModel( - _JavaRegressionModel[Vector], - _LinearRegressionParams, - GeneralJavaMLWritable, - JavaMLReadable[LinearRegressionModel], - HasTrainingSummary[LinearRegressionSummary], -): - @property - def coefficients(self) -> Vector: ... - @property - def intercept(self) -> float: ... - @property - def summary(self) -> LinearRegressionTrainingSummary: ... - def evaluate(self, dataset: DataFrame) -> LinearRegressionSummary: ... - -class LinearRegressionSummary(JavaWrapper): - @property - def predictions(self) -> DataFrame: ... - @property - def predictionCol(self) -> str: ... - @property - def labelCol(self) -> str: ... - @property - def featuresCol(self) -> str: ... - @property - def explainedVariance(self) -> float: ... - @property - def meanAbsoluteError(self) -> float: ... - @property - def meanSquaredError(self) -> float: ... - @property - def rootMeanSquaredError(self) -> float: ... - @property - def r2(self) -> float: ... - @property - def r2adj(self) -> float: ... - @property - def residuals(self) -> DataFrame: ... - @property - def numInstances(self) -> int: ... - @property - def devianceResiduals(self) -> List[float]: ... - @property - def coefficientStandardErrors(self) -> List[float]: ... - @property - def tValues(self) -> List[float]: ... - @property - def pValues(self) -> List[float]: ... - -class LinearRegressionTrainingSummary(LinearRegressionSummary): - @property - def objectiveHistory(self) -> List[float]: ... - @property - def totalIterations(self) -> int: ... - -class _IsotonicRegressionParams(HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol): - isotonic: Param[bool] - featureIndex: Param[int] - def getIsotonic(self) -> bool: ... - def getFeatureIndex(self) -> int: ... - -class IsotonicRegression( - JavaEstimator[IsotonicRegressionModel], - _IsotonicRegressionParams, - HasWeightCol, - JavaMLWritable, - JavaMLReadable[IsotonicRegression], -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - weightCol: Optional[str] = ..., - isotonic: bool = ..., - featureIndex: int = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - weightCol: Optional[str] = ..., - isotonic: bool = ..., - featureIndex: int = ..., - ) -> IsotonicRegression: ... - def setIsotonic(self, value: bool) -> IsotonicRegression: ... - def setFeatureIndex(self, value: int) -> IsotonicRegression: ... - def setFeaturesCol(self, value: str) -> IsotonicRegression: ... - def setPredictionCol(self, value: str) -> IsotonicRegression: ... - def setLabelCol(self, value: str) -> IsotonicRegression: ... - def setWeightCol(self, value: str) -> IsotonicRegression: ... - def _create_model(self, java_model: JavaObject) -> IsotonicRegressionModel: ... - -class IsotonicRegressionModel( - JavaModel, - _IsotonicRegressionParams, - JavaMLWritable, - JavaMLReadable[IsotonicRegressionModel], -): - def setFeaturesCol(self, value: str) -> IsotonicRegressionModel: ... - def setPredictionCol(self, value: str) -> IsotonicRegressionModel: ... - def setFeatureIndex(self, value: int) -> IsotonicRegressionModel: ... - @property - def boundaries(self) -> Vector: ... - @property - def predictions(self) -> Vector: ... - @property - def numFeatures(self) -> int: ... 
- def predict(self, value: float) -> float: ... - -class _DecisionTreeRegressorParams(_DecisionTreeParams, _TreeRegressorParams, HasVarianceCol): - def __init__(self, *args: Any): ... - -class DecisionTreeRegressor( - _JavaRegressor[DecisionTreeRegressionModel], - _DecisionTreeRegressorParams, - JavaMLWritable, - JavaMLReadable[DecisionTreeRegressor], -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - maxMemoryInMB: int = ..., - cacheNodeIds: bool = ..., - checkpointInterval: int = ..., - impurity: str = ..., - seed: Optional[int] = ..., - varianceCol: Optional[str] = ..., - weightCol: Optional[str] = ..., - leafCol: str = ..., - minWeightFractionPerNode: float = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - maxMemoryInMB: int = ..., - cacheNodeIds: bool = ..., - checkpointInterval: int = ..., - impurity: str = ..., - seed: Optional[int] = ..., - varianceCol: Optional[str] = ..., - weightCol: Optional[str] = ..., - leafCol: str = ..., - minWeightFractionPerNode: float = ..., - ) -> DecisionTreeRegressor: ... - def setMaxDepth(self, value: int) -> DecisionTreeRegressor: ... - def setMaxBins(self, value: int) -> DecisionTreeRegressor: ... - def setMinInstancesPerNode(self, value: int) -> DecisionTreeRegressor: ... - def setMinWeightFractionPerNode(self, value: float) -> DecisionTreeRegressor: ... - def setMinInfoGain(self, value: float) -> DecisionTreeRegressor: ... - def setMaxMemoryInMB(self, value: int) -> DecisionTreeRegressor: ... - def setCacheNodeIds(self, value: bool) -> DecisionTreeRegressor: ... - def setImpurity(self, value: str) -> DecisionTreeRegressor: ... - def setCheckpointInterval(self, value: int) -> DecisionTreeRegressor: ... - def setSeed(self, value: int) -> DecisionTreeRegressor: ... - def setWeightCol(self, value: str) -> DecisionTreeRegressor: ... - def setVarianceCol(self, value: str) -> DecisionTreeRegressor: ... - def _create_model(self, java_model: JavaObject) -> DecisionTreeRegressionModel: ... - -class DecisionTreeRegressionModel( - _JavaRegressionModel[Vector], - _DecisionTreeModel, - _DecisionTreeRegressorParams, - JavaMLWritable, - JavaMLReadable[DecisionTreeRegressionModel], -): - def setVarianceCol(self, value: str) -> DecisionTreeRegressionModel: ... - @property - def featureImportances(self) -> Vector: ... - -class _RandomForestRegressorParams(_RandomForestParams, _TreeRegressorParams): - def __init__(self, *args: Any): ... - -class RandomForestRegressor( - _JavaRegressor[RandomForestRegressionModel], - _RandomForestRegressorParams, - JavaMLWritable, - JavaMLReadable[RandomForestRegressor], -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - maxMemoryInMB: int = ..., - cacheNodeIds: bool = ..., - checkpointInterval: int = ..., - impurity: str = ..., - subsamplingRate: float = ..., - seed: Optional[int] = ..., - numTrees: int = ..., - featureSubsetStrategy: str = ..., - leafCol: str = ..., - minWeightFractionPerNode: float = ..., - weightCol: Optional[str] = ..., - bootstrap: Optional[bool] = ..., - ) -> None: ... 
- def setParams( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - maxMemoryInMB: int = ..., - cacheNodeIds: bool = ..., - checkpointInterval: int = ..., - impurity: str = ..., - subsamplingRate: float = ..., - seed: Optional[int] = ..., - numTrees: int = ..., - featureSubsetStrategy: str = ..., - leafCol: str = ..., - minWeightFractionPerNode: float = ..., - weightCol: Optional[str] = ..., - bootstrap: Optional[bool] = ..., - ) -> RandomForestRegressor: ... - def setMaxDepth(self, value: int) -> RandomForestRegressor: ... - def setMaxBins(self, value: int) -> RandomForestRegressor: ... - def setMinInstancesPerNode(self, value: int) -> RandomForestRegressor: ... - def setMinInfoGain(self, value: float) -> RandomForestRegressor: ... - def setMaxMemoryInMB(self, value: int) -> RandomForestRegressor: ... - def setCacheNodeIds(self, value: bool) -> RandomForestRegressor: ... - def setImpurity(self, value: str) -> RandomForestRegressor: ... - def setNumTrees(self, value: int) -> RandomForestRegressor: ... - def setBootstrap(self, value: bool) -> RandomForestRegressor: ... - def setSubsamplingRate(self, value: float) -> RandomForestRegressor: ... - def setFeatureSubsetStrategy(self, value: str) -> RandomForestRegressor: ... - def setCheckpointInterval(self, value: int) -> RandomForestRegressor: ... - def setSeed(self, value: int) -> RandomForestRegressor: ... - def setWeightCol(self, value: str) -> RandomForestRegressor: ... - def setMinWeightFractionPerNode(self, value: float) -> RandomForestRegressor: ... - def _create_model(self, java_model: JavaObject) -> RandomForestRegressionModel: ... - -class RandomForestRegressionModel( - _JavaRegressionModel[Vector], - _TreeEnsembleModel, - _RandomForestRegressorParams, - JavaMLWritable, - JavaMLReadable[RandomForestRegressionModel], -): - @property - def trees(self) -> List[DecisionTreeRegressionModel]: ... - @property - def featureImportances(self) -> Vector: ... - -class _GBTRegressorParams(_GBTParams, _TreeRegressorParams): - supportedLossTypes: List[str] - lossType: Param[str] - def __init__(self, *args: Any): ... - def getLossType(self) -> str: ... - -class GBTRegressor( - _JavaRegressor[GBTRegressionModel], - _GBTRegressorParams, - JavaMLWritable, - JavaMLReadable[GBTRegressor], -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - maxMemoryInMB: int = ..., - cacheNodeIds: bool = ..., - subsamplingRate: float = ..., - checkpointInterval: int = ..., - lossType: str = ..., - maxIter: int = ..., - stepSize: float = ..., - seed: Optional[int] = ..., - impurity: str = ..., - featureSubsetStrategy: str = ..., - validationTol: float = ..., - validationIndicatorCol: Optional[str] = ..., - leafCol: str = ..., - minWeightFractionPerNode: float = ..., - weightCol: Optional[str] = ..., - ) -> None: ... 
- def setParams( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - maxMemoryInMB: int = ..., - cacheNodeIds: bool = ..., - subsamplingRate: float = ..., - checkpointInterval: int = ..., - lossType: str = ..., - maxIter: int = ..., - stepSize: float = ..., - seed: Optional[int] = ..., - impurity: str = ..., - featureSubsetStrategy: str = ..., - validationTol: float = ..., - validationIndicatorCol: Optional[str] = ..., - leafCol: str = ..., - minWeightFractionPerNode: float = ..., - weightCol: Optional[str] = ..., - ) -> GBTRegressor: ... - def setMaxDepth(self, value: int) -> GBTRegressor: ... - def setMaxBins(self, value: int) -> GBTRegressor: ... - def setMinInstancesPerNode(self, value: int) -> GBTRegressor: ... - def setMinInfoGain(self, value: float) -> GBTRegressor: ... - def setMaxMemoryInMB(self, value: int) -> GBTRegressor: ... - def setCacheNodeIds(self, value: bool) -> GBTRegressor: ... - def setImpurity(self, value: str) -> GBTRegressor: ... - def setLossType(self, value: str) -> GBTRegressor: ... - def setSubsamplingRate(self, value: float) -> GBTRegressor: ... - def setFeatureSubsetStrategy(self, value: str) -> GBTRegressor: ... - def setValidationIndicatorCol(self, value: str) -> GBTRegressor: ... - def setMaxIter(self, value: int) -> GBTRegressor: ... - def setCheckpointInterval(self, value: int) -> GBTRegressor: ... - def setSeed(self, value: int) -> GBTRegressor: ... - def setStepSize(self, value: float) -> GBTRegressor: ... - def setWeightCol(self, value: str) -> GBTRegressor: ... - def setMinWeightFractionPerNode(self, value: float) -> GBTRegressor: ... - def _create_model(self, java_model: JavaObject) -> GBTRegressionModel: ... - -class GBTRegressionModel( - _JavaRegressionModel[Vector], - _TreeEnsembleModel, - _GBTRegressorParams, - JavaMLWritable, - JavaMLReadable[GBTRegressionModel], -): - @property - def featureImportances(self) -> Vector: ... - @property - def trees(self) -> List[DecisionTreeRegressionModel]: ... - def evaluateEachIteration(self, dataset: DataFrame, loss: str) -> List[float]: ... - -class _AFTSurvivalRegressionParams( - _PredictorParams, - HasMaxIter, - HasTol, - HasFitIntercept, - HasAggregationDepth, - HasMaxBlockSizeInMB, -): - censorCol: Param[str] - quantileProbabilities: Param[List[float]] - quantilesCol: Param[str] - def __init__(self, *args: Any): ... - def getCensorCol(self) -> str: ... - def getQuantileProbabilities(self) -> List[float]: ... - def getQuantilesCol(self) -> str: ... - -class AFTSurvivalRegression( - _JavaRegressor[AFTSurvivalRegressionModel], - _AFTSurvivalRegressionParams, - JavaMLWritable, - JavaMLReadable[AFTSurvivalRegression], -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - fitIntercept: bool = ..., - maxIter: int = ..., - tol: float = ..., - censorCol: str = ..., - quantileProbabilities: List[float] = ..., - quantilesCol: Optional[str] = ..., - aggregationDepth: int = ..., - maxBlockSizeInMB: float = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - fitIntercept: bool = ..., - maxIter: int = ..., - tol: float = ..., - censorCol: str = ..., - quantileProbabilities: List[float] = ..., - quantilesCol: Optional[str] = ..., - aggregationDepth: int = ..., - maxBlockSizeInMB: float = ..., - ) -> AFTSurvivalRegression: ... 
- def setCensorCol(self, value: str) -> AFTSurvivalRegression: ... - def setQuantileProbabilities(self, value: List[float]) -> AFTSurvivalRegression: ... - def setQuantilesCol(self, value: str) -> AFTSurvivalRegression: ... - def setMaxIter(self, value: int) -> AFTSurvivalRegression: ... - def setTol(self, value: float) -> AFTSurvivalRegression: ... - def setFitIntercept(self, value: bool) -> AFTSurvivalRegression: ... - def setAggregationDepth(self, value: int) -> AFTSurvivalRegression: ... - def setMaxBlockSizeInMB(self, value: float) -> AFTSurvivalRegression: ... - def _create_model(self, java_model: JavaObject) -> AFTSurvivalRegressionModel: ... - -class AFTSurvivalRegressionModel( - _JavaRegressionModel[Vector], - _AFTSurvivalRegressionParams, - JavaMLWritable, - JavaMLReadable[AFTSurvivalRegressionModel], -): - def setQuantileProbabilities(self, value: List[float]) -> AFTSurvivalRegressionModel: ... - def setQuantilesCol(self, value: str) -> AFTSurvivalRegressionModel: ... - @property - def coefficients(self) -> Vector: ... - @property - def intercept(self) -> float: ... - @property - def scale(self) -> float: ... - def predictQuantiles(self, features: Vector) -> Vector: ... - def predict(self, features: Vector) -> float: ... - -class _GeneralizedLinearRegressionParams( - _PredictorParams, - HasFitIntercept, - HasMaxIter, - HasTol, - HasRegParam, - HasWeightCol, - HasSolver, - HasAggregationDepth, -): - family: Param[str] - link: Param[str] - linkPredictionCol: Param[str] - variancePower: Param[float] - linkPower: Param[float] - solver: Param[str] - offsetCol: Param[str] - def __init__(self, *args: Any): ... - def getFamily(self) -> str: ... - def getLinkPredictionCol(self) -> str: ... - def getLink(self) -> str: ... - def getVariancePower(self) -> float: ... - def getLinkPower(self) -> float: ... - def getOffsetCol(self) -> str: ... - -class GeneralizedLinearRegression( - _JavaRegressor[GeneralizedLinearRegressionModel], - _GeneralizedLinearRegressionParams, - JavaMLWritable, - JavaMLReadable[GeneralizedLinearRegression], -): - def __init__( - self, - *, - labelCol: str = ..., - featuresCol: str = ..., - predictionCol: str = ..., - family: str = ..., - link: Optional[str] = ..., - fitIntercept: bool = ..., - maxIter: int = ..., - tol: float = ..., - regParam: float = ..., - weightCol: Optional[str] = ..., - solver: str = ..., - linkPredictionCol: Optional[str] = ..., - variancePower: float = ..., - linkPower: Optional[float] = ..., - offsetCol: Optional[str] = ..., - aggregationDepth: int = ..., - ) -> None: ... - def setParams( - self, - *, - labelCol: str = ..., - featuresCol: str = ..., - predictionCol: str = ..., - family: str = ..., - link: Optional[str] = ..., - fitIntercept: bool = ..., - maxIter: int = ..., - tol: float = ..., - regParam: float = ..., - weightCol: Optional[str] = ..., - solver: str = ..., - linkPredictionCol: Optional[str] = ..., - variancePower: float = ..., - linkPower: Optional[float] = ..., - offsetCol: Optional[str] = ..., - aggregationDepth: int = ..., - ) -> GeneralizedLinearRegression: ... - def setFamily(self, value: str) -> GeneralizedLinearRegression: ... - def setLinkPredictionCol(self, value: str) -> GeneralizedLinearRegression: ... - def setLink(self, value: str) -> GeneralizedLinearRegression: ... - def setVariancePower(self, value: float) -> GeneralizedLinearRegression: ... - def setLinkPower(self, value: float) -> GeneralizedLinearRegression: ... - def setOffsetCol(self, value: str) -> GeneralizedLinearRegression: ... 
- def setMaxIter(self, value: int) -> GeneralizedLinearRegression: ... - def setRegParam(self, value: float) -> GeneralizedLinearRegression: ... - def setTol(self, value: float) -> GeneralizedLinearRegression: ... - def setFitIntercept(self, value: bool) -> GeneralizedLinearRegression: ... - def setWeightCol(self, value: str) -> GeneralizedLinearRegression: ... - def setSolver(self, value: str) -> GeneralizedLinearRegression: ... - def setAggregationDepth(self, value: int) -> GeneralizedLinearRegression: ... - def _create_model(self, java_model: JavaObject) -> GeneralizedLinearRegressionModel: ... - -class GeneralizedLinearRegressionModel( - _JavaRegressionModel[Vector], - _GeneralizedLinearRegressionParams, - JavaMLWritable, - JavaMLReadable[GeneralizedLinearRegressionModel], - HasTrainingSummary[GeneralizedLinearRegressionTrainingSummary], -): - def setLinkPredictionCol(self, value: str) -> GeneralizedLinearRegressionModel: ... - @property - def coefficients(self) -> Vector: ... - @property - def intercept(self) -> float: ... - @property - def summary(self) -> GeneralizedLinearRegressionTrainingSummary: ... - def evaluate(self, dataset: DataFrame) -> GeneralizedLinearRegressionSummary: ... - -class GeneralizedLinearRegressionSummary(JavaWrapper): - @property - def predictions(self) -> DataFrame: ... - @property - def predictionCol(self) -> str: ... - @property - def rank(self) -> int: ... - @property - def degreesOfFreedom(self) -> int: ... - @property - def residualDegreeOfFreedom(self) -> int: ... - @property - def residualDegreeOfFreedomNull(self) -> int: ... - def residuals(self, residualsType: str = ...) -> DataFrame: ... - @property - def nullDeviance(self) -> float: ... - @property - def deviance(self) -> float: ... - @property - def dispersion(self) -> float: ... - @property - def aic(self) -> float: ... - -class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSummary): - @property - def numIterations(self) -> int: ... - @property - def solver(self) -> str: ... - @property - def coefficientStandardErrors(self) -> List[float]: ... - @property - def tValues(self) -> List[float]: ... - @property - def pValues(self) -> List[float]: ... - -class _FactorizationMachinesParams( - _PredictorParams, - HasMaxIter, - HasStepSize, - HasTol, - HasSolver, - HasSeed, - HasFitIntercept, - HasRegParam, - HasWeightCol, -): - factorSize: Param[int] - fitLinear: Param[bool] - miniBatchFraction: Param[float] - initStd: Param[float] - solver: Param[str] - def __init__(self, *args: Any): ... - def getFactorSize(self) -> int: ... - def getFitLinear(self) -> bool: ... - def getMiniBatchFraction(self) -> float: ... - def getInitStd(self) -> float: ... - -class FMRegressor( - _JavaRegressor[FMRegressionModel], - _FactorizationMachinesParams, - JavaMLWritable, - JavaMLReadable[FMRegressor], -): - factorSize: Param[int] - fitLinear: Param[bool] - miniBatchFraction: Param[float] - initStd: Param[float] - solver: Param[str] - def __init__( - self, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - factorSize: int = ..., - fitIntercept: bool = ..., - fitLinear: bool = ..., - regParam: float = ..., - miniBatchFraction: float = ..., - initStd: float = ..., - maxIter: int = ..., - stepSize: float = ..., - tol: float = ..., - solver: str = ..., - seed: Optional[int] = ..., - ) -> None: ... 
- def setParams( - self, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - factorSize: int = ..., - fitIntercept: bool = ..., - fitLinear: bool = ..., - regParam: float = ..., - miniBatchFraction: float = ..., - initStd: float = ..., - maxIter: int = ..., - stepSize: float = ..., - tol: float = ..., - solver: str = ..., - seed: Optional[int] = ..., - ) -> FMRegressor: ... - def setFactorSize(self, value: int) -> FMRegressor: ... - def setFitLinear(self, value: bool) -> FMRegressor: ... - def setMiniBatchFraction(self, value: float) -> FMRegressor: ... - def setInitStd(self, value: float) -> FMRegressor: ... - def setMaxIter(self, value: int) -> FMRegressor: ... - def setStepSize(self, value: float) -> FMRegressor: ... - def setTol(self, value: float) -> FMRegressor: ... - def setSolver(self, value: str) -> FMRegressor: ... - def setSeed(self, value: int) -> FMRegressor: ... - def setFitIntercept(self, value: bool) -> FMRegressor: ... - def setRegParam(self, value: float) -> FMRegressor: ... - def _create_model(self, java_model: JavaObject) -> FMRegressionModel: ... - -class FMRegressionModel( - _JavaRegressionModel, - _FactorizationMachinesParams, - JavaMLWritable, - JavaMLReadable[FMRegressionModel], -): - @property - def intercept(self) -> float: ... - @property - def linear(self) -> Vector: ... - @property - def factors(self) -> Matrix: ... From cfb048ae1648934c29daf5036f98a94df8ff17c0 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 16 Feb 2022 22:50:10 -0800 Subject: [PATCH 253/513] [SPARK-37783][SQL][FOLLOWUP] Enable tail-recursion wherever possible ### What changes were proposed in this pull request? This pr adds `scala.annotation.tailrec` inspected by IDE (IntelliJ) ### Why are the changes needed? To improve performance. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35540 from LuciferYang/SPARK-37783-FOLLOWUP. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala | 1 + .../org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala | 1 + .../org/apache/spark/sql/execution/columnar/ColumnType.scala | 2 +- .../main/scala/org/apache/spark/sql/hive/client/HiveShim.scala | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala index 90f28fbf447b1..e13ff2b3e709f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala @@ -135,6 +135,7 @@ object AnsiTypeCoercion extends TypeCoercionBase { } /** Promotes StringType to other data types. 
*/ + @scala.annotation.tailrec private def findWiderTypeForString(dt1: DataType, dt2: DataType): Option[DataType] = { (dt1, dt2) match { case (StringType, _: IntegralType) => Some(LongType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala index 9d6582476b76b..5dd8c35e4c2e5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala @@ -60,6 +60,7 @@ class UnivocityGenerator( legacyFormat = FAST_DATE_FORMAT, isParsing = false) + @scala.annotation.tailrec private def makeConverter(dataType: DataType): ValueConverter = dataType match { case DateType => (row: InternalRow, ordinal: Int) => dateFormatter.format(row.getInt(ordinal)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala index 9b4c136273451..c029786637687 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala @@ -833,7 +833,7 @@ private[columnar] object ColumnType { case arr: ArrayType => ARRAY(arr) case map: MapType => MAP(map) case struct: StructType => STRUCT(struct) - case udt: UserDefinedType[_] => apply(udt.sqlType) + case udt: UserDefinedType[_] => ColumnType(udt.sqlType) case other => throw QueryExecutionErrors.unsupportedTypeError(other) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index c197b17224c9c..71be39a23af37 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -978,6 +978,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { val inSetThreshold = SQLConf.get.metastorePartitionPruningInSetThreshold object ExtractAttribute { + @scala.annotation.tailrec def unapply(expr: Expression): Option[Attribute] = { expr match { case attr: Attribute => Some(attr) From 0b17e87c612332b770056ec153e706f2f9613ef8 Mon Sep 17 00:00:00 2001 From: Yikf Date: Thu, 17 Feb 2022 17:40:42 +0800 Subject: [PATCH 254/513] [SPARK-38229][SQL] Should't check temp/external/ifNotExists with visitReplaceTable when parser ### What changes were proposed in this pull request? Spark does not support replace table syntax such as CREATE OR REPLACE TEMPORARY TABLE.../REPLACE EXTERNAL TABLE/REPLACE ... IF NOT EXISTS, And we don't need to check these tokens ![image](https://user-images.githubusercontent.com/51110188/154250690-07a28054-3fc1-4f3e-ad1c-206b02273111.png) ### Why are the changes needed? code simplification ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Exist ut Closes #35541 from Yikf/SPARK-38229. 
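To make the simplification above concrete: the REPLACE TABLE variants whose checks were removed never reach `visitReplaceTable`, because the SQL grammar has no production for them, so they fail at parse time. A minimal, hypothetical PySpark sketch of that behavior; the local session setup and the catalog name `testcat` are illustrative assumptions, not part of this change:

```
from pyspark.sql import SparkSession
from pyspark.sql.utils import ParseException

spark = SparkSession.builder.master("local[1]").getOrCreate()

# These forms are rejected by the parser itself, so the removed
# temp/external/ifNotExists checks in visitReplaceTable were unreachable.
for stmt in [
    "CREATE OR REPLACE TEMPORARY TABLE testcat.t (c1 INT) USING parquet",
    "REPLACE EXTERNAL TABLE testcat.t (c1 INT) USING parquet",
]:
    try:
        spark.sql(stmt)
    except ParseException:
        print("rejected at parse time:", stmt)

# Plain REPLACE TABLE / CREATE OR REPLACE TABLE still parses as before;
# executing it additionally requires a v2 catalog that supports table replacement.
```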
Authored-by: Yikf Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 20 +++---------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 773d32d01916d..53fdf84937b5c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2945,9 +2945,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * Validate a replace table statement and return the [[TableIdentifier]]. */ override def visitReplaceTableHeader( - ctx: ReplaceTableHeaderContext): TableHeader = withOrigin(ctx) { + ctx: ReplaceTableHeaderContext): Seq[String] = withOrigin(ctx) { val multipartIdentifier = ctx.multipartIdentifier.parts.asScala.map(_.getText).toSeq - (multipartIdentifier, false, false, false) + multipartIdentifier } /** @@ -3543,22 +3543,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitReplaceTable(ctx: ReplaceTableContext): LogicalPlan = withOrigin(ctx) { - val (table, temp, ifNotExists, external) = visitReplaceTableHeader(ctx.replaceTableHeader) + val table = visitReplaceTableHeader(ctx.replaceTableHeader) val orCreate = ctx.replaceTableHeader().CREATE() != null - - if (temp) { - val action = if (orCreate) "CREATE OR REPLACE" else "REPLACE" - operationNotAllowed(s"$action TEMPORARY TABLE ..., use $action TEMPORARY VIEW instead.", ctx) - } - - if (external) { - operationNotAllowed("REPLACE EXTERNAL TABLE ...", ctx) - } - - if (ifNotExists) { - operationNotAllowed("REPLACE ... IF NOT EXISTS, use CREATE IF NOT EXISTS instead", ctx) - } - val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo) = visitCreateTableClauses(ctx.createTableClauses()) val columns = Option(ctx.colTypeList()).map(visitColTypeList).getOrElse(Nil) From bd7937891322694a9f78351d0ce1736087d6c28d Mon Sep 17 00:00:00 2001 From: Yikf Date: Thu, 17 Feb 2022 17:42:08 +0800 Subject: [PATCH 255/513] [SPARK-38216][SQL] Fail early if all the columns are partitioned columns when creating a Hive table ### What changes were proposed in this pull request? In Hive the schema and partition columns must be disjoint sets, if hive table which all columns are partitioned columns, so that other columns is empty, it will fail when Hive create table, error msg as follow: ` throw new HiveException( "at least one column must be specified for the table") ` That's because we did the disjoint operation in `toHiveTable` So when creating a Hive table, fail early if all the columns are partitioned columns, ### Why are the changes needed? unify analysis error msg when create table with all the columns are partitioned columns ### Does this PR introduce _any_ user-facing change? yes, but error msg only ### How was this patch tested? add ut Closes #35527 from Yikf/ct-hive. 
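As a usage-level illustration of the new early failure, a minimal sketch using the same DDL as the added test; the local, Hive-enabled session setup is an assumption made for illustration:

```
from pyspark.sql import SparkSession
from pyspark.sql.utils import AnalysisException

spark = (
    SparkSession.builder
    .master("local[1]")
    .enableHiveSupport()
    .getOrCreate()
)

try:
    # Every column is also a partition column, so the remaining data schema is empty.
    spark.sql("CREATE TABLE tab (c1 INT) PARTITIONED BY (c1) STORED AS PARQUET")
except AnalysisException as e:
    # With this change the statement fails at analysis time with the same message
    # already used for non-Hive tables: "Cannot use all columns for partition columns".
    print(e)
```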
Authored-by: Yikf Signed-off-by: Wenchen Fan --- .../apache/spark/sql/execution/datasources/rules.scala | 10 +--------- .../apache/spark/sql/hive/execution/HiveDDLSuite.scala | 6 ++++++ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 605f7c17fed30..af43f8d1c1bd8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -319,15 +319,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi conf.resolver) if (schema.nonEmpty && normalizedPartitionCols.length == schema.length) { - if (DDLUtils.isHiveTable(table)) { - // When we hit this branch, it means users didn't specify schema for the table to be - // created, as we always include partition columns in table schema for hive serde tables. - // The real schema will be inferred at hive metastore by hive serde, plus the given - // partition columns, so we should not fail the analysis here. - } else { - failAnalysis("Cannot use all columns for partition columns") - } - + failAnalysis("Cannot use all columns for partition columns") } schema.filter(f => normalizedPartitionCols.contains(f.name)).map(_.dataType).foreach { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index c3c47c58b90be..a6eb12ef6ed67 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -3043,4 +3043,10 @@ class HiveDDLSuite assert(df1.schema.names.toSeq == Seq("A", "B")) } } + + test("SPARK-38216: Fail early if all the columns are partitioned columns") { + assertAnalysisError( + "CREATE TABLE tab (c1 int) PARTITIONED BY (c1) STORED AS PARQUET", + "Cannot use all columns for partition columns") + } } From 16b6686ec186a3fde869d064636cb8dfc1a5662b Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 17 Feb 2022 11:18:09 +0100 Subject: [PATCH 256/513] [SPARK-37410][PYTHON][ML] Inline hints for pyspark.ml.recommendation ### What changes were proposed in this pull request? This PR migrates type `pyspark.ml.recommendation` annotations from stub file to inline type hints. ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35429 from zero323/SPARK-37410. 
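A short usage sketch of what the inlined annotations enable; it assumes a local SparkSession (needed to instantiate the `ALS` wrapper), and the commented-out call is only there to show what a static checker such as mypy can now report without the deleted stub file:

```
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS

spark = SparkSession.builder.master("local[1]").getOrCreate()

als = ALS(userCol="user", itemCol="item", ratingCol="rating")
als = als.setRank(10)   # ok: annotated inline as setRank(self, value: int) -> "ALS"

# als.setRank("10")     # a type checker now flags the str argument directly from
#                       # the inline hints, with no separate .pyi stub required
```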
Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/ml/recommendation.py | 203 ++++++++++++++------------- python/pyspark/ml/recommendation.pyi | 149 -------------------- 2 files changed, 105 insertions(+), 247 deletions(-) delete mode 100644 python/pyspark/ml/recommendation.pyi diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py index b8e2a6097d93f..f13fb721b9a88 100644 --- a/python/pyspark/ml/recommendation.py +++ b/python/pyspark/ml/recommendation.py @@ -16,6 +16,7 @@ # import sys +from typing import Any, Dict, Optional, TYPE_CHECKING from pyspark import since, keyword_only from pyspark.ml.param.shared import ( @@ -30,6 +31,10 @@ from pyspark.ml.common import inherit_doc from pyspark.ml.param import Params, TypeConverters, Param from pyspark.ml.util import JavaMLWritable, JavaMLReadable +from pyspark.sql import DataFrame + +if TYPE_CHECKING: + from py4j.java_gateway import JavaObject __all__ = ["ALS", "ALSModel"] @@ -43,19 +48,19 @@ class _ALSModelParams(HasPredictionCol, HasBlockSize): .. versionadded:: 3.0.0 """ - userCol = Param( + userCol: Param[str] = Param( Params._dummy(), "userCol", "column name for user ids. Ids must be within " + "the integer value range.", typeConverter=TypeConverters.toString, ) - itemCol = Param( + itemCol: Param[str] = Param( Params._dummy(), "itemCol", "column name for item ids. Ids must be within " + "the integer value range.", typeConverter=TypeConverters.toString, ) - coldStartStrategy = Param( + coldStartStrategy: Param[str] = Param( Params._dummy(), "coldStartStrategy", "strategy for dealing with " @@ -66,26 +71,26 @@ class _ALSModelParams(HasPredictionCol, HasBlockSize): typeConverter=TypeConverters.toString, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_ALSModelParams, self).__init__(*args) self._setDefault(blockSize=4096) @since("1.4.0") - def getUserCol(self): + def getUserCol(self) -> str: """ Gets the value of userCol or its default value. """ return self.getOrDefault(self.userCol) @since("1.4.0") - def getItemCol(self): + def getItemCol(self) -> str: """ Gets the value of itemCol or its default value. """ return self.getOrDefault(self.itemCol) @since("2.2.0") - def getColdStartStrategy(self): + def getColdStartStrategy(self) -> str: """ Gets the value of coldStartStrategy or its default value. """ @@ -100,60 +105,60 @@ class _ALSParams(_ALSModelParams, HasMaxIter, HasRegParam, HasCheckpointInterval .. 
versionadded:: 3.0.0 """ - rank = Param( + rank: Param[int] = Param( Params._dummy(), "rank", "rank of the factorization", typeConverter=TypeConverters.toInt ) - numUserBlocks = Param( + numUserBlocks: Param[int] = Param( Params._dummy(), "numUserBlocks", "number of user blocks", typeConverter=TypeConverters.toInt, ) - numItemBlocks = Param( + numItemBlocks: Param[int] = Param( Params._dummy(), "numItemBlocks", "number of item blocks", typeConverter=TypeConverters.toInt, ) - implicitPrefs = Param( + implicitPrefs: Param[bool] = Param( Params._dummy(), "implicitPrefs", "whether to use implicit preference", typeConverter=TypeConverters.toBoolean, ) - alpha = Param( + alpha: Param[float] = Param( Params._dummy(), "alpha", "alpha for implicit preference", typeConverter=TypeConverters.toFloat, ) - ratingCol = Param( + ratingCol: Param[str] = Param( Params._dummy(), "ratingCol", "column name for ratings", typeConverter=TypeConverters.toString, ) - nonnegative = Param( + nonnegative: Param[bool] = Param( Params._dummy(), "nonnegative", "whether to use nonnegative constraint for least squares", typeConverter=TypeConverters.toBoolean, ) - intermediateStorageLevel = Param( + intermediateStorageLevel: Param[str] = Param( Params._dummy(), "intermediateStorageLevel", "StorageLevel for intermediate datasets. Cannot be 'NONE'.", typeConverter=TypeConverters.toString, ) - finalStorageLevel = Param( + finalStorageLevel: Param[str] = Param( Params._dummy(), "finalStorageLevel", "StorageLevel for ALS model factors.", typeConverter=TypeConverters.toString, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_ALSParams, self).__init__(*args) self._setDefault( rank=10, @@ -174,63 +179,63 @@ def __init__(self, *args): ) @since("1.4.0") - def getRank(self): + def getRank(self) -> int: """ Gets the value of rank or its default value. """ return self.getOrDefault(self.rank) @since("1.4.0") - def getNumUserBlocks(self): + def getNumUserBlocks(self) -> int: """ Gets the value of numUserBlocks or its default value. """ return self.getOrDefault(self.numUserBlocks) @since("1.4.0") - def getNumItemBlocks(self): + def getNumItemBlocks(self) -> int: """ Gets the value of numItemBlocks or its default value. """ return self.getOrDefault(self.numItemBlocks) @since("1.4.0") - def getImplicitPrefs(self): + def getImplicitPrefs(self) -> bool: """ Gets the value of implicitPrefs or its default value. """ return self.getOrDefault(self.implicitPrefs) @since("1.4.0") - def getAlpha(self): + def getAlpha(self) -> float: """ Gets the value of alpha or its default value. """ return self.getOrDefault(self.alpha) @since("1.4.0") - def getRatingCol(self): + def getRatingCol(self) -> str: """ Gets the value of ratingCol or its default value. """ return self.getOrDefault(self.ratingCol) @since("1.4.0") - def getNonnegative(self): + def getNonnegative(self) -> bool: """ Gets the value of nonnegative or its default value. """ return self.getOrDefault(self.nonnegative) @since("2.0.0") - def getIntermediateStorageLevel(self): + def getIntermediateStorageLevel(self) -> str: """ Gets the value of intermediateStorageLevel or its default value. """ return self.getOrDefault(self.intermediateStorageLevel) @since("2.0.0") - def getFinalStorageLevel(self): + def getFinalStorageLevel(self) -> str: """ Gets the value of finalStorageLevel or its default value. 
""" @@ -238,7 +243,7 @@ def getFinalStorageLevel(self): @inherit_doc -class ALS(JavaEstimator, _ALSParams, JavaMLWritable, JavaMLReadable): +class ALS(JavaEstimator["ALSModel"], _ALSParams, JavaMLWritable, JavaMLReadable["ALS"]): """ Alternating Least Squares (ALS) matrix factorization. @@ -359,27 +364,29 @@ class ALS(JavaEstimator, _ALSParams, JavaMLWritable, JavaMLReadable): True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - rank=10, - maxIter=10, - regParam=0.1, - numUserBlocks=10, - numItemBlocks=10, - implicitPrefs=False, - alpha=1.0, - userCol="user", - itemCol="item", - seed=None, - ratingCol="rating", - nonnegative=False, - checkpointInterval=10, - intermediateStorageLevel="MEMORY_AND_DISK", - finalStorageLevel="MEMORY_AND_DISK", - coldStartStrategy="nan", - blockSize=4096, + rank: int = 10, + maxIter: int = 10, + regParam: float = 0.1, + numUserBlocks: int = 10, + numItemBlocks: int = 10, + implicitPrefs: bool = False, + alpha: float = 1.0, + userCol: str = "user", + itemCol: str = "item", + seed: Optional[int] = None, + ratingCol: str = "rating", + nonnegative: bool = False, + checkpointInterval: int = 10, + intermediateStorageLevel: str = "MEMORY_AND_DISK", + finalStorageLevel: str = "MEMORY_AND_DISK", + coldStartStrategy: str = "nan", + blockSize: int = 4096, ): """ __init__(self, \\*, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, @@ -398,24 +405,24 @@ def __init__( def setParams( self, *, - rank=10, - maxIter=10, - regParam=0.1, - numUserBlocks=10, - numItemBlocks=10, - implicitPrefs=False, - alpha=1.0, - userCol="user", - itemCol="item", - seed=None, - ratingCol="rating", - nonnegative=False, - checkpointInterval=10, - intermediateStorageLevel="MEMORY_AND_DISK", - finalStorageLevel="MEMORY_AND_DISK", - coldStartStrategy="nan", - blockSize=4096, - ): + rank: int = 10, + maxIter: int = 10, + regParam: float = 0.1, + numUserBlocks: int = 10, + numItemBlocks: int = 10, + implicitPrefs: bool = False, + alpha: float = 1.0, + userCol: str = "user", + itemCol: str = "item", + seed: Optional[int] = None, + ratingCol: str = "rating", + nonnegative: bool = False, + checkpointInterval: int = 10, + intermediateStorageLevel: str = "MEMORY_AND_DISK", + finalStorageLevel: str = "MEMORY_AND_DISK", + coldStartStrategy: str = "nan", + blockSize: int = 4096, + ) -> "ALS": """ setParams(self, \\*, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, \ numItemBlocks=10, implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", \ @@ -427,32 +434,32 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "ALSModel": return ALSModel(java_model) @since("1.4.0") - def setRank(self, value): + def setRank(self, value: int) -> "ALS": """ Sets the value of :py:attr:`rank`. """ return self._set(rank=value) @since("1.4.0") - def setNumUserBlocks(self, value): + def setNumUserBlocks(self, value: int) -> "ALS": """ Sets the value of :py:attr:`numUserBlocks`. """ return self._set(numUserBlocks=value) @since("1.4.0") - def setNumItemBlocks(self, value): + def setNumItemBlocks(self, value: int) -> "ALS": """ Sets the value of :py:attr:`numItemBlocks`. """ return self._set(numItemBlocks=value) @since("1.4.0") - def setNumBlocks(self, value): + def setNumBlocks(self, value: int) -> "ALS": """ Sets both :py:attr:`numUserBlocks` and :py:attr:`numItemBlocks` to the specific value. 
""" @@ -460,107 +467,107 @@ def setNumBlocks(self, value): return self._set(numItemBlocks=value) @since("1.4.0") - def setImplicitPrefs(self, value): + def setImplicitPrefs(self, value: bool) -> "ALS": """ Sets the value of :py:attr:`implicitPrefs`. """ return self._set(implicitPrefs=value) @since("1.4.0") - def setAlpha(self, value): + def setAlpha(self, value: float) -> "ALS": """ Sets the value of :py:attr:`alpha`. """ return self._set(alpha=value) @since("1.4.0") - def setUserCol(self, value): + def setUserCol(self, value: str) -> "ALS": """ Sets the value of :py:attr:`userCol`. """ return self._set(userCol=value) @since("1.4.0") - def setItemCol(self, value): + def setItemCol(self, value: str) -> "ALS": """ Sets the value of :py:attr:`itemCol`. """ return self._set(itemCol=value) @since("1.4.0") - def setRatingCol(self, value): + def setRatingCol(self, value: str) -> "ALS": """ Sets the value of :py:attr:`ratingCol`. """ return self._set(ratingCol=value) @since("1.4.0") - def setNonnegative(self, value): + def setNonnegative(self, value: bool) -> "ALS": """ Sets the value of :py:attr:`nonnegative`. """ return self._set(nonnegative=value) @since("2.0.0") - def setIntermediateStorageLevel(self, value): + def setIntermediateStorageLevel(self, value: str) -> "ALS": """ Sets the value of :py:attr:`intermediateStorageLevel`. """ return self._set(intermediateStorageLevel=value) @since("2.0.0") - def setFinalStorageLevel(self, value): + def setFinalStorageLevel(self, value: str) -> "ALS": """ Sets the value of :py:attr:`finalStorageLevel`. """ return self._set(finalStorageLevel=value) @since("2.2.0") - def setColdStartStrategy(self, value): + def setColdStartStrategy(self, value: str) -> "ALS": """ Sets the value of :py:attr:`coldStartStrategy`. """ return self._set(coldStartStrategy=value) - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "ALS": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) - def setRegParam(self, value): + def setRegParam(self, value: float) -> "ALS": """ Sets the value of :py:attr:`regParam`. """ return self._set(regParam=value) - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "ALS": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) - def setCheckpointInterval(self, value): + def setCheckpointInterval(self, value: int) -> "ALS": """ Sets the value of :py:attr:`checkpointInterval`. """ return self._set(checkpointInterval=value) - def setSeed(self, value): + def setSeed(self, value: int) -> "ALS": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) @since("3.0.0") - def setBlockSize(self, value): + def setBlockSize(self, value: int) -> "ALS": """ Sets the value of :py:attr:`blockSize`. """ return self._set(blockSize=value) -class ALSModel(JavaModel, _ALSModelParams, JavaMLWritable, JavaMLReadable): +class ALSModel(JavaModel, _ALSModelParams, JavaMLWritable, JavaMLReadable["ALSModel"]): """ Model fitted by ALS. @@ -568,65 +575,65 @@ class ALSModel(JavaModel, _ALSModelParams, JavaMLWritable, JavaMLReadable): """ @since("3.0.0") - def setUserCol(self, value): + def setUserCol(self, value: str) -> "ALSModel": """ Sets the value of :py:attr:`userCol`. """ return self._set(userCol=value) @since("3.0.0") - def setItemCol(self, value): + def setItemCol(self, value: str) -> "ALSModel": """ Sets the value of :py:attr:`itemCol`. 
""" return self._set(itemCol=value) @since("3.0.0") - def setColdStartStrategy(self, value): + def setColdStartStrategy(self, value: str) -> "ALSModel": """ Sets the value of :py:attr:`coldStartStrategy`. """ return self._set(coldStartStrategy=value) @since("3.0.0") - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "ALSModel": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) @since("3.0.0") - def setBlockSize(self, value): + def setBlockSize(self, value: int) -> "ALSModel": """ Sets the value of :py:attr:`blockSize`. """ return self._set(blockSize=value) - @property + @property # type: ignore[misc] @since("1.4.0") - def rank(self): + def rank(self) -> int: """rank of the matrix factorization model""" return self._call_java("rank") - @property + @property # type: ignore[misc] @since("1.4.0") - def userFactors(self): + def userFactors(self) -> DataFrame: """ a DataFrame that stores user factors in two columns: `id` and `features` """ return self._call_java("userFactors") - @property + @property # type: ignore[misc] @since("1.4.0") - def itemFactors(self): + def itemFactors(self) -> DataFrame: """ a DataFrame that stores item factors in two columns: `id` and `features` """ return self._call_java("itemFactors") - def recommendForAllUsers(self, numItems): + def recommendForAllUsers(self, numItems: int) -> DataFrame: """ Returns top `numItems` items recommended for each user, for all users. @@ -645,7 +652,7 @@ def recommendForAllUsers(self, numItems): """ return self._call_java("recommendForAllUsers", numItems) - def recommendForAllItems(self, numUsers): + def recommendForAllItems(self, numUsers: int) -> DataFrame: """ Returns top `numUsers` users recommended for each item, for all items. @@ -664,7 +671,7 @@ def recommendForAllItems(self, numUsers): """ return self._call_java("recommendForAllItems", numUsers) - def recommendForUserSubset(self, dataset, numItems): + def recommendForUserSubset(self, dataset: DataFrame, numItems: int) -> DataFrame: """ Returns top `numItems` items recommended for each user id in the input data set. Note that if there are duplicate ids in the input dataset, only one set of recommendations per unique @@ -687,7 +694,7 @@ def recommendForUserSubset(self, dataset, numItems): """ return self._call_java("recommendForUserSubset", dataset, numItems) - def recommendForItemSubset(self, dataset, numUsers): + def recommendForItemSubset(self, dataset: DataFrame, numUsers: int) -> DataFrame: """ Returns top `numUsers` users recommended for each item id in the input data set. Note that if there are duplicate ids in the input dataset, only one set of recommendations per unique diff --git a/python/pyspark/ml/recommendation.pyi b/python/pyspark/ml/recommendation.pyi deleted file mode 100644 index f7faacaf48b29..0000000000000 --- a/python/pyspark/ml/recommendation.pyi +++ /dev/null @@ -1,149 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any, Optional - -import sys # noqa: F401 - -from pyspark import since, keyword_only # noqa: F401 -from pyspark.ml.param.shared import ( - HasBlockSize, - HasCheckpointInterval, - HasMaxIter, - HasPredictionCol, - HasRegParam, - HasSeed, -) -from pyspark.ml.wrapper import JavaEstimator, JavaModel -from pyspark.ml.common import inherit_doc # noqa: F401 -from pyspark.ml.param import Param -from pyspark.ml.util import JavaMLWritable, JavaMLReadable - -from pyspark.sql.dataframe import DataFrame - -from py4j.java_gateway import JavaObject # type: ignore[import] - -class _ALSModelParams(HasPredictionCol, HasBlockSize): - userCol: Param[str] - itemCol: Param[str] - coldStartStrategy: Param[str] - def getUserCol(self) -> str: ... - def getItemCol(self) -> str: ... - def getColdStartStrategy(self) -> str: ... - -class _ALSParams(_ALSModelParams, HasMaxIter, HasRegParam, HasCheckpointInterval, HasSeed): - rank: Param[int] - numUserBlocks: Param[int] - numItemBlocks: Param[int] - implicitPrefs: Param[bool] - alpha: Param[float] - ratingCol: Param[str] - nonnegative: Param[bool] - intermediateStorageLevel: Param[str] - finalStorageLevel: Param[str] - def __init__(self, *args: Any): ... - def getRank(self) -> int: ... - def getNumUserBlocks(self) -> int: ... - def getNumItemBlocks(self) -> int: ... - def getImplicitPrefs(self) -> bool: ... - def getAlpha(self) -> float: ... - def getRatingCol(self) -> str: ... - def getNonnegative(self) -> bool: ... - def getIntermediateStorageLevel(self) -> str: ... - def getFinalStorageLevel(self) -> str: ... - -class ALS(JavaEstimator[ALSModel], _ALSParams, JavaMLWritable, JavaMLReadable[ALS]): - def __init__( - self, - *, - rank: int = ..., - maxIter: int = ..., - regParam: float = ..., - numUserBlocks: int = ..., - numItemBlocks: int = ..., - implicitPrefs: bool = ..., - alpha: float = ..., - userCol: str = ..., - itemCol: str = ..., - seed: Optional[int] = ..., - ratingCol: str = ..., - nonnegative: bool = ..., - checkpointInterval: int = ..., - intermediateStorageLevel: str = ..., - finalStorageLevel: str = ..., - coldStartStrategy: str = ..., - blockSize: int = ..., - ) -> None: ... - def setParams( - self, - *, - rank: int = ..., - maxIter: int = ..., - regParam: float = ..., - numUserBlocks: int = ..., - numItemBlocks: int = ..., - implicitPrefs: bool = ..., - alpha: float = ..., - userCol: str = ..., - itemCol: str = ..., - seed: Optional[int] = ..., - ratingCol: str = ..., - nonnegative: bool = ..., - checkpointInterval: int = ..., - intermediateStorageLevel: str = ..., - finalStorageLevel: str = ..., - coldStartStrategy: str = ..., - blockSize: int = ..., - ) -> ALS: ... - def setRank(self, value: int) -> ALS: ... - def setNumUserBlocks(self, value: int) -> ALS: ... - def setNumItemBlocks(self, value: int) -> ALS: ... - def setNumBlocks(self, value: int) -> ALS: ... - def setImplicitPrefs(self, value: bool) -> ALS: ... - def setAlpha(self, value: float) -> ALS: ... - def setUserCol(self, value: str) -> ALS: ... - def setItemCol(self, value: str) -> ALS: ... - def setRatingCol(self, value: str) -> ALS: ... 
- def setNonnegative(self, value: bool) -> ALS: ... - def setIntermediateStorageLevel(self, value: str) -> ALS: ... - def setFinalStorageLevel(self, value: str) -> ALS: ... - def setColdStartStrategy(self, value: str) -> ALS: ... - def setMaxIter(self, value: int) -> ALS: ... - def setRegParam(self, value: float) -> ALS: ... - def setPredictionCol(self, value: str) -> ALS: ... - def setCheckpointInterval(self, value: int) -> ALS: ... - def setSeed(self, value: int) -> ALS: ... - def setBlockSize(self, value: int) -> ALS: ... - def _create_model(self, java_model: JavaObject) -> ALSModel: ... - -class ALSModel(JavaModel, _ALSModelParams, JavaMLWritable, JavaMLReadable[ALSModel]): - def setUserCol(self, value: str) -> ALSModel: ... - def setItemCol(self, value: str) -> ALSModel: ... - def setColdStartStrategy(self, value: str) -> ALSModel: ... - def setPredictionCol(self, value: str) -> ALSModel: ... - def setBlockSize(self, value: int) -> ALSModel: ... - @property - def rank(self) -> int: ... - @property - def userFactors(self) -> DataFrame: ... - @property - def itemFactors(self) -> DataFrame: ... - def recommendForAllUsers(self, numItems: int) -> DataFrame: ... - def recommendForAllItems(self, numUsers: int) -> DataFrame: ... - def recommendForUserSubset(self, dataset: DataFrame, numItems: int) -> DataFrame: ... - def recommendForItemSubset(self, dataset: DataFrame, numUsers: int) -> DataFrame: ... From f33e371a2759e797351743f85df94ea27243b656 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 17 Feb 2022 09:54:57 -0800 Subject: [PATCH 257/513] [SPARK-38244][K8S][BUILD] Upgrade kubernetes-client to 5.12.1 ### What changes were proposed in this pull request? Upgrade kubernetes-client to 5.12.1: https://github.com/fabric8io/kubernetes-client/releases/tag/v5.12.1 ### Why are the changes needed? The next kubernetes client version will be 6.x with breaking changes: https://github.com/fabric8io/kubernetes-client/blob/master/CHANGELOG.md#note-breaking-changes-in-the-api . We'd better to upgrade to latest 5.X to reduce follow upgrade cost. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - CI - integration test Closes #35557 from Yikun/k8scli-5.12.1. 
Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 42 +++++++++++++-------------- dev/deps/spark-deps-hadoop-3-hive-2.3 | 42 +++++++++++++-------------- pom.xml | 2 +- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index b4fd14b30a4dd..90a4ce07e8d8b 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -162,27 +162,27 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/5.12.0//kubernetes-client-5.12.0.jar -kubernetes-model-admissionregistration/5.12.0//kubernetes-model-admissionregistration-5.12.0.jar -kubernetes-model-apiextensions/5.12.0//kubernetes-model-apiextensions-5.12.0.jar -kubernetes-model-apps/5.12.0//kubernetes-model-apps-5.12.0.jar -kubernetes-model-autoscaling/5.12.0//kubernetes-model-autoscaling-5.12.0.jar -kubernetes-model-batch/5.12.0//kubernetes-model-batch-5.12.0.jar -kubernetes-model-certificates/5.12.0//kubernetes-model-certificates-5.12.0.jar -kubernetes-model-common/5.12.0//kubernetes-model-common-5.12.0.jar -kubernetes-model-coordination/5.12.0//kubernetes-model-coordination-5.12.0.jar -kubernetes-model-core/5.12.0//kubernetes-model-core-5.12.0.jar -kubernetes-model-discovery/5.12.0//kubernetes-model-discovery-5.12.0.jar -kubernetes-model-events/5.12.0//kubernetes-model-events-5.12.0.jar -kubernetes-model-extensions/5.12.0//kubernetes-model-extensions-5.12.0.jar -kubernetes-model-flowcontrol/5.12.0//kubernetes-model-flowcontrol-5.12.0.jar -kubernetes-model-metrics/5.12.0//kubernetes-model-metrics-5.12.0.jar -kubernetes-model-networking/5.12.0//kubernetes-model-networking-5.12.0.jar -kubernetes-model-node/5.12.0//kubernetes-model-node-5.12.0.jar -kubernetes-model-policy/5.12.0//kubernetes-model-policy-5.12.0.jar -kubernetes-model-rbac/5.12.0//kubernetes-model-rbac-5.12.0.jar -kubernetes-model-scheduling/5.12.0//kubernetes-model-scheduling-5.12.0.jar -kubernetes-model-storageclass/5.12.0//kubernetes-model-storageclass-5.12.0.jar +kubernetes-client/5.12.1//kubernetes-client-5.12.1.jar +kubernetes-model-admissionregistration/5.12.1//kubernetes-model-admissionregistration-5.12.1.jar +kubernetes-model-apiextensions/5.12.1//kubernetes-model-apiextensions-5.12.1.jar +kubernetes-model-apps/5.12.1//kubernetes-model-apps-5.12.1.jar +kubernetes-model-autoscaling/5.12.1//kubernetes-model-autoscaling-5.12.1.jar +kubernetes-model-batch/5.12.1//kubernetes-model-batch-5.12.1.jar +kubernetes-model-certificates/5.12.1//kubernetes-model-certificates-5.12.1.jar +kubernetes-model-common/5.12.1//kubernetes-model-common-5.12.1.jar +kubernetes-model-coordination/5.12.1//kubernetes-model-coordination-5.12.1.jar +kubernetes-model-core/5.12.1//kubernetes-model-core-5.12.1.jar +kubernetes-model-discovery/5.12.1//kubernetes-model-discovery-5.12.1.jar +kubernetes-model-events/5.12.1//kubernetes-model-events-5.12.1.jar +kubernetes-model-extensions/5.12.1//kubernetes-model-extensions-5.12.1.jar +kubernetes-model-flowcontrol/5.12.1//kubernetes-model-flowcontrol-5.12.1.jar +kubernetes-model-metrics/5.12.1//kubernetes-model-metrics-5.12.1.jar +kubernetes-model-networking/5.12.1//kubernetes-model-networking-5.12.1.jar +kubernetes-model-node/5.12.1//kubernetes-model-node-5.12.1.jar +kubernetes-model-policy/5.12.1//kubernetes-model-policy-5.12.1.jar +kubernetes-model-rbac/5.12.1//kubernetes-model-rbac-5.12.1.jar 
+kubernetes-model-scheduling/5.12.1//kubernetes-model-scheduling-5.12.1.jar +kubernetes-model-storageclass/5.12.1//kubernetes-model-storageclass-5.12.1.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 96bd2663df60a..b052f1a6aa275 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -148,27 +148,27 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/5.12.0//kubernetes-client-5.12.0.jar -kubernetes-model-admissionregistration/5.12.0//kubernetes-model-admissionregistration-5.12.0.jar -kubernetes-model-apiextensions/5.12.0//kubernetes-model-apiextensions-5.12.0.jar -kubernetes-model-apps/5.12.0//kubernetes-model-apps-5.12.0.jar -kubernetes-model-autoscaling/5.12.0//kubernetes-model-autoscaling-5.12.0.jar -kubernetes-model-batch/5.12.0//kubernetes-model-batch-5.12.0.jar -kubernetes-model-certificates/5.12.0//kubernetes-model-certificates-5.12.0.jar -kubernetes-model-common/5.12.0//kubernetes-model-common-5.12.0.jar -kubernetes-model-coordination/5.12.0//kubernetes-model-coordination-5.12.0.jar -kubernetes-model-core/5.12.0//kubernetes-model-core-5.12.0.jar -kubernetes-model-discovery/5.12.0//kubernetes-model-discovery-5.12.0.jar -kubernetes-model-events/5.12.0//kubernetes-model-events-5.12.0.jar -kubernetes-model-extensions/5.12.0//kubernetes-model-extensions-5.12.0.jar -kubernetes-model-flowcontrol/5.12.0//kubernetes-model-flowcontrol-5.12.0.jar -kubernetes-model-metrics/5.12.0//kubernetes-model-metrics-5.12.0.jar -kubernetes-model-networking/5.12.0//kubernetes-model-networking-5.12.0.jar -kubernetes-model-node/5.12.0//kubernetes-model-node-5.12.0.jar -kubernetes-model-policy/5.12.0//kubernetes-model-policy-5.12.0.jar -kubernetes-model-rbac/5.12.0//kubernetes-model-rbac-5.12.0.jar -kubernetes-model-scheduling/5.12.0//kubernetes-model-scheduling-5.12.0.jar -kubernetes-model-storageclass/5.12.0//kubernetes-model-storageclass-5.12.0.jar +kubernetes-client/5.12.1//kubernetes-client-5.12.1.jar +kubernetes-model-admissionregistration/5.12.1//kubernetes-model-admissionregistration-5.12.1.jar +kubernetes-model-apiextensions/5.12.1//kubernetes-model-apiextensions-5.12.1.jar +kubernetes-model-apps/5.12.1//kubernetes-model-apps-5.12.1.jar +kubernetes-model-autoscaling/5.12.1//kubernetes-model-autoscaling-5.12.1.jar +kubernetes-model-batch/5.12.1//kubernetes-model-batch-5.12.1.jar +kubernetes-model-certificates/5.12.1//kubernetes-model-certificates-5.12.1.jar +kubernetes-model-common/5.12.1//kubernetes-model-common-5.12.1.jar +kubernetes-model-coordination/5.12.1//kubernetes-model-coordination-5.12.1.jar +kubernetes-model-core/5.12.1//kubernetes-model-core-5.12.1.jar +kubernetes-model-discovery/5.12.1//kubernetes-model-discovery-5.12.1.jar +kubernetes-model-events/5.12.1//kubernetes-model-events-5.12.1.jar +kubernetes-model-extensions/5.12.1//kubernetes-model-extensions-5.12.1.jar +kubernetes-model-flowcontrol/5.12.1//kubernetes-model-flowcontrol-5.12.1.jar +kubernetes-model-metrics/5.12.1//kubernetes-model-metrics-5.12.1.jar +kubernetes-model-networking/5.12.1//kubernetes-model-networking-5.12.1.jar +kubernetes-model-node/5.12.1//kubernetes-model-node-5.12.1.jar +kubernetes-model-policy/5.12.1//kubernetes-model-policy-5.12.1.jar +kubernetes-model-rbac/5.12.1//kubernetes-model-rbac-5.12.1.jar 
+kubernetes-model-scheduling/5.12.1//kubernetes-model-scheduling-5.12.1.jar +kubernetes-model-storageclass/5.12.1//kubernetes-model-storageclass-5.12.1.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar diff --git a/pom.xml b/pom.xml index 7165cb5229821..a2b98e93c062a 100644 --- a/pom.xml +++ b/pom.xml @@ -204,7 +204,7 @@ 7.0.0 org.fusesource.leveldbjni - 5.12.0 + 5.12.1 ${java.home} From 724bc319ca0859e2a05c251741f2a667d17e60ee Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Thu, 17 Feb 2022 13:11:20 -0800 Subject: [PATCH 258/513] [SPARK-38182][SQL] Fix NoSuchElementException if pushed filter does not contain any references ### What changes were proposed in this pull request? skip non-references filter during binding metadata-based filiter ### Why are the changes needed? this issue is from https://github.com/apache/spark/pull/35055. reproduce: ```sql CREATE TABLE t (c1 int) USING PARQUET; SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.BooleanSimplification; SELECT * FROM t WHERE c1 = 1 AND 2 > 1; ``` and the error msg: ``` java.util.NoSuchElementException: next on empty iterator at scala.collection.Iterator$$anon$2.next(Iterator.scala:41) at scala.collection.Iterator$$anon$2.next(Iterator.scala:39) at scala.collection.mutable.LinkedHashSet$$anon$1.next(LinkedHashSet.scala:89) at scala.collection.IterableLike.head(IterableLike.scala:109) at scala.collection.IterableLike.head$(IterableLike.scala:108) at org.apache.spark.sql.catalyst.expressions.AttributeSet.head(AttributeSet.scala:69) at org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex.$anonfun$listFiles$3(PartitioningAwareFileIndex.scala:85) at scala.Option.map(Option.scala:230) at org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex.listFiles(PartitioningAwareFileIndex.scala:84) at org.apache.spark.sql.execution.FileSourceScanExec.selectedPartitions$lzycompute(DataSourceScanExec.scala:249) ``` ### Does this PR introduce _any_ user-facing change? yes, a bug fix ### How was this patch tested? add a new test Closes #35487 from ulysses-you/SPARK-38182. 
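For reference, a minimal spark-shell reproduction of the scenario above (illustrative only, not part of the patch; it builds a throwaway local session and reuses the table name `t` from the SQL snippet):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").getOrCreate()

spark.sql("CREATE TABLE t (c1 int) USING PARQUET")
spark.sql("SET spark.sql.optimizer.excludedRules=" +
  "org.apache.spark.sql.catalyst.optimizer.BooleanSimplification")

// Before this fix, the literal-only conjunct `2 > 1` reached the metadata-filter
// binding with an empty reference set and triggered the NoSuchElementException shown
// above; with the `references.nonEmpty` guard the query simply returns 0 rows.
spark.sql("SELECT * FROM t WHERE c1 = 1 AND 2 > 1").show()
```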
Authored-by: ulysses-you Signed-off-by: Dongjoon Hyun --- .../datasources/PartitioningAwareFileIndex.scala | 10 ++++++---- .../sql/execution/datasources/FileIndexSuite.scala | 12 ++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index 35cd7c2715869..d70c4b11bc0d7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -74,10 +74,12 @@ abstract class PartitioningAwareFileIndex( } // retrieve the file metadata filters and reduce to a final filter expression - val fileMetadataFilterOpt = dataFilters.filter(_.references.forall { - case FileSourceMetadataAttribute(_) => true - case _ => false - }).reduceOption(expressions.And) + val fileMetadataFilterOpt = dataFilters.filter { f => + f.references.nonEmpty && f.references.forall { + case FileSourceMetadataAttribute(_) => true + case _ => false + } + }.reduceOption(expressions.And) // - create a bound references for filters: put the metadata struct at 0 position for each file // - retrieve the final metadata struct (could be pruned) from filters diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala index fcaf8df4f9a02..690623e72c994 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala @@ -520,6 +520,18 @@ class FileIndexSuite extends SharedSparkSession { SQLConf.get.setConf(StaticSQLConf.METADATA_CACHE_TTL_SECONDS, previousValue) } } + + test("SPARK-38182: Fix NoSuchElementException if pushed filter does not contain any " + + "references") { + withTable("t") { + withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> + "org.apache.spark.sql.catalyst.optimizer.BooleanSimplification") { + + sql("CREATE TABLE t (c1 int) USING PARQUET") + assert(sql("SELECT * FROM t WHERE c1 = 1 AND 2 > 1").count() == 0) + } + } + } } object DeletionRaceFileSystem { From 4070ea88fc08a4730af89a880cbe37d7382b9063 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Thu, 17 Feb 2022 15:59:30 -0800 Subject: [PATCH 259/513] [SPARK-38118][SQL] Func(wrong data type) in HAVING clause should throw data mismatch error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? ``` with t as (select true c) select t.c from t group by t.c having mean(t.c) > 0 ``` This query throws `Column 't.c' does not exist. Did you mean one of the following? [t.c]` However, mean(boolean) is not a supported function signature, thus error result should be  `cannot resolve 'mean(t.c)' due to data type mismatch: function average requires numeric or interval types, not boolean`   This is because 1. The mean(boolean) in HAVING was not marked as resolved in `ResolveFunctions` rule. 2. Thus in `ResolveAggregationFunctions`, the `TempResolvedColumn` as a wrapper in `mean(TempResolvedColumn(t.c))` cannot be removed (only resolved AGG can remove its’s `TempResolvedColumn`). 3. 
Thus in a later batch rule applying,  `TempResolvedColumn` was reverted and it becomes mean(`t.c`), so mean loses the information about t.c. 4. Thus at the last step, the analyzer can only report t.c not found.   mean(boolean) in HAVING is not marked as resolved in {{ResolveFunctions}} rule because  1. It uses Expression default `resolved` field population code: ```lazy val resolved: Boolean = childrenResolved && checkInputDataTypes().isSuccess``` 2. During the analyzing,  mean(boolean) is mean(TempResolveColumn(boolean), thus childrenResolved is true. 3. however checkInputDataTypes() will be false [Average.scala#L55](https://github.com/apache/spark/blob/74ebef243c18e7a8f32bf90ea75ab6afed9e3132/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L55) 4. Thus eventually Average's `resolved`  will be false, but it leads to wrong error message. ### Why are the changes needed? Improve error message so users can better debug their query. ### Does this PR introduce _any_ user-facing change? Yes. This will change user-facing error message. ### How was this patch tested? Unit Test Closes #35404 from amaliujia/meanboolean. Authored-by: Rui Wang Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/Analyzer.scala | 28 +++++++++++++++++-- .../sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../sql/catalyst/analysis/unresolved.scala | 1 + .../sql/catalyst/analysis/AnalysisSuite.scala | 24 ++++++++++++++++ 4 files changed, 52 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 18684bdad63cc..195c25177da8c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -28,6 +28,7 @@ import scala.util.{Failure, Random, Success, Try} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst._ +import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.encoders.OuterScopes import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _} @@ -4247,7 +4248,30 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { * rule right after the main resolution batch. */ object RemoveTempResolvedColumn extends Rule[LogicalPlan] { - override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveExpressions { - case t: TempResolvedColumn => UnresolvedAttribute(t.nameParts) + override def apply(plan: LogicalPlan): LogicalPlan = { + plan.foreachUp { + // HAVING clause will be resolved as a Filter. When having func(column with wrong data type), + // the column could be wrapped by a TempResolvedColumn, e.g. mean(tempresolvedcolumn(t.c)). + // Because TempResolvedColumn can still preserve column data type, here is a chance to check + // if the data type matches with the required data type of the function. We can throw an error + // when data types mismatches. 
+ case operator: Filter => + operator.expressions.foreach(_.foreachUp { + case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure => + e.checkInputDataTypes() match { + case TypeCheckResult.TypeCheckFailure(message) => + e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true) + e.failAnalysis( + s"cannot resolve '${e.sql}' due to data type mismatch: $message" + + extraHintForAnsiTypeCoercionExpression(plan)) + } + case _ => + }) + case _ => + } + + plan.resolveExpressions { + case t: TempResolvedColumn => UnresolvedAttribute(t.nameParts) + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index d06996a09df02..2da2686bdc847 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -623,7 +623,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { } } - private def extraHintForAnsiTypeCoercionExpression(plan: LogicalPlan): String = { + private[analysis] def extraHintForAnsiTypeCoercionExpression(plan: LogicalPlan): String = { if (!SQLConf.get.ansiEnabled) { "" } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index b861e5df72c3a..c8ef71eb8b89a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -645,4 +645,5 @@ case class TempResolvedColumn(child: Expression, nameParts: Seq[String]) extends override def dataType: DataType = child.dataType override protected def withNewChildInternal(newChild: Expression): Expression = copy(child = newChild) + override def sql: String = child.sql } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 63f90a8d6b886..ff05b797e7cd7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -1150,4 +1150,28 @@ class AnalysisSuite extends AnalysisTest with Matchers { "MISSING_COLUMN", Array("c.y", "x")) } + + test("SPARK-38118: Func(wrong_type) in the HAVING clause should throw data mismatch error") { + Seq("mean", "abs").foreach { func => + assertAnalysisError(parsePlan( + s""" + |WITH t as (SELECT true c) + |SELECT t.c + |FROM t + |GROUP BY t.c + |HAVING ${func}(t.c) > 0d""".stripMargin), + Seq(s"cannot resolve '$func(t.c)' due to data type mismatch"), + false) + + assertAnalysisError(parsePlan( + s""" + |WITH t as (SELECT true c, false d) + |SELECT (t.c AND t.d) c + |FROM t + |GROUP BY t.c + |HAVING ${func}(c) > 0d""".stripMargin), + Seq(s"cannot resolve '$func(t.c)' due to data type mismatch"), + false) + } + } } From 3a179d762602243497c528bea3b3370e7548fa2d Mon Sep 17 00:00:00 2001 From: Microsoft Learn Student Date: Thu, 17 Feb 2022 16:09:28 -0800 Subject: [PATCH 260/513] [MINOR][DOCS] Fixed closing tags in running-on-kubernetes.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit *This contribution is my original work and I license the work to the project under the 
project’s open source license.* ### What changes were proposed in this pull request? Several `` elements in `running-on-kubernetes.md` had typos in their closing tags, causing rendering issues on the HTML site. Example of rendering issue: https://spark.apache.org/docs/3.2.1/running-on-kubernetes.html#configuration This PR fixes those typos and resolves the rendering issue. ### Why are the changes needed? The typo fixes allows the HTML site to render the article correctly. ### Does this PR introduce _any_ user-facing change? Yes, currently the site shows several headers and closing tags as plain text: https://spark.apache.org/docs/3.2.1/running-on-kubernetes.html#configuration This PR allows those headers to be correctly rendered and no longer shows the table's closing tags. ### How was this patch tested? No tests were added. I did a local build of the site per the instructions and confirmed the HTML renders correctly. Closes #35561 from zr-msft/fix-doc-running-on-kubernetes. Authored-by: Microsoft Learn Student Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 375de57474c79..ee0b23012a591 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1141,7 +1141,7 @@ See the [configuration page](configuration.html) for information on Spark config spark.kubernetes.memoryOverheadFactor 0.1 - This sets the Memory Overhead Factor that will allocate memory to non-JVM memory, which includes off-heap memory allocations, non-JVM tasks, various systems processes, and tmpfs-based local directories when spark.kubernetes.local.dirs.tmpfs is true. For JVM-based jobs this value will default to 0.10 and 0.40 for non-JVM jobs. + This sets the Memory Overhead Factor that will allocate memory to non-JVM memory, which includes off-heap memory allocations, non-JVM tasks, various systems processes, and tmpfs-based local directories when spark.kubernetes.local.dirs.tmpfs is true. For JVM-based jobs this value will default to 0.10 and 0.40 for non-JVM jobs. This is done as non-JVM tasks need more non-JVM heap space and such tasks commonly fail with "Memory Overhead Exceeded" errors. This preempts this error with a higher default. 2.4.0 @@ -1314,7 +1314,7 @@ See the [configuration page](configuration.html) for information on Spark config 3.0.0 - spark.kubernetes.executor.decommmissionLabel + spark.kubernetes.executor.decommmissionLabel (none) Label to be applied to pods which are exiting or being decommissioned. Intended for use @@ -1323,7 +1323,7 @@ See the [configuration page](configuration.html) for information on Spark config 3.3.0 - spark.kubernetes.executor.decommmissionLabelValue + spark.kubernetes.executor.decommmissionLabelValue (none) Value to be applied with the label when From f82cf9e244fe43e32705ba948b2938e95247bc42 Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Thu, 17 Feb 2022 16:54:50 -0800 Subject: [PATCH 261/513] [SPARK-34378][SQL][AVRO] Loosen AvroSerializer validation to allow extra nullable user-provided fields ### What changes were proposed in this pull request? Loosen the schema validation logic in `AvroSerializer` to accommodate the situation where a user has provided an explicit schema (via `avroSchema`) and this schema has extra fields which are not present in the Catalyst schema (the DF being written). Specifically, extra _nullable_ fields will be allowed and populated as null. 
_Required_ fields (non-null) will still be checked for existence. ### Why are the changes needed? It's common for Avro schemas to evolve in a _compatible_ way (as discussed in Confluent's documentation on [Schema Evolution and Compatibility](https://docs.confluent.io/platform/current/schema-registry/avro.html); here I refer to `FULL` compatibility). Under such a scenario, new _optional_ fields are added to a schema. Producers are free to include the new field if they so choose, and consumers are free to read the new field if they so choose. It is optional on both sides. Consider the following code: ``` val outputSchema = getOutputSchema() df.write.format("avro").option("avroSchema", outputSchema).save(...) ``` If you have a situation where schemas are managed in some centralized repository (e.g. a [schema registry](https://docs.confluent.io/platform/current/schema-registry/index.html)), `outputSchema` may update at some point to add a new optional field, without you necessarily initiating any action on your side as a data producer. With the current code, this would cause the producer job to break, because validation would complain that the newly added field is not present in the DataFrame. Really, the producer should be able to continue producing data as normal even without adding the new field to the DataFrame it is writing out, because the field is optional. ### Does this PR introduce _any_ user-facing change? Yes, when using the `avroSchema` option on the Avro data source during writes, validation is less strict, and allows for (compatible) schema evolution to be handled more gracefully. ### How was this patch tested? New unit tests added. We've also been employing this logic internally for a few years, though the implementation was quite different due to recent changes in this area of the code. Closes #34009 from xkrogen/xkrogen-SPARK-34378-avro-serializer-ignore-extra-nullable-fields. 
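To make the relaxed validation concrete, a hedged sketch of the new behavior (the schema, output path and data below are made up for illustration; the bundled AvroSuite test covers the same case, and the spark-avro module must be on the classpath):

```scala
import spark.implicits._  // assumes an existing SparkSession named `spark`

// User-provided schema: f1 is required, f2 is nullable with a null default.
val avroSchema =
  """{"type": "record", "name": "test", "fields": [
    |  {"name": "f1", "type": "string"},
    |  {"name": "f2", "type": ["null", "string"], "default": null}
    |]}""".stripMargin

// The DataFrame only carries f1. Previously AvroSerializer rejected this because f2
// had no Catalyst match; with this change the nullable f2 is simply written as null.
Seq("foo", "bar").toDF("f1")
  .write.format("avro").option("avroSchema", avroSchema).save("/tmp/spark-34378-demo")
```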
Authored-by: Erik Krogen Signed-off-by: Dongjoon Hyun --- .../spark/sql/avro/AvroSerializer.scala | 2 +- .../org/apache/spark/sql/avro/AvroUtils.scala | 13 ++++-- .../sql/avro/AvroSchemaHelperSuite.scala | 25 ++++++++++- .../spark/sql/avro/AvroSerdeSuite.scala | 41 ++++++++++++------- .../org/apache/spark/sql/avro/AvroSuite.scala | 26 ++++++++++++ 5 files changed, 87 insertions(+), 20 deletions(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala index f2f754aabd3ed..4a82df6ba0dce 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala @@ -262,7 +262,7 @@ private[sql] class AvroSerializer( avroStruct, catalystStruct, avroPath, catalystPath, positionalFieldMatch) avroSchemaHelper.validateNoExtraCatalystFields(ignoreNullable = false) - avroSchemaHelper.validateNoExtraAvroFields() + avroSchemaHelper.validateNoExtraRequiredAvroFields() val (avroIndices, fieldConverters) = avroSchemaHelper.matchedFields.map { case AvroMatchedField(catalystField, _, avroField) => diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index 149d0b6e73de6..ef9d22f35d048 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -270,10 +270,12 @@ private[sql] object AvroUtils extends Logging { /** * Validate that there are no Avro fields which don't have a matching Catalyst field, throwing - * [[IncompatibleSchemaException]] if such extra fields are found. + * [[IncompatibleSchemaException]] if such extra fields are found. Only required (non-nullable) + * fields are checked; nullable fields are ignored. */ - def validateNoExtraAvroFields(): Unit = { - (avroFieldArray.toSet -- matchedFields.map(_.avroField)).foreach { extraField => + def validateNoExtraRequiredAvroFields(): Unit = { + val extraFields = avroFieldArray.toSet -- matchedFields.map(_.avroField) + extraFields.filterNot(isNullable).foreach { extraField => if (positionalFieldMatch) { throw new IncompatibleSchemaException(s"Found field '${extraField.name()}' at position " + s"${extraField.pos()} of ${toFieldStr(avroPath)} from Avro schema but there is no " + @@ -328,4 +330,9 @@ private[sql] object AvroUtils extends Logging { case Seq() => "top-level record" case n => s"field '${n.mkString(".")}'" } + + /** Return true iff `avroField` is nullable, i.e. `UNION` type and has `NULL` as an option. 
*/ + private[avro] def isNullable(avroField: Schema.Field): Boolean = + avroField.schema().getType == Schema.Type.UNION && + avroField.schema().getTypes.asScala.exists(_.getType == Schema.Type.NULL) } diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSchemaHelperSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSchemaHelperSuite.scala index 604b4e80d89e3..8ad06492fa5d9 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSchemaHelperSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSchemaHelperSuite.scala @@ -104,7 +104,7 @@ class AvroSchemaHelperSuite extends SQLTestUtils with SharedSparkSession { AvroMatchedField(catalystSchema("shared2"), 3, avroSchema.getField("shared2")) )) assertThrows[IncompatibleSchemaException] { - helper.validateNoExtraAvroFields() + helper.validateNoExtraRequiredAvroFields() } helper.validateNoExtraCatalystFields(ignoreNullable = true) assertThrows[IncompatibleSchemaException] { @@ -133,4 +133,27 @@ class AvroSchemaHelperSuite extends SQLTestUtils with SharedSparkSession { helperNullable.validateNoExtraCatalystFields(ignoreNullable = false) } } + + test("SPARK-34378: validateNoExtraRequiredAvroFields detects required and ignores nullable") { + val avroSchema = SchemaBuilder.record("record").fields() + .requiredInt("foo") + .nullableInt("bar", 1) + .optionalInt("baz") + .endRecord() + + val catalystFull = + new StructType().add("foo", IntegerType).add("bar", IntegerType).add("baz", IntegerType) + + def testValidation(catalystFieldToRemove: String): Unit = { + val filteredSchema = StructType(catalystFull.filterNot(_.name == catalystFieldToRemove)) + new AvroUtils.AvroSchemaHelper(avroSchema, filteredSchema, Seq(""), Seq(""), false) + .validateNoExtraRequiredAvroFields() + } + + assertThrows[IncompatibleSchemaException] { + testValidation("foo") + } + testValidation("bar") + testValidation("baz") + } } diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSerdeSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSerdeSuite.scala index 6d0a734f381ee..bfd56613fd64c 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSerdeSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSerdeSuite.scala @@ -121,30 +121,41 @@ class AvroSerdeSuite extends SparkFunSuite { } } - test("Fail to convert for serialization with field count mismatch") { - // Note that this is allowed for deserialization, but not serialization - val tooManyFields = - createAvroSchemaWithTopLevelFields(_.optionalInt("foo").optionalLong("bar")) - assertFailedConversionMessage(tooManyFields, Serializer, BY_NAME, + test("Fail to convert with missing Catalyst fields") { + val nestedFooField = SchemaBuilder.record("foo").fields().optionalInt("bar").endRecord() + val avroExtraOptional = createAvroSchemaWithTopLevelFields( + _.name("foo").`type`(nestedFooField).noDefault().optionalLong("bar")) + val avroExtraRequired = createAvroSchemaWithTopLevelFields( + _.name("foo").`type`(nestedFooField).noDefault().requiredLong("bar")) + + // serializing with extra _nullable_ Avro field is okay, but fails if extra field is required + withFieldMatchType(Serializer.create(CATALYST_STRUCT, avroExtraOptional, _)) + assertFailedConversionMessage(avroExtraRequired, Serializer, BY_NAME, "Found field 'bar' in Avro schema but there is no match in the SQL schema") - assertFailedConversionMessage(tooManyFields, Serializer, BY_POSITION, + 
assertFailedConversionMessage(avroExtraRequired, Serializer, BY_POSITION, "Found field 'bar' at position 1 of top-level record from Avro schema but there is no " + "match in the SQL schema at top-level record (using positional matching)") - val tooManyFieldsNested = + // deserializing should work regardless of whether the extra field is required or not + withFieldMatchType(Deserializer.create(CATALYST_STRUCT, avroExtraOptional, _)) + withFieldMatchType(Deserializer.create(CATALYST_STRUCT, avroExtraRequired, _)) + + val avroExtraNestedOptional = createNestedAvroSchemaWithFields("foo", _.optionalInt("bar").optionalInt("baz")) - assertFailedConversionMessage(tooManyFieldsNested, Serializer, BY_NAME, + val avroExtraNestedRequired = + createNestedAvroSchemaWithFields("foo", _.optionalInt("bar").requiredInt("baz")) + + // serializing with extra _nullable_ Avro field is okay, but fails if extra field is required + withFieldMatchType(Serializer.create(CATALYST_STRUCT, avroExtraNestedOptional, _)) + assertFailedConversionMessage(avroExtraNestedRequired, Serializer, BY_NAME, "Found field 'foo.baz' in Avro schema but there is no match in the SQL schema") - assertFailedConversionMessage(tooManyFieldsNested, Serializer, BY_POSITION, + assertFailedConversionMessage(avroExtraNestedRequired, Serializer, BY_POSITION, s"Found field 'baz' at position 1 of field 'foo' from Avro schema but there is no match " + s"in the SQL schema at field 'foo' (using positional matching)") - val tooFewFields = createAvroSchemaWithTopLevelFields(f => f) - assertFailedConversionMessage(tooFewFields, Serializer, BY_NAME, - "Cannot find field 'foo' in Avro schema") - assertFailedConversionMessage(tooFewFields, Serializer, BY_POSITION, - "Cannot find field at position 0 of top-level record from Avro schema " + - "(using positional matching)") + // deserializing should work regardless of whether the extra field is required or not + withFieldMatchType(Deserializer.create(CATALYST_STRUCT, avroExtraNestedOptional, _)) + withFieldMatchType(Deserializer.create(CATALYST_STRUCT, avroExtraNestedRequired, _)) } /** diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index d85baeb9386f2..d9d8c3c8b64f8 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1359,6 +1359,32 @@ abstract class AvroSuite } } + test("SPARK-34378: support writing user provided avro schema with missing optional fields") { + withTempDir { tempDir => + val avroSchema = SchemaBuilder.builder().record("test").fields() + .requiredString("f1").optionalString("f2").endRecord().toString() + + val data = Seq("foo", "bar") + + // Fail if required field f1 is missing + val e = intercept[SparkException] { + data.toDF("f2").write.option("avroSchema", avroSchema).format("avro").save(s"$tempDir/fail") + } + assertExceptionMsg[IncompatibleSchemaException](e, + "Found field 'f1' in Avro schema but there is no match in the SQL schema") + + val tempSaveDir = s"$tempDir/save/" + // Succeed if optional field f2 is missing + data.toDF("f1").write.option("avroSchema", avroSchema).format("avro").save(tempSaveDir) + + val newDf = spark.read.format("avro").load(tempSaveDir) + assert(newDf.schema === new StructType().add("f1", StringType).add("f2", StringType)) + val rows = newDf.collect() + assert(rows.map(_.getAs[String]("f1")).sorted === data.sorted) + rows.foreach(row => 
assert(row.isNullAt(1))) + } + } + test("SPARK-34133: Reading user provided schema respects case sensitivity for field matching") { val wrongCaseSchema = new StructType() .add("STRING", StringType, nullable = false) From e86c8e265866cf2bc443f4a385c87cd0c136a4b4 Mon Sep 17 00:00:00 2001 From: nyingping Date: Fri, 18 Feb 2022 10:17:32 +0900 Subject: [PATCH 262/513] [SPARK-38214][SS] No need to filter windows when windowDuration is multiple of slideDuration ### What changes were proposed in this pull request? At present, the sliding window adopts the form of expand + filter, but in some cases, filter is not necessary. Filtering is required if the sliding window is irregular. When the window length is divided by the slide length the result is an integer (I believe this is also the case for most work scenarios in practice for sliding window), there is no need to filter, which can save calculation resources and improve performance. ### Why are the changes needed? save calculation resources and improve performance. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT and benchmark. simple benchmark in this [commit ](https://github.com/nyingping/spark/commit/cccc742f601cffca99ab602165c024b3523ebc72),thanks [HeartSaVioRd532b6f](https://github.com/HeartSaVioR/spark/commit/d532b6f6bcdd80cdaac520b21587ebb69ff2df8f) --------------------------------------- -------case 1 --------------------------------------- > spark.range(numOfRow) > .selectExpr("CAST(id AS timestamp) AS time") > .select(window(col("time"), "15 seconds", "3 seconds", "2 seconds")) > .count() Result: ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_291-b10 on Windows 10 10.0 AMD64 Family 23 Model 96 Stepping 1, AuthenticAMD sliding windows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ old logic 799 866 70 12.5 79.9 1.0X new logic 58 68 9 171.2 5.8 13.7X ``` --------------------------------------- -------case 2 --------------------------------------- > spark.range(numOfRow) .selectExpr("CAST(id AS timestamp) AS time") .select(window(col("time"), "10 seconds", "5 seconds", "2 seconds")) .count() Result: ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_291-b10 on Windows 10 10.0 AMD64 Family 23 Model 96 Stepping 1, AuthenticAMD sliding windows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ old logic 359 409 52 27.8 35.9 1.0X new logic 47 54 4 212.6 4.7 7.6X ``` Closes #35526 from nyingping/SPARK-38214. 
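A quick way to observe the new plan shape (illustrative, not part of the patch; assumes a local SparkSession named `spark`): because 15 seconds is an exact multiple of 3 seconds, every expanded window is valid and the rule can emit an `IsNotNull` check instead of the start/end range predicate.

```scala
import org.apache.spark.sql.functions._

val windowed = spark.range(10)
  .selectExpr("CAST(id AS timestamp) AS time")
  .select(window(col("time"), "15 seconds", "3 seconds"))

// The optimized plan is expected to contain only isnotnull(time) rather than the
// previous `time >= window.start AND time < window.end` filter.
windowed.explain(true)
```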
Lead-authored-by: nyingping Co-authored-by: Nie yingping Signed-off-by: Jungtaek Lim --- .../sql/catalyst/analysis/Analyzer.scala | 11 +++++- .../sql/DataFrameTimeWindowingSuite.scala | 39 ++++++++++++++++++- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 195c25177da8c..c560062d5b09a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3927,9 +3927,16 @@ object TimeWindowing extends Rule[LogicalPlan] { val projections = windows.map(_ +: child.output) + // When the condition windowDuration % slideDuration = 0 is fulfilled, + // the estimation of the number of windows becomes exact one, + // which means all produced windows are valid. val filterExpr = - window.timeColumn >= windowAttr.getField(WINDOW_START) && - window.timeColumn < windowAttr.getField(WINDOW_END) + if (window.windowDuration % window.slideDuration == 0) { + IsNotNull(window.timeColumn) + } else { + window.timeColumn >= windowAttr.getField(WINDOW_START) && + window.timeColumn < windowAttr.getField(WINDOW_END) + } val substitutedPlan = Filter(filterExpr, Expand(projections, windowAttr +: child.output, child)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala index c385d9f58cc84..e9a145cec01c2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql import java.time.LocalDateTime import org.apache.spark.sql.catalyst.expressions.AttributeReference -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, Filter} import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -490,4 +490,41 @@ class DataFrameTimeWindowingSuite extends QueryTest with SharedSparkSession { assert(attributeReference.dataType == tuple._2) } } + + test("No need to filter windows when windowDuration is multiple of slideDuration") { + val df1 = Seq( + ("2022-02-15 19:39:34", 1, "a"), + ("2022-02-15 19:39:56", 2, "a"), + ("2022-02-15 19:39:27", 4, "b")).toDF("time", "value", "id") + .select(window($"time", "9 seconds", "3 seconds", "0 second"), $"value") + .orderBy($"window.start".asc, $"value".desc).select("value") + val df2 = Seq( + (LocalDateTime.parse("2022-02-15T19:39:34"), 1, "a"), + (LocalDateTime.parse("2022-02-15T19:39:56"), 2, "a"), + (LocalDateTime.parse("2022-02-15T19:39:27"), 4, "b")).toDF("time", "value", "id") + .select(window($"time", "9 seconds", "3 seconds", "0 second"), $"value") + .orderBy($"window.start".asc, $"value".desc).select("value") + + val df3 = Seq( + ("2022-02-15 19:39:34", 1, "a"), + ("2022-02-15 19:39:56", 2, "a"), + ("2022-02-15 19:39:27", 4, "b")).toDF("time", "value", "id") + .select(window($"time", "9 seconds", "3 seconds", "-2 second"), $"value") + .orderBy($"window.start".asc, $"value".desc).select("value") + val df4 = Seq( + (LocalDateTime.parse("2022-02-15T19:39:34"), 1, "a"), + (LocalDateTime.parse("2022-02-15T19:39:56"), 2, "a"), + 
(LocalDateTime.parse("2022-02-15T19:39:27"), 4, "b")).toDF("time", "value", "id") + .select(window($"time", "9 seconds", "3 seconds", "2 second"), $"value") + .orderBy($"window.start".asc, $"value".desc).select("value") + + Seq(df1, df2, df3, df4).foreach { df => + val filter = df.queryExecution.optimizedPlan.find(_.isInstanceOf[Filter]) + assert(filter.isDefined) + val exist = filter.get.constraints.filter(e => + e.toString.contains(">=") || e.toString.contains("<")) + assert(exist.isEmpty, "No need to filter windows " + + "when windowDuration is multiple of slideDuration") + } + } } From 3022fd4ccfed676d4ba194afbfde2dd5ec1d348f Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Thu, 17 Feb 2022 19:52:16 -0600 Subject: [PATCH 263/513] [SPARK-38197][CORE] Improve error message of BlockManager.fetchRemoteManagedBuffer ### What changes were proposed in this pull request? When locations's size is 1, and fetch failed, it only will print a error message like ``` 22/02/13 18:58:11 WARN BlockManager: Failed to fetch block after 1 fetch failures. Most recent failure cause: java.lang.IllegalStateException: Empty buffer received for non empty block at org.apache.spark.storage.BlockManager.fetchRemoteManagedBuffer(BlockManager.scala:1063) at org.apache.spark.storage.BlockManager.$anonfun$getRemoteBlock$8(BlockManager.scala:1005) at scala.Option.orElse(Option.scala:447) at org.apache.spark.storage.BlockManager.getRemoteBlock(BlockManager.scala:1005) at org.apache.spark.storage.BlockManager.getRemoteValues(BlockManager.scala:951) at org.apache.spark.storage.BlockManager.get(BlockManager.scala:1168) at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1230) at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:384) at org.apache.spark.rdd.RDD.iterator(RDD.scala:335) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52) at org.apache.spark.scheduler.Task.run(Task.scala:131) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` We don't know the target nm ip and block 
id. This pr improve the error message to show necessary information ### Why are the changes needed? Improve error message ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Not need Closes #35505 from AngersZhuuuu/SPARK-38197. Authored-by: Angerszhuuuu Signed-off-by: Sean Owen --- .../main/scala/org/apache/spark/storage/BlockManager.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index ec4dc7722e681..7ae57f71a129d 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -1143,7 +1143,8 @@ private[spark] class BlockManager( val buf = blockTransferService.fetchBlockSync(loc.host, loc.port, loc.executorId, blockId.toString, tempFileManager) if (blockSize > 0 && buf.size() == 0) { - throw new IllegalStateException("Empty buffer received for non empty block") + throw new IllegalStateException("Empty buffer received for non empty block " + + s"when fetching remote block $blockId from $loc") } buf } catch { @@ -1155,7 +1156,8 @@ private[spark] class BlockManager( // Give up trying anymore locations. Either we've tried all of the original locations, // or we've refreshed the list of locations from the master, and have still // hit failures after trying locations from the refreshed list. - logWarning(s"Failed to fetch block after $totalFailureCount fetch failures. " + + logWarning(s"Failed to fetch remote block $blockId " + + s"from [${locations.mkString(", ")}] after $totalFailureCount fetch failures. " + s"Most recent failure cause:", e) return None } From 837248a0c42d55ad48240647d503ad544e64f016 Mon Sep 17 00:00:00 2001 From: Karthik Subramanian Date: Fri, 18 Feb 2022 12:52:11 +0900 Subject: [PATCH 264/513] [MINOR][DOC] Fix documentation for structured streaming - addListener ### What changes were proposed in this pull request? This PR fixes the incorrect documentation in Structured Streaming Guide where it says `sparkSession.streams.attachListener()` instead of `sparkSession.streams.addListener()` which is the correct usage as mentioned in the code snippet below in the same doc. ![image](https://user-images.githubusercontent.com/298735/154593814-0a865311-b168-4929-b4af-a8c939168f26.png) ### Why are the changes needed? The documentation was erroneous, and needs to be fixed to avoid confusion by readers ### Does this PR introduce _any_ user-facing change? Yes, since it's a doc fix. This fix needs to be applied to previous versions retro-actively as well. ### How was this patch tested? Not necessary Closes #35562 from yeskarthik/fix-structured-streaming-docs-1. Authored-by: Karthik Subramanian Signed-off-by: Hyukjin Kwon --- docs/structured-streaming-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 82dee8b1bf147..54325850a0333 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -3318,7 +3318,7 @@ You can also asynchronously monitor all queries associated with a `SparkSession` by attaching a `StreamingQueryListener` ([Scala](api/scala/org/apache/spark/sql/streaming/StreamingQueryListener.html)/[Java](api/java/org/apache/spark/sql/streaming/StreamingQueryListener.html) docs). 
Once you attach your custom `StreamingQueryListener` object with -`sparkSession.streams.attachListener()`, you will get callbacks when a query is started and +`sparkSession.streams.addListener()`, you will get callbacks when a query is started and stopped and when there is progress made in an active query. Here is an example,

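The listener example the guide refers to lies outside this diff's context; for completeness, a minimal registration sketch matching the corrected `addListener` call (Scala only, body illustrative):

```scala
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener._

// Assumes an existing SparkSession named `spark`.
spark.streams.addListener(new StreamingQueryListener() {
  override def onQueryStarted(event: QueryStartedEvent): Unit =
    println("Query started: " + event.id)
  override def onQueryProgress(event: QueryProgressEvent): Unit =
    println("Query made progress: " + event.progress)
  override def onQueryTerminated(event: QueryTerminatedEvent): Unit =
    println("Query terminated: " + event.id)
})
```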
    From 3a7eafdb2f4e7fb27e60aae1bac7cc975dc45078 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 18 Feb 2022 08:50:46 +0300 Subject: [PATCH 265/513] [SPARK-38195][SQL] Add the `TIMESTAMPADD()` function ### What changes were proposed in this pull request? In the PR, I propose to add new function `TIMESTAMPADD` with the following parameters: 1. `unit` - specifies an unit of interval. It can be a string or an identifier. Supported the following values (case-insensitive): - YEAR - QUARTER - MONTH - WEEK - DAY, DAYOFYEAR - HOUR - MINUTE - SECOND - MILLISECOND - MICROSECOND 2. `quantity` - the amount of `unit`s to add. It has the `INT` type. It can be positive or negative. 3. `timestamp` - a timestamp (w/ or w/o timezone) to which you want to add. The function returns the original timestamp plus the given interval. The result has the same type as the input `timestamp` (for `timestamp_ntz`, it returns `timestamp_ntz` and for `timestamp_ltz` -> `timestamp_ltz`). For example: ```scala scala> val df = sql("select timestampadd(YEAR, 1, timestamp_ltz'2022-02-16 01:02:03') as ts1, timestampadd(YEAR, 1, timestamp_ntz'2022-02-16 01:02:03') as ts2") df: org.apache.spark.sql.DataFrame = [ts1: timestamp, ts2: timestamp_ntz] scala> df.printSchema root |-- ts1: timestamp (nullable = false) |-- ts2: timestamp_ntz (nullable = false) scala> df.show(false) +-------------------+-------------------+ |ts1 |ts2 | +-------------------+-------------------+ |2023-02-16 01:02:03|2023-02-16 01:02:03| +-------------------+-------------------+ ``` **Note:** if the `timestamp` has the type `timestamp_ltz`, and `unit` is: - YEAR, QUARTER, MONTH - the input timestamp is converted to a local timestamp at the session time (see `spark.sql.session.timeZone`). And after that, the function adds the amount of months to the local timestamp, and converts the result to a `timestamp_ltz` at the same session time zone. - `WEEK`, `DAY` - in similar way as above, the function adds the total amount of days to the timestamp at the session time zone. - `HOUR`, `MINUTE`, `SECOND`, `MILLISECOND`, `MICROSECOND` - the functions converts the interval to the total amount of microseconds, and adds them to the given timestamp (expressed as an offset from the epoch). For example, Sun 13-Mar-2022 at 02:00:00 A.M. is a daylight saving time in the `America/Los_Angeles` time zone: ```sql spark-sql> set spark.sql.session.timeZone=America/Los_Angeles; spark.sql.session.timeZone America/Los_Angeles spark-sql> select timestampadd(HOUR, 4, timestamp_ltz'2022-03-13 00:00:00'), timestampadd(HOUR, 4, timestamp_ntz'2022-03-13 00:00:00'); 2022-03-13 05:00:00 2022-03-13 04:00:00 spark-sql> select timestampadd(DAY, 1, timestamp_ltz'2022-03-13 00:00:00'), timestampadd(DAY, 1, timestamp_ntz'2022-03-13 00:00:00'); 2022-03-14 00:00:00 2022-03-14 00:00:00 spark-sql> select timestampadd(Month, -1, timestamp_ltz'2022-03-13 00:00:00'), timestampadd(month, -1, timestamp_ntz'2022-03-13 00:00:00'); 2022-02-13 00:00:00 2022-02-13 00:00:00 ``` In fact, such behavior is similar to adding an ANSI interval to a timestamp. The function also supports implicit conversion of the input date to a timestamp according the general rules of Spark SQL. By default, Spark SQL converts dates to timestamp (which is timestamp_ltz by default). ### Why are the changes needed? 1. To make the migration process from other systems to Spark SQL easier. 2. To achieve feature parity with other DBMSs. ### Does this PR introduce _any_ user-facing change? No. This is new feature. 
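As a hedged illustration of the "similar to adding an ANSI interval" note above (not part of the patch; assumes a session built from this change and named `spark`):

```scala
spark.sql(
  """SELECT timestampadd(MONTH, 1, timestamp'2022-01-31 00:00:00') AS via_func,
    |       timestamp'2022-01-31 00:00:00' + INTERVAL '1' MONTH AS via_interval
    |""".stripMargin).show(false)
// Both columns are expected to print 2022-02-28 00:00:00.
```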
### How was this patch tested? By running new tests: ``` $ build/sbt "test:testOnly *QueryExecutionErrorsSuite" $ build/sbt "test:testOnly *DateTimeUtilsSuite" $ build/sbt "sql/test:testOnly org.apache.spark.sql.expressions.ExpressionInfoSuite" $ build/sbt "sql/testOnly *ExpressionsSchemaSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z timestamp.sql" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z timestamp-ansi.sql" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z datetime-legacy.sql" $ build/sbt "test:testOnly *DateExpressionsSuite" $ build/sbt "test:testOnly *SQLKeywordSuite" ``` Closes #35502 from MaxGekk/timestampadd. Authored-by: Max Gekk Signed-off-by: Max Gekk --- docs/sql-ref-ansi-compliance.md | 1 + .../spark/sql/catalyst/parser/SqlBase.g4 | 4 + .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/datetimeExpressions.scala | 84 +++++++++++++++++++ .../sql/catalyst/parser/AstBuilder.scala | 11 +++ .../sql/catalyst/util/DateTimeUtils.scala | 36 ++++++++ .../sql/errors/QueryExecutionErrors.scala | 6 ++ .../expressions/DateExpressionsSuite.scala | 62 ++++++++++++++ .../catalyst/util/DateTimeUtilsSuite.scala | 36 +++++++- .../sql-functions/sql-expression-schema.md | 3 +- .../resources/sql-tests/inputs/timestamp.sql | 6 ++ .../sql-tests/results/ansi/timestamp.sql.out | 34 +++++++- .../sql-tests/results/datetime-legacy.sql.out | 34 +++++++- .../sql-tests/results/timestamp.sql.out | 34 +++++++- .../timestampNTZ/timestamp-ansi.sql.out | 34 +++++++- .../results/timestampNTZ/timestamp.sql.out | 34 +++++++- .../errors/QueryExecutionErrorsSuite.scala | 12 ++- 17 files changed, 424 insertions(+), 8 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 3c87263c7de18..d695693c24de4 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -573,6 +573,7 @@ Below is a list of all the keywords in Spark SQL. |THEN|reserved|non-reserved|reserved| |TIME|reserved|non-reserved|reserved| |TIMESTAMP|non-reserved|non-reserved|non-reserved| +|TIMESTAMPADD|non-reserved|non-reserved|non-reserved| |TO|reserved|non-reserved|reserved| |TOUCH|non-reserved|non-reserved|non-reserved| |TRAILING|reserved|non-reserved|reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 6331798ef5db0..d44f508707681 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -860,6 +860,7 @@ valueExpression primaryExpression : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER) #currentLike + | TIMESTAMPADD '(' unit=identifier ',' unitsAmount=valueExpression ',' timestamp=valueExpression ')' #timestampadd | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase | CASE value=expression whenClause+ (ELSE elseExpression=expression)? 
END #simpleCase | name=(CAST | TRY_CAST) '(' expression AS dataType ')' #cast @@ -1267,6 +1268,7 @@ ansiNonReserved | TEMPORARY | TERMINATED | TIMESTAMP + | TIMESTAMPADD | TOUCH | TRANSACTION | TRANSACTIONS @@ -1544,6 +1546,7 @@ nonReserved | THEN | TIME | TIMESTAMP + | TIMESTAMPADD | TO | TOUCH | TRAILING @@ -1821,6 +1824,7 @@ TERMINATED: 'TERMINATED'; THEN: 'THEN'; TIME: 'TIME'; TIMESTAMP: 'TIMESTAMP'; +TIMESTAMPADD: 'TIMESTAMPADD'; TO: 'TO'; TOUCH: 'TOUCH'; TRAILING: 'TRAILING'; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 7a3809378cfd1..0bc349ce37901 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -594,6 +594,7 @@ object FunctionRegistry { expression[UnixMillis]("unix_millis"), expression[UnixMicros]("unix_micros"), expression[ConvertTimezone]("convert_timezone"), + expression[TimestampAdd]("timestampadd"), // collection functions expression[CreateArray]("array"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index c679c1f5a5801..e73e989c9c99e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -3057,3 +3057,87 @@ case class ConvertTimezone( copy(sourceTz = newFirst, targetTz = newSecond, sourceTs = newThird) } } + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(unit, quantity, timestamp) - Adds the specified number of units to the given timestamp.", + arguments = """ + Arguments: + * unit - this indicates the units of datetime that you want to add. + Supported string values of `unit` are (case insensitive): + - "YEAR" + - "QUARTER" - 3 months + - "MONTH" + - "WEEK" - 7 days + - "DAY", "DAYOFYEAR" + - "HOUR" + - "MINUTE" + - "SECOND" + - "MILLISECOND" - milliseconds + - "MICROSECOND" + * quantity - this is the number of units of time that you want to add. + * timestamp - this is a timestamp (w/ or w/o timezone) to which you want to add. 
+ """, + examples = """ + Examples: + > SELECT _FUNC_('HOUR', 8, timestamp_ntz'2022-02-11 20:30:00'); + 2022-02-12 04:30:00 + > SELECT _FUNC_('MONTH', 1, timestamp_ltz'2022-01-31 00:00:00'); + 2022-02-28 00:00:00 + > SELECT _FUNC_(SECOND, -10, date'2022-01-01'); + 2021-12-31 23:59:50 + > SELECT _FUNC_(YEAR, 10, timestamp'2000-01-01 01:02:03.123456'); + 2010-01-01 01:02:03.123456 + """, + group = "datetime_funcs", + since = "3.3.0") +// scalastyle:on line.size.limit +case class TimestampAdd( + unit: Expression, + quantity: Expression, + timestamp: Expression, + timeZoneId: Option[String] = None) + extends TernaryExpression + with ImplicitCastInputTypes + with NullIntolerant + with TimeZoneAwareExpression { + + def this(unit: Expression, quantity: Expression, timestamp: Expression) = + this(unit, quantity, timestamp, None) + + override def first: Expression = unit + override def second: Expression = quantity + override def third: Expression = timestamp + + override def inputTypes: Seq[AbstractDataType] = Seq(StringType, IntegerType, AnyTimestampType) + override def dataType: DataType = timestamp.dataType + + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) + + @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(timestamp.dataType) + + override def nullSafeEval(u: Any, q: Any, micros: Any): Any = { + DateTimeUtils.timestampAdd( + u.asInstanceOf[UTF8String].toString, + q.asInstanceOf[Int], + micros.asInstanceOf[Long], + zoneIdInEval) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + val zid = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName) + defineCodeGen(ctx, ev, (u, q, micros) => + s"""$dtu.timestampAdd($u.toString(), $q, $micros, $zid)""") + } + + override def prettyName: String = "timestampadd" + + override protected def withNewChildrenInternal( + newFirst: Expression, + newSecond: Expression, + newThird: Expression): TimestampAdd = { + copy(unit = newFirst, quantity = newSecond, timestamp = newThird) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 53fdf84937b5c..08f2cb92b93e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -4506,4 +4506,15 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg private def alterViewTypeMismatchHint: Option[String] = Some("Please use ALTER TABLE instead.") private def alterTableTypeMismatchHint: Option[String] = Some("Please use ALTER VIEW instead.") + + /** + * Create a TimestampAdd expression. 
+ */ + override def visitTimestampadd(ctx: TimestampaddContext): Expression = withOrigin(ctx) { + val arguments = Seq( + Literal(ctx.unit.getText), + expression(ctx.unitsAmount), + expression(ctx.timestamp)) + UnresolvedFunction("timestampadd", arguments, isDistinct = false) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 445ec8444a915..b5525776c3f99 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1163,4 +1163,40 @@ object DateTimeUtils { val localStartTs = getLocalDateTime(startMicros, zoneId) ChronoUnit.MICROS.between(localStartTs, localEndTs) } + + /** + * Adds the specified number of units to a timestamp. + * + * @param unit A keyword that specifies the interval units to add to the input timestamp. + * @param quantity The amount of `unit`s to add. It can be positive or negative. + * @param micros The input timestamp value, expressed in microseconds since 1970-01-01 00:00:00Z. + * @param zoneId The time zone ID at which the operation is performed. + * @return A timestamp value, expressed in microseconds since 1970-01-01 00:00:00Z. + */ + def timestampAdd(unit: String, quantity: Int, micros: Long, zoneId: ZoneId): Long = { + unit.toUpperCase(Locale.ROOT) match { + case "MICROSECOND" => + timestampAddDayTime(micros, quantity, zoneId) + case "MILLISECOND" => + timestampAddDayTime(micros, quantity * MICROS_PER_MILLIS, zoneId) + case "SECOND" => + timestampAddDayTime(micros, quantity * MICROS_PER_SECOND, zoneId) + case "MINUTE" => + timestampAddDayTime(micros, quantity * MICROS_PER_MINUTE, zoneId) + case "HOUR" => + timestampAddDayTime(micros, quantity * MICROS_PER_HOUR, zoneId) + case "DAY" | "DAYOFYEAR" => + timestampAddDayTime(micros, quantity * MICROS_PER_DAY, zoneId) + case "WEEK" => + timestampAddDayTime(micros, quantity * MICROS_PER_DAY * DAYS_PER_WEEK, zoneId) + case "MONTH" => + timestampAddMonths(micros, quantity, zoneId) + case "QUARTER" => + timestampAddMonths(micros, quantity * 3, zoneId) + case "YEAR" => + timestampAddMonths(micros, quantity * MONTHS_PER_YEAR, zoneId) + case _ => + throw QueryExecutionErrors.invalidUnitInTimestampAdd(unit) + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index d3753afdb7990..1d87e9f0a992b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1963,4 +1963,10 @@ object QueryExecutionErrors { def unsupportedDropNamespaceRestrictError(): Throwable = { new SQLFeatureNotSupportedException("Drop namespace restrict is not supported") } + + def invalidUnitInTimestampAdd(unit: String): Throwable = { + new SparkIllegalArgumentException( + errorClass = "INVALID_PARAMETER_VALUE", + messageParameters = Array("unit", "timestampadd", unit)) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index d0c0a1948b442..31d7da354460f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1885,4 +1885,66 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } } } + + test("SPARK-38195: add a quantity of interval units to a timestamp") { + // Check case-insensitivity + checkEvaluation( + TimestampAdd(Literal("Hour"), Literal(1), Literal(LocalDateTime.of(2022, 2, 15, 12, 57, 0))), + LocalDateTime.of(2022, 2, 15, 13, 57, 0)) + // Check nulls as input values + checkEvaluation( + TimestampAdd( + Literal.create(null, StringType), + Literal(1), + Literal(LocalDateTime.of(2022, 2, 15, 12, 57, 0))), + null) + checkEvaluation( + TimestampAdd( + Literal("MINUTE"), + Literal.create(null, IntegerType), + Literal(LocalDateTime.of(2022, 2, 15, 12, 57, 0))), + null) + checkEvaluation( + TimestampAdd( + Literal("MINUTE"), + Literal(1), + Literal.create(null, TimestampType)), + null) + // Check crossing the daylight saving time + checkEvaluation( + TimestampAdd( + Literal("HOUR"), + Literal(6), + Literal(Instant.parse("2022-03-12T23:30:00Z")), + Some("America/Los_Angeles")), + Instant.parse("2022-03-13T05:30:00Z")) + // Check the leap year + checkEvaluation( + TimestampAdd( + Literal("DAY"), + Literal(2), + Literal(LocalDateTime.of(2020, 2, 28, 10, 11, 12)), + Some("America/Los_Angeles")), + LocalDateTime.of(2020, 3, 1, 10, 11, 12)) + + Seq( + "YEAR", "QUARTER", "MONTH", + "WEEK", "DAY", + "HOUR", "MINUTE", "SECOND", + "MILLISECOND", "MICROSECOND" + ).foreach { unit => + outstandingTimezonesIds.foreach { tz => + Seq(TimestampNTZType, TimestampType).foreach { tsType => + checkConsistencyBetweenInterpretedAndCodegenAllowingException( + (quantity: Expression, timestamp: Expression) => + TimestampAdd( + Literal(unit), + quantity, + timestamp, + Some(tz)), + IntegerType, tsType) + } + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 09a011d5ccee1..d4e8e29c38fa4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -26,7 +26,7 @@ import java.util.concurrent.TimeUnit import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkFunSuite, SparkIllegalArgumentException} import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ @@ -955,4 +955,38 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { s"The difference is ${(result - expectedMicros) / MICROS_PER_HOUR} hours") } } + + test("SPARK-38195: add a quantity of interval units to a timestamp") { + outstandingZoneIds.foreach { zid => + assert(timestampAdd("MICROSECOND", 1, date(2022, 2, 14, 11, 27, 0, 0, zid), zid) === + date(2022, 2, 14, 11, 27, 0, 1, zid)) + assert(timestampAdd("MILLISECOND", -1, date(2022, 2, 14, 11, 27, 0, 1000, zid), zid) === + date(2022, 2, 14, 11, 27, 0, 0, zid)) + assert(timestampAdd("SECOND", 0, date(2022, 2, 14, 11, 27, 0, 1001, zid), zid) === + date(2022, 2, 14, 11, 27, 0, 1001, zid)) + assert(timestampAdd("MINUTE", -90, date(2022, 2, 14, 11, 0, 1, 1, zid), zid) === + date(2022, 2, 14, 9, 30, 1, 1, zid)) + assert(timestampAdd("HOUR", 24, date(2022, 2, 14, 
11, 0, 1, 0, zid), zid) === + date(2022, 2, 15, 11, 0, 1, 0, zid)) + assert(timestampAdd("DAY", 1, date(2022, 2, 28, 11, 1, 0, 0, zid), zid) === + date(2022, 3, 1, 11, 1, 0, 0, zid)) + assert(timestampAdd("DAYOFYEAR", 364, date(2022, 1, 1, 0, 0, 0, 0, zid), zid) === + date(2022, 12, 31, 0, 0, 0, 0, zid)) + assert(timestampAdd("WEEK", 1, date(2022, 2, 14, 11, 43, 0, 1, zid), zid) === + date(2022, 2, 21, 11, 43, 0, 1, zid)) + assert(timestampAdd("MONTH", 10, date(2022, 2, 14, 11, 43, 0, 1, zid), zid) === + date(2022, 12, 14, 11, 43, 0, 1, zid)) + assert(timestampAdd("QUARTER", 1, date(1900, 2, 1, 0, 0, 0, 1, zid), zid) === + date(1900, 5, 1, 0, 0, 0, 1, zid)) + assert(timestampAdd("YEAR", 1, date(9998, 1, 1, 0, 0, 0, 1, zid), zid) === + date(9999, 1, 1, 0, 0, 0, 1, zid)) + assert(timestampAdd("YEAR", -9998, date(9999, 1, 1, 0, 0, 0, 1, zid), zid) === + date(1, 1, 1, 0, 0, 0, 1, zid)) + } + + val e = intercept[SparkIllegalArgumentException] { + timestampAdd("SECS", 1, date(1969, 1, 1, 0, 0, 0, 1, getZoneId("UTC")), getZoneId("UTC")) + } + assert(e.getMessage.contains("invalid: SECS")) + } } diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 33ba2b73e6b07..8b1a12f9d9c63 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 379 + - Number of queries: 380 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -299,6 +299,7 @@ | org.apache.spark.sql.catalyst.expressions.Tan | tan | SELECT tan(0) | struct | | org.apache.spark.sql.catalyst.expressions.Tanh | tanh | SELECT tanh(0) | struct | | org.apache.spark.sql.catalyst.expressions.TimeWindow | window | SELECT a, window.start, window.end, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:06:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, window(b, '5 minutes') ORDER BY a, start | struct | +| org.apache.spark.sql.catalyst.expressions.TimestampAdd | timestampadd | SELECT timestampadd('HOUR', 8, timestamp_ntz'2022-02-11 20:30:00') | struct | | org.apache.spark.sql.catalyst.expressions.ToBinary | to_binary | SELECT to_binary('abc', 'utf-8') | struct | | org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct | | org.apache.spark.sql.catalyst.expressions.ToNumber | to_number | SELECT to_number('454', '999') | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql index 0bc77a8c971f8..49eb228c33f44 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql @@ -142,3 +142,9 @@ select to_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEE'); select unix_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEE'); select from_json('{"t":"26/October/2015"}', 't Timestamp', map('timestampFormat', 'dd/MMMMM/yyyy')); select from_csv('26/October/2015', 't Timestamp', map('timestampFormat', 'dd/MMMMM/yyyy')); + +-- Add a number of units to a timestamp or a date +select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03'); +select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03'); 
+select timestampadd(YEAR, 1, date'2022-02-15'); +select timestampadd('SECOND', -1, date'2022-02-15'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out index 96ea94869d7e6..91e526316864a 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 89 +-- Number of queries: 93 -- !query @@ -771,3 +771,35 @@ struct<> -- !query output org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03') +-- !query schema +struct +-- !query output +2022-01-14 01:02:03 + + +-- !query +select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') +-- !query schema +struct +-- !query output +2022-02-14 02:00:03 + + +-- !query +select timestampadd(YEAR, 1, date'2022-02-15') +-- !query schema +struct +-- !query output +2023-02-15 00:00:00 + + +-- !query +select timestampadd('SECOND', -1, date'2022-02-15') +-- !query schema +struct +-- !query output +2022-02-14 23:59:59 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 74480ab6cc2b4..e38df80819fb3 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 166 +-- Number of queries: 170 -- !query @@ -1415,3 +1415,35 @@ select from_csv('26/October/2015', 't Timestamp', map('timestampFormat', 'dd/MMM struct> -- !query output {"t":2015-10-26 00:00:00} + + +-- !query +select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03') +-- !query schema +struct +-- !query output +2022-01-14 01:02:03 + + +-- !query +select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') +-- !query schema +struct +-- !query output +2022-02-14 02:00:03 + + +-- !query +select timestampadd(YEAR, 1, date'2022-02-15') +-- !query schema +struct +-- !query output +2023-02-15 00:00:00 + + +-- !query +select timestampadd('SECOND', -1, date'2022-02-15') +-- !query schema +struct +-- !query output +2022-02-14 23:59:59 diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out index 77b4f73d179f0..34e313eeb8a24 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 89 +-- Number of queries: 93 -- !query @@ -765,3 +765,35 @@ struct<> -- !query output org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03') +-- !query schema +struct +-- !query output +2022-01-14 01:02:03 + + +-- !query +select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') +-- !query schema +struct +-- !query output +2022-02-14 02:00:03 + + +-- !query +select timestampadd(YEAR, 1, date'2022-02-15') +-- !query schema +struct +-- !query output +2023-02-15 00:00:00 + + +-- !query +select timestampadd('SECOND', -1, date'2022-02-15') +-- !query schema +struct +-- !query output +2022-02-14 23:59:59 diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index 6b93f7688fb73..00ad665ea0198 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 89 +-- Number of queries: 93 -- !query @@ -769,3 +769,35 @@ select from_csv('26/October/2015', 't Timestamp', map('timestampFormat', 'dd/MMM struct> -- !query output {"t":null} + + +-- !query +select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03') +-- !query schema +struct +-- !query output +2022-01-14 01:02:03 + + +-- !query +select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') +-- !query schema +struct +-- !query output +2022-02-14 02:00:03 + + +-- !query +select timestampadd(YEAR, 1, date'2022-02-15') +-- !query schema +struct +-- !query output +2023-02-15 00:00:00 + + +-- !query +select timestampadd('SECOND', -1, date'2022-02-15') +-- !query schema +struct +-- !query output +2022-02-14 23:59:59 diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out index d8958d66cef4b..339e6db3cf0e5 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 89 +-- Number of queries: 93 -- !query @@ -763,3 +763,35 @@ select from_csv('26/October/2015', 't Timestamp', map('timestampFormat', 'dd/MMM struct> -- !query output {"t":null} + + +-- !query +select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03') +-- !query schema +struct +-- !query output +2022-01-14 01:02:03 + + +-- !query +select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') +-- !query schema +struct +-- !query output +2022-02-14 02:00:03 + + +-- !query +select timestampadd(YEAR, 1, date'2022-02-15') +-- !query schema +struct +-- !query output +2023-02-15 00:00:00 + + +-- !query +select timestampadd('SECOND', -1, date'2022-02-15') +-- !query schema +struct +-- !query output +2022-02-14 23:59:59 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index dc9f5065e277c..57fbeacc31c61 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.errors -import 
org.apache.spark.{SparkException, SparkRuntimeException, SparkUnsupportedOperationException} +import org.apache.spark.{SparkException, SparkIllegalArgumentException, SparkRuntimeException, SparkUnsupportedOperationException} import org.apache.spark.sql.{DataFrame, QueryTest} import org.apache.spark.sql.functions.{lit, lower, struct, sum} import org.apache.spark.sql.test.SharedSparkSession @@ -92,6 +92,16 @@ class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { } } + test("INVALID_PARAMETER_VALUE: invalid unit passed to timestampadd") { + val e = intercept[SparkIllegalArgumentException] { + sql("select timestampadd('nanosecond', 100, timestamp'2022-02-13 18:00:00')").collect() + } + assert(e.getErrorClass === "INVALID_PARAMETER_VALUE") + assert(e.getSqlState === "22023") + assert(e.getMessage === + "The value of parameter(s) 'unit' in timestampadd is invalid: nanosecond") + } + test("UNSUPPORTED_FEATURE: unsupported combinations of AES modes and padding") { val key16 = "abcdefghijklmnop" val key32 = "abcdefghijklmnop12345678ABCDEFGH" From 9fd98305a1a3223f33446a9abeaea6d2c13dfdda Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Fri, 18 Feb 2022 14:12:20 +0800 Subject: [PATCH 266/513] [SPARK-38225][SQL] Adjust input `format` of function `to_binary` ### What changes were proposed in this pull request? Adjust input `format` of function `to_binary`: - gracefully fail for the non-string `format` parameter - remove arguable `base2` format support ### Why are the changes needed? Currently, function to_binary doesn't deal with the non-string `format` parameter properly. For example, `spark.sql("select to_binary('abc', 1)")` raises casting error, rather than hint that encoding format is unsupported. In addition, `base2` format is arguable as discussed [here](https://github.com/apache/spark/pull/35415#discussion_r805578036). We may exclude it following what Snowflake [to_binary](https://docs.snowflake.com/en/sql-reference/functions/to_binary.html) does for now. ### Does this PR introduce _any_ user-facing change? Yes. - Better error messages for non-string `format` parameter. For example: From: ``` scala> spark.sql("select to_binary('abc', 1)") org.apache.spark.sql.AnalysisException: class java.lang.Integer cannot be cast to class org.apache.spark.unsafe.types.UTF8String (java.lang.Integer is in module java.base of loader 'bootstrap'; org.apache.spark.unsafe.types.UTF8String is in unnamed module of loader 'app'); line 1 pos 7 ``` To: ``` scala> spark.sql("select to_binary('abc', 1)") org.apache.spark.sql.AnalysisException: cannot resolve 'to_binary('abc', 1)' due to data type mismatch: Unsupported encoding format: Some(1). The format has to be a case-insensitive string literal of 'hex', 'utf-8', 'base2', or 'base64'; line 1 pos 7; ``` - Removed `base2` format support ``` scala> spark.sql("select to_binary('abc', 'base2')").show() org.apache.spark.sql.AnalysisException: cannot resolve 'to_binary('abc', 'base2')' due to data type mismatch: Unsupported encoding format: Some(base2). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7; ``` ### How was this patch tested? Unit test. Closes #35533 from xinrong-databricks/to_binary_followup. 
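For reference, a minimal spark-shell sketch of inputs that remain supported after this change (values are illustrative; it assumes a build that includes `to_binary`, and `hex` stays the default when `fmt` is omitted):
```scala
// Illustrative only: the three formats still accepted after removing 'base2'.
spark.sql("SELECT to_binary('537061726B')").show()          // hex (default format)
spark.sql("SELECT to_binary('abc', 'utf-8')").show()        // UTF-8 encoding
spark.sql("SELECT to_binary('U3Bhcms=', 'base64')").show()  // base64 decoding
```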
Authored-by: Xinrong Meng Signed-off-by: Wenchen Fan --- .../expressions/stringExpressions.scala | 14 ++++----- .../sql-tests/inputs/string-functions.sql | 3 +- .../results/ansi/string-functions.sql.out | 30 ++++++++++++------- .../results/string-functions.sql.out | 30 ++++++++++++------- 4 files changed, 49 insertions(+), 28 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index f450dd80a8b13..56cd224dd8c53 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -2545,7 +2545,7 @@ case class Encode(value: Expression, charset: Expression) @ExpressionDescription( usage = """ _FUNC_(str[, fmt]) - Converts the input `str` to a binary value based on the supplied `fmt`. - `fmt` can be a case-insensitive string literal of "hex", "utf-8", "base2", or "base64". + `fmt` can be a case-insensitive string literal of "hex", "utf-8", or "base64". By default, the binary format for conversion is "hex" if `fmt` is omitted. The function returns NULL if at least one of the input parameters is NULL. """, @@ -2562,7 +2562,7 @@ case class ToBinary(expr: Expression, format: Option[Expression], child: Express def this(expr: Expression, format: Expression) = this(expr, Option(format), format match { - case lit if lit.foldable => + case lit if (lit.foldable && Seq(StringType, NullType).contains(lit.dataType)) => val value = lit.eval() if (value == null) Literal(null, BinaryType) else { @@ -2570,7 +2570,6 @@ case class ToBinary(expr: Expression, format: Option[Expression], child: Express case "hex" => Unhex(expr) case "utf-8" => Encode(expr, Literal("UTF-8")) case "base64" => UnBase64(expr) - case "base2" => Cast(expr, BinaryType) case _ => lit } } @@ -2589,10 +2588,11 @@ case class ToBinary(expr: Expression, format: Option[Expression], child: Express override def checkInputDataTypes(): TypeCheckResult = { def checkFormat(lit: Expression) = { - if (lit.foldable) { + if (lit.foldable && Seq(StringType, NullType).contains(lit.dataType)) { val value = lit.eval() - value == null || Seq("hex", "utf-8", "base64", "base2").contains( - value.asInstanceOf[UTF8String].toString.toLowerCase(Locale.ROOT)) + value == null || + Seq("hex", "utf-8", "base64").contains( + value.asInstanceOf[UTF8String].toString.toLowerCase(Locale.ROOT)) } else false } @@ -2601,7 +2601,7 @@ case class ToBinary(expr: Expression, format: Option[Expression], child: Express } else { TypeCheckResult.TypeCheckFailure( s"Unsupported encoding format: $format. 
The format has to be " + - s"a case-insensitive string literal of 'hex', 'utf-8', 'base2', or 'base64'") + s"a case-insensitive string literal of 'hex', 'utf-8', or 'base64'") } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 9571f3eb6c2bb..94eb96f6249a0 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -140,7 +140,6 @@ select to_number('00,454.8-', '00,000.9-'); select to_binary('abc'); select to_binary('abc', 'utf-8'); select to_binary('abc', 'base64'); -select to_binary('abc', 'base2'); select to_binary('abc', 'hex'); select to_binary('abc', concat('utf', '-8')); select to_binary('abc', concat('base', '64')); @@ -150,4 +149,6 @@ select to_binary('abc', null); select to_binary(null, 'utf-8'); select to_binary(null, null); select to_binary(null, cast(null as string)); +select to_binary(null, cast(null as int)); select to_binary('abc', 'invalidFormat'); +select to_binary('abc', 1); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 86c90fc1fe34d..4c0aa8c948334 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 116 +-- Number of queries: 117 -- !query @@ -850,14 +850,6 @@ struct i� --- !query -select to_binary('abc', 'base2') --- !query schema -struct --- !query output -abc - - -- !query select to_binary('abc', 'hex') -- !query schema @@ -930,10 +922,28 @@ struct NULL +-- !query +select to_binary(null, cast(null as int)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'to_binary(NULL, CAST(NULL AS INT))' due to data type mismatch: Unsupported encoding format: Some(ansi_cast(null as int)). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 + + -- !query select to_binary('abc', 'invalidFormat') -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'to_binary('abc', 'invalidFormat')' due to data type mismatch: Unsupported encoding format: Some(invalidFormat). The format has to be a case-insensitive string literal of 'hex', 'utf-8', 'base2', or 'base64'; line 1 pos 7 +cannot resolve 'to_binary('abc', 'invalidFormat')' due to data type mismatch: Unsupported encoding format: Some(invalidFormat). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 + + +-- !query +select to_binary('abc', 1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'to_binary('abc', 1)' due to data type mismatch: Unsupported encoding format: Some(1). 
The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index f3852a9527b00..bb2974db2322b 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 116 +-- Number of queries: 117 -- !query @@ -846,14 +846,6 @@ struct i� --- !query -select to_binary('abc', 'base2') --- !query schema -struct --- !query output -abc - - -- !query select to_binary('abc', 'hex') -- !query schema @@ -926,10 +918,28 @@ struct NULL +-- !query +select to_binary(null, cast(null as int)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'to_binary(NULL, CAST(NULL AS INT))' due to data type mismatch: Unsupported encoding format: Some(cast(null as int)). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 + + -- !query select to_binary('abc', 'invalidFormat') -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'to_binary('abc', 'invalidFormat')' due to data type mismatch: Unsupported encoding format: Some(invalidFormat). The format has to be a case-insensitive string literal of 'hex', 'utf-8', 'base2', or 'base64'; line 1 pos 7 +cannot resolve 'to_binary('abc', 'invalidFormat')' due to data type mismatch: Unsupported encoding format: Some(invalidFormat). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 + + +-- !query +select to_binary('abc', 1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'to_binary('abc', 1)' due to data type mismatch: Unsupported encoding format: Some(1). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 From 0fcb56036608f9bca44c5b11d0c8df96a045aa4e Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Fri, 18 Feb 2022 14:18:14 +0800 Subject: [PATCH 267/513] [SPARK-38138][SQL] Materialize QueryPlan subqueries ### What changes were proposed in this pull request? This PR propose to materialize `QueryPlan#subqueries` and pruned by `PLAN_EXPRESSION` on searching to improve the SQL compile performance. ### Why are the changes needed? We found a query in production that cost lots of time in optimize phase (also include AQE optimize phase) when enable DPP, the SQL pattern likes ``` select from a left join b on a. = b. left join c on b. = c. left join d on c. = d. left join e on d. = e. left join f on e. = f. left join g on f. = g. left join h on g. = h. ... ``` SPARK-36444 significantly reduces the optimize time (exclude AQE phase), see detail at #35431, but there are still lots of time costs in `InsertAdaptiveSparkPlan` on AQE optimize phase. Before this change, the query costs 658s, after this change only costs 65s. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing UTs. Closes #35438 from pan3793/subquery. 
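A minimal sketch (hypothetical types, not the real `QueryPlan` code) of the caching pattern this change applies: the repeatedly recomputed `def` becomes a memoized `lazy val`, and only expressions that can actually hold a nested plan are inspected (standing in for the `PLAN_EXPRESSION` pruning):
```scala
object SubqueryMaterializationSketch {
  sealed trait Expr
  case object Leaf extends Expr
  case class PlanExpr(plan: Plan) extends Expr

  class Plan(val expressions: Seq[Expr]) {
    // Before: `def subqueries` re-scanned every expression on each access.
    // After: computed once per plan node and pruned to plan-bearing expressions.
    @transient lazy val subqueries: Seq[Plan] =
      expressions.collect { case PlanExpr(p) => p }
  }

  def main(args: Array[String]): Unit = {
    val inner = new Plan(Nil)
    val outer = new Plan(Seq(Leaf, PlanExpr(inner)))
    assert(outer.subqueries == Seq(inner))
  }
}
```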
Authored-by: Cheng Pan Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/catalyst/plans/QueryPlan.scala | 6 +++--- .../sql/execution/adaptive/InsertAdaptiveSparkPlan.scala | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 2417ff904570b..58f2425e53706 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.rules.RuleId import org.apache.spark.sql.catalyst.rules.UnknownRuleId import org.apache.spark.sql.catalyst.trees.{AlwaysProcess, CurrentOrigin, TreeNode, TreeNodeTag} -import org.apache.spark.sql.catalyst.trees.TreePattern.OUTER_REFERENCE +import org.apache.spark.sql.catalyst.trees.TreePattern.{OUTER_REFERENCE, PLAN_EXPRESSION} import org.apache.spark.sql.catalyst.trees.TreePatternBits import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, StructType} @@ -427,8 +427,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] /** * All the top-level subqueries of the current plan node. Nested subqueries are not included. */ - def subqueries: Seq[PlanType] = { - expressions.flatMap(_.collect { + @transient lazy val subqueries: Seq[PlanType] = { + expressions.filter(_.containsPattern(PLAN_EXPRESSION)).flatMap(_.collect { case e: PlanExpression[_] => e.plan.asInstanceOf[PlanType] }) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala index 68042d8384102..5c208457004cb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.{ListQuery, SubqueryExpression} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.plans.physical.UnspecifiedDistribution import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.catalyst.trees.TreePattern.{DYNAMIC_PRUNING_SUBQUERY, IN_SUBQUERY, SCALAR_SUBQUERY} +import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.command.{DataWritingCommandExec, ExecutedCommandExec} import org.apache.spark.sql.execution.datasources.v2.V2CommandExec @@ -118,7 +118,7 @@ case class InsertAdaptiveSparkPlan( if (!plan.containsAnyPattern(SCALAR_SUBQUERY, IN_SUBQUERY, DYNAMIC_PRUNING_SUBQUERY)) { return subqueryMap.toMap } - plan.foreach(_.expressions.foreach(_.foreach { + plan.foreach(_.expressions.filter(_.containsPattern(PLAN_EXPRESSION)).foreach(_.foreach { case expressions.ScalarSubquery(p, _, exprId, _) if !subqueryMap.contains(exprId.id) => val executedPlan = compileSubquery(p) From a92f873bd352908b71895f092f5caaf69261ac95 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 18 Feb 2022 16:04:42 +0800 Subject: [PATCH 268/513] [SPARK-38215][SQL] InsertIntoHiveDir should use data source if it's convertible ### What changes were proposed in this pull request? 
Currently, the Spark SQL statement ``` INSERT OVERWRITE DIRECTORY 'path' STORED AS PARQUET query ``` can't be converted to use InsertIntoDataSourceCommand and still uses the Hive SerDe to write data, so we can't use features provided by newer parquet/orc versions, such as zstd compression. ``` spark-sql> INSERT OVERWRITE DIRECTORY 'hdfs://nameservice/user/hive/warehouse/test_zstd_dir' > stored as parquet > select 1 as id; [Stage 5:> (0 + 1) / 1]22/02/15 16:49:31 WARN TaskSetManager: Lost task 0.0 in stage 5.0 (TID 5, ip-xx-xx-xx-xx, executor 21): org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.IllegalArgumentException: No enum constant parquet.hadoop.metadata.CompressionCodecName.ZSTD at org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getHiveRecordWriter(HiveFileFormatUtils.java:249) at org.apache.spark.sql.hive.execution.HiveOutputWriter.<init>(HiveFileFormat.scala:123) at org.apache.spark.sql.hive.execution.HiveFileFormat$$anon$1.newInstance(HiveFileFormat.scala:103) at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:120) at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:108) at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:269) at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:203) at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:202) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:123) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` ### Why are the changes needed? Converting InsertIntoHiveDirCommand to InsertIntoDataSourceCommand supports more parquet/orc features. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UT Closes #35528 from AngersZhuuuu/SPARK-38215. 
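A sketch of the workflow this unlocks (the directory path and codec are illustrative; it assumes a Hive-enabled session with the new `spark.sql.hive.convertMetastoreInsertDir` flag and the existing parquet conversion flag enabled so the built-in parquet writer is used):
```scala
// Illustrative spark-shell session; once the directory insert is converted to
// the built-in data source writer, the parquet codec setting (e.g. zstd) applies.
spark.conf.set("spark.sql.hive.convertMetastoreInsertDir", "true")
spark.conf.set("spark.sql.parquet.compression.codec", "zstd")
spark.sql(
  """
    |INSERT OVERWRITE DIRECTORY '/tmp/test_zstd_dir'
    |STORED AS PARQUET
    |SELECT 1 AS id
  """.stripMargin)
```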
Authored-by: Angerszhuuuu Signed-off-by: Wenchen Fan --- .../spark/sql/hive/HiveMetastoreCatalog.scala | 26 +++++++++ .../spark/sql/hive/HiveStrategies.scala | 25 ++++++++- .../org/apache/spark/sql/hive/HiveUtils.scala | 9 ++++ .../sql/hive/execution/HiveDDLSuite.scala | 54 ++++++++++--------- .../sql/hive/execution/SQLQuerySuite.scala | 42 ++++++++++++++- 5 files changed, 128 insertions(+), 28 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index c905a52c4836b..b6f06f5989d2f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -156,6 +156,32 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log } } + def convertStorageFormat(storage: CatalogStorageFormat): CatalogStorageFormat = { + val serde = storage.serde.getOrElse("").toLowerCase(Locale.ROOT) + + if (serde.contains("parquet")) { + val options = storage.properties + (ParquetOptions.MERGE_SCHEMA -> + SQLConf.get.getConf(HiveUtils.CONVERT_METASTORE_PARQUET_WITH_SCHEMA_MERGING).toString) + storage.copy( + serde = None, + properties = options + ) + } else { + val options = storage.properties + if (SQLConf.get.getConf(SQLConf.ORC_IMPLEMENTATION) == "native") { + storage.copy( + serde = None, + properties = options + ) + } else { + storage.copy( + serde = None, + properties = options + ) + } + } + } + private def convertToLogicalRelation( relation: HiveTableRelation, options: Map[String, String], diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 6a3de557e6e09..d1e222794a526 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.catalyst.planning._ import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoStatement, LogicalPlan, ScriptTransformation, Statistics} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ -import org.apache.spark.sql.execution.command.{CreateTableCommand, DDLUtils} +import org.apache.spark.sql.execution.command.{CreateTableCommand, DDLUtils, InsertIntoDataSourceDirCommand} import org.apache.spark.sql.execution.datasources.{CreateTable, DataSourceStrategy} import org.apache.spark.sql.hive.execution._ import org.apache.spark.sql.hive.execution.HiveScriptTransformationExec @@ -186,6 +186,8 @@ object HiveAnalysis extends Rule[LogicalPlan] { * - When writing to non-partitioned Hive-serde Parquet/Orc tables * - When writing to partitioned Hive-serde Parquet/Orc tables when * `spark.sql.hive.convertInsertingPartitionedTable` is true + * - When writing to directory with Hive-serde + * - When writing to non-partitioned Hive-serde Parquet/ORC tables using CTAS * - When scanning Hive-serde Parquet/ORC tables * * This rule must be run before all other DDL post-hoc resolution rules, i.e. 
@@ -198,11 +200,20 @@ case class RelationConversions( } private def isConvertible(tableMeta: CatalogTable): Boolean = { - val serde = tableMeta.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) + isConvertible(tableMeta.storage) + } + + private def isConvertible(storage: CatalogStorageFormat): Boolean = { + val serde = storage.serde.getOrElse("").toLowerCase(Locale.ROOT) serde.contains("parquet") && conf.getConf(HiveUtils.CONVERT_METASTORE_PARQUET) || serde.contains("orc") && conf.getConf(HiveUtils.CONVERT_METASTORE_ORC) } + private def convertProvider(storage: CatalogStorageFormat): String = { + val serde = storage.serde.getOrElse("").toLowerCase(Locale.ROOT) + if (serde.contains("parquet")) "parquet" else "orc" + } + private val metastoreCatalog = sessionCatalog.metastoreCatalog override def apply(plan: LogicalPlan): LogicalPlan = { @@ -230,6 +241,16 @@ case class RelationConversions( DDLUtils.checkTableColumns(tableDesc.copy(schema = query.schema)) OptimizedCreateHiveTableAsSelectCommand( tableDesc, query, query.output.map(_.name), mode) + + // INSERT HIVE DIR + case InsertIntoDir(_, storage, provider, query, overwrite) + if query.resolved && DDLUtils.isHiveTable(provider) && + isConvertible(storage) && conf.getConf(HiveUtils.CONVERT_METASTORE_INSERT_DIR) => + val outputPath = new Path(storage.locationUri.get) + if (overwrite) DDLUtils.verifyNotReadPath(query, outputPath) + + InsertIntoDataSourceDirCommand(metastoreCatalog.convertStorageFormat(storage), + convertProvider(storage), query, overwrite) } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 93a38e524ebdc..911cb98588d78 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -160,6 +160,15 @@ private[spark] object HiveUtils extends Logging { .booleanConf .createWithDefault(true) + val CONVERT_METASTORE_INSERT_DIR = buildConf("spark.sql.hive.convertMetastoreInsertDir") + .doc("When set to true, Spark will try to use built-in data source writer " + + "instead of Hive serde in INSERT OVERWRITE DIRECTORY. This flag is effective only if " + + "`spark.sql.hive.convertMetastoreParquet` or `spark.sql.hive.convertMetastoreOrc` is " + + "enabled respectively for Parquet and ORC formats") + .version("3.3.0") + .booleanConf + .createWithDefault(true) + val HIVE_METASTORE_SHARED_PREFIXES = buildStaticConf("spark.sql.hive.metastore.sharedPrefixes") .doc("A comma separated list of class prefixes that should be loaded using the classloader " + "that is shared between Spark SQL and a specific version of Hive. 
An example of classes " + diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index a6eb12ef6ed67..c4cef44b6cc90 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils} import org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader import org.apache.spark.sql.functions._ -import org.apache.spark.sql.hive.HiveExternalCatalog +import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} import org.apache.spark.sql.hive.HiveUtils.{CONVERT_METASTORE_ORC, CONVERT_METASTORE_PARQUET} import org.apache.spark.sql.hive.orc.OrcFileOperator import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton, TestHiveSparkSession} @@ -2927,37 +2927,41 @@ class HiveDDLSuite } test("SPARK-33844, 37969: Insert overwrite directory should check schema too") { - withView("v") { - spark.range(1).createTempView("v") - withTempPath { path => - Seq("PARQUET", "ORC").foreach { format => - val e = intercept[SparkException] { - spark.sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' " + - s"STORED AS $format SELECT ID, if(1=1, 1, 0), abs(id), '^-' FROM v") - }.getCause.getMessage - assert(e.contains("Column name \"(IF((1 = 1), 1, 0))\" contains" + - " invalid character(s). Please use alias to rename it.")) + withSQLConf(HiveUtils.CONVERT_METASTORE_INSERT_DIR.key -> "false") { + withView("v") { + spark.range(1).createTempView("v") + withTempPath { path => + Seq("PARQUET", "ORC").foreach { format => + val e = intercept[SparkException] { + spark.sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' " + + s"STORED AS $format SELECT ID, if(1=1, 1, 0), abs(id), '^-' FROM v") + }.getCause.getMessage + assert(e.contains("Column name \"(IF((1 = 1), 1, 0))\" contains" + + " invalid character(s). Please use alias to rename it.")) + } } } } } test("SPARK-36201: Add check for inner field of parquet/orc schema") { - withView("v") { - spark.range(1).createTempView("v") - withTempPath { path => - val e = intercept[SparkException] { - spark.sql( - s""" - |INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' - |STORED AS PARQUET - |SELECT - |NAMED_STRUCT('ID', ID, 'IF(ID=1,ID,0)', IF(ID=1,ID,0), 'B', ABS(ID)) AS col1 - |FROM v + withSQLConf(HiveUtils.CONVERT_METASTORE_INSERT_DIR.key -> "false") { + withView("v") { + spark.range(1).createTempView("v") + withTempPath { path => + val e = intercept[SparkException] { + spark.sql( + s""" + |INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' + |STORED AS PARQUET + |SELECT + |NAMED_STRUCT('ID', ID, 'IF(ID=1,ID,0)', IF(ID=1,ID,0), 'B', ABS(ID)) AS col1 + |FROM v """.stripMargin) - }.getCause.getMessage - assert(e.contains("Column name \"IF(ID=1,ID,0)\" contains invalid character(s). " + - "Please use alias to rename it.")) + }.getCause.getMessage + assert(e.contains("Column name \"IF(ID=1,ID,0)\" contains invalid character(s). 
" + + "Please use alias to rename it.")) + } } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index d3f5d7613ace7..f2711db839913 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} import org.apache.spark.sql.execution.TestUncaughtExceptionHandler import org.apache.spark.sql.execution.adaptive.{DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} -import org.apache.spark.sql.execution.command.LoadDataCommand +import org.apache.spark.sql.execution.command.{InsertIntoDataSourceDirCommand, LoadDataCommand} import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} @@ -2654,6 +2654,46 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } } } + + test("SPARK-38215: Hive Insert Dir should use data source if it is convertible") { + withTempView("p") { + Seq(1, 2, 3).toDF("id").createOrReplaceTempView("p") + + Seq("orc", "parquet").foreach { format => + Seq(true, false).foreach { isConverted => + withSQLConf( + HiveUtils.CONVERT_METASTORE_ORC.key -> s"$isConverted", + HiveUtils.CONVERT_METASTORE_PARQUET.key -> s"$isConverted") { + Seq(true, false).foreach { isConvertedCtas => + withSQLConf(HiveUtils.CONVERT_METASTORE_INSERT_DIR.key -> s"$isConvertedCtas") { + withTempDir { dir => + val df = sql( + s""" + |INSERT OVERWRITE LOCAL DIRECTORY '${dir.getAbsolutePath}' + |STORED AS $format + |SELECT 1 + """.stripMargin) + val insertIntoDSDir = df.queryExecution.analyzed.collect { + case _: InsertIntoDataSourceDirCommand => true + }.headOption + val insertIntoHiveDir = df.queryExecution.analyzed.collect { + case _: InsertIntoHiveDirCommand => true + }.headOption + if (isConverted && isConvertedCtas) { + assert(insertIntoDSDir.nonEmpty) + assert(insertIntoHiveDir.isEmpty) + } else { + assert(insertIntoDSDir.isEmpty) + assert(insertIntoHiveDir.nonEmpty) + } + } + } + } + } + } + } + } + } } @SlowHiveTest From e263b65c1c4dc8e9566e5dccafb66bffdf8e5535 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Fri, 18 Feb 2022 16:33:41 +0800 Subject: [PATCH 269/513] [SPARK-38232][SQL] Explain formatted does not collect subqueries under query stage in AQE ### What changes were proposed in this pull request? Match `QueryStageExec` during collecting subqeries in ExplainUtils ### Why are the changes needed? ExplainUtils have not catched QueryStageExec during collecting subquries. So we can not get the subqueries formatted explain who is under the QueryStageExec. Note that, it also affects the subquery of dpp. An example to see the issue ```scala spark.sql("CREATE TABLE t USING PARQUET AS SELECT 1 AS c") val df = spark.sql("SELECT count(s) FROM (SELECT (SELECT c FROM t) AS s)") df.explain("formatted") df.collect df.explain("formatted") ``` ### Does this PR introduce _any_ user-facing change? yes, after fix, user can see all subquries in AQE. ### How was this patch tested? Add test Closes #35544 from ulysses-you/SPARK-38232. 
Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../apache/spark/sql/execution/ExplainUtils.scala | 2 ++ .../scala/org/apache/spark/sql/ExplainSuite.scala | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala index 1eea0cd777ed9..12ffbc8554e63 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala @@ -247,6 +247,8 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { plan.foreach { case a: AdaptiveSparkPlanExec => getSubqueries(a.executedPlan, subqueries) + case q: QueryStageExec => + getSubqueries(q.plan, subqueries) case p: SparkPlan => p.expressions.foreach (_.collect { case e: PlanExpression[_] => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 44d0445928b90..99bdfc829b442 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -723,6 +723,19 @@ class ExplainSuiteAE extends ExplainSuiteHelper with EnableAdaptiveExecutionSuit assert(inMemoryRelationNodeId != columnarToRowNodeId) } } + + test("SPARK-38232: Explain formatted does not collect subqueries under query stage in AQE") { + withTable("t") { + sql("CREATE TABLE t USING PARQUET AS SELECT 1 AS c") + val expected = + "Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#x, [id=#x]" + val df = sql("SELECT count(s) FROM (SELECT (SELECT c FROM t) as s)") + df.collect() + withNormalizedExplain(df, FormattedMode) { output => + assert(output.contains(expected)) + } + } + } } case class ExplainSingleData(id: Int) From 15532c70293e81511fdc481cbece2305950d819d Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Fri, 18 Feb 2022 22:15:28 +0800 Subject: [PATCH 270/513] [SPARK-35937][FOLLOW-UP][SQL] GetDateFieldOperations should skip unresolved nodes ### What changes were proposed in this pull request? Skip nodes whose children have not been resolved yet within `GetDateFieldOperations`. ### Why are the changes needed? The current `GetDateFieldOperations` could result in `org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to dataType on unresolved object` in some cases. See the example added in unit test. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added a unit test. Closes #35568 from Ngone51/SPARK-35937-followup. 
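For convenience, a spark-shell sketch mirroring the unit test added below (assumes ANSI mode is on): before this fix the final join failed with the `UnresolvedException` above; afterwards it fails with a proper `AnalysisException` about the missing `df1.timeStr` column.
```scala
// Mirrors the new test case; assumes spark.sql.ansi.enabled=true.
import org.apache.spark.sql.functions.year
import spark.implicits._

spark.conf.set("spark.sql.ansi.enabled", "true")
val df = Seq("1644821603").map(i => (i.toInt, i)).toDF("tsInt", "tsStr")
val df1 = df.select(df("tsStr").cast("timestamp")).as("df1")
val df2 = df.select(df("tsStr").cast("timestamp")).as("df2")
val df3 = df1.join(df2, $"df1.tsStr" === $"df2.tsStr", "left_outer")
  .select($"df1.tsStr".as("timeStr")).as("df3")
// Analysis now reports: "Column 'df1.timeStr' does not exist."
df3.join(df1, year($"df1.timeStr") === year($"df3.tsStr"))
```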
Authored-by: yi.wu Signed-off-by: Wenchen Fan --- .../catalyst/analysis/AnsiTypeCoercion.scala | 3 +++ .../spark/sql/DataFrameSelfJoinSuite.scala | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala index e13ff2b3e709f..036efba34fab6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala @@ -280,6 +280,9 @@ object AnsiTypeCoercion extends TypeCoercionBase { */ object GetDateFieldOperations extends TypeCoercionRule { override def transform: PartialFunction[Expression, Expression] = { + // Skip nodes who's children have not been resolved yet. + case g if !g.childrenResolved => g + case g: GetDateField if AnyTimestampType.unapply(g.child) => g.withNewChildren(Seq(Cast(g.child, DateType))) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala index a0ddabcf76043..4d0dd46b9569c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.api.python.PythonEvalType import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, AttributeReference, PythonUDF, SortOrder} import org.apache.spark.sql.catalyst.plans.logical.{Expand, Generate, ScriptInputOutputSchema, ScriptTransformation, Window => WindowPlan} import org.apache.spark.sql.expressions.Window -import org.apache.spark.sql.functions.{count, explode, sum} +import org.apache.spark.sql.functions.{count, explode, sum, year} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.test.SQLTestData.TestData @@ -467,4 +467,21 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { assertAmbiguousSelfJoin(df21.join(df22, df21("x") === df22("y"))) assertAmbiguousSelfJoin(df22.join(df21, df21("x") === df22("y"))) } + + test("SPARK-35937: GetDateFieldOperations should skip unresolved nodes") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val df = Seq("1644821603").map(i => (i.toInt, i)).toDF("tsInt", "tsStr") + val df1 = df.select(df("tsStr").cast("timestamp")).as("df1") + val df2 = df.select(df("tsStr").cast("timestamp")).as("df2") + df1.join(df2, $"df1.tsStr" === $"df2.tsStr", "left_outer") + val df3 = df1.join(df2, $"df1.tsStr" === $"df2.tsStr", "left_outer") + .select($"df1.tsStr".as("timeStr")).as("df3") + // Before the fix, it throws "UnresolvedException: Invalid call to + // dataType on unresolved object". + val ex = intercept[AnalysisException]( + df3.join(df1, year($"df1.timeStr") === year($"df3.tsStr")) + ) + assert(ex.message.contains("Column 'df1.timeStr' does not exist.")) + } + } } From e613f08f3f5044a128894389cbda7ad2bdde076d Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Fri, 18 Feb 2022 22:22:04 +0800 Subject: [PATCH 271/513] [SPARK-37867][SQL][FOLLOWUP] Compile aggregate functions for build-in DB2 dialect ### What changes were proposed in this pull request? This PR follows up https://github.com/apache/spark/pull/35166. 
The previously referenced DB2 documentation is incorrect, resulting in the lack of compile that supports some aggregate functions. The correct documentation is https://www.ibm.com/docs/en/db2/11.5?topic=af-regression-functions-regr-avgx-regr-avgy-regr-count ### Why are the changes needed? Make build-in DB2 dialect support complete aggregate push-down more aggregate functions. ### Does this PR introduce _any_ user-facing change? 'Yes'. Users could use complete aggregate push-down with build-in DB2 dialect. ### How was this patch tested? New tests. Closes #35520 from beliefer/SPARK-37867_followup. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/DB2IntegrationSuite.scala | 9 +++ .../jdbc/v2/MsSqlServerIntegrationSuite.scala | 4 ++ .../jdbc/v2/PostgresIntegrationSuite.scala | 7 +++ .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 63 ++++++++++--------- .../apache/spark/sql/jdbc/DB2Dialect.scala | 19 ++++++ .../apache/spark/sql/jdbc/DerbyDialect.scala | 23 +++---- .../spark/sql/jdbc/MsSqlServerDialect.scala | 3 + .../apache/spark/sql/jdbc/MySQLDialect.scala | 21 +++---- .../apache/spark/sql/jdbc/OracleDialect.scala | 38 +++++------ .../spark/sql/jdbc/PostgresDialect.scala | 1 + .../spark/sql/jdbc/TeradataDialect.scala | 18 +++--- 11 files changed, 123 insertions(+), 83 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 2cfb21395d8a9..4b2bbbdd8494c 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -97,4 +97,13 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { override def caseConvert(tableName: String): String = tableName.toUpperCase(Locale.ROOT) testVarPop() + testVarPop(true) + testVarSamp() + testVarSamp(true) + testStddevPop() + testStddevPop(true) + testStddevSamp() + testStddevSamp(true) + testCovarPop() + testCovarSamp() } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index e9521ec35a8ce..a527c6f8cb5b6 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -98,7 +98,11 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JD } testVarPop() + testVarPop(true) testVarSamp() + testVarSamp(true) testStddevPop() + testStddevPop(true) testStddevSamp() + testStddevSamp(true) } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index 86f5c3c8cd418..77ace3f3f4ea7 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -91,10 +91,17 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT 
override def indexOptions: String = "FILLFACTOR=70" testVarPop() + testVarPop(true) testVarSamp() + testVarSamp(true) testStddevPop() + testStddevPop(true) testStddevSamp() + testStddevSamp(true) testCovarPop() + testCovarPop(true) testCovarSamp() + testCovarSamp(true) testCorr() + testCorr(true) } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 6ea2099346781..ebd5b844cbc9b 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -386,10 +386,11 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu protected def caseConvert(tableName: String): String = tableName - protected def testVarPop(): Unit = { - test(s"scan with aggregate push-down: VAR_POP") { - val df = sql(s"SELECT VAR_POP(bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + - " WHERE dept > 0 GROUP BY dept ORDER BY dept") + protected def testVarPop(isDistinct: Boolean = false): Unit = { + val distinct = if (isDistinct) "DISTINCT " else "" + test(s"scan with aggregate push-down: VAR_POP with distinct: $isDistinct") { + val df = sql(s"SELECT VAR_POP(${distinct}bonus) FROM $catalogAndNamespace." + + s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept") checkFilterPushed(df) checkAggregateRemoved(df) checkAggregatePushed(df, "VAR_POP") @@ -401,11 +402,12 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu } } - protected def testVarSamp(): Unit = { - test(s"scan with aggregate push-down: VAR_SAMP") { + protected def testVarSamp(isDistinct: Boolean = false): Unit = { + val distinct = if (isDistinct) "DISTINCT " else "" + test(s"scan with aggregate push-down: VAR_SAMP with distinct: $isDistinct") { val df = sql( - s"SELECT VAR_SAMP(bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + - " WHERE dept > 0 GROUP BY dept ORDER BY dept") + s"SELECT VAR_SAMP(${distinct}bonus) FROM $catalogAndNamespace." + + s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept") checkFilterPushed(df) checkAggregateRemoved(df) checkAggregatePushed(df, "VAR_SAMP") @@ -417,11 +419,12 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu } } - protected def testStddevPop(): Unit = { - test("scan with aggregate push-down: STDDEV_POP") { + protected def testStddevPop(isDistinct: Boolean = false): Unit = { + val distinct = if (isDistinct) "DISTINCT " else "" + test(s"scan with aggregate push-down: STDDEV_POP with distinct: $isDistinct") { val df = sql( - s"SELECT STDDEV_POP(bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + - " WHERE dept > 0 GROUP BY dept ORDER BY dept") + s"SELECT STDDEV_POP(${distinct}bonus) FROM $catalogAndNamespace." 
+ + s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept") checkFilterPushed(df) checkAggregateRemoved(df) checkAggregatePushed(df, "STDDEV_POP") @@ -433,11 +436,12 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu } } - protected def testStddevSamp(): Unit = { - test("scan with aggregate push-down: STDDEV_SAMP") { + protected def testStddevSamp(isDistinct: Boolean = false): Unit = { + val distinct = if (isDistinct) "DISTINCT " else "" + test(s"scan with aggregate push-down: STDDEV_SAMP with distinct: $isDistinct") { val df = sql( - s"SELECT STDDEV_SAMP(bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + - " WHERE dept > 0 GROUP BY dept ORDER BY dept") + s"SELECT STDDEV_SAMP(${distinct}bonus) FROM $catalogAndNamespace." + + s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept") checkFilterPushed(df) checkAggregateRemoved(df) checkAggregatePushed(df, "STDDEV_SAMP") @@ -449,11 +453,12 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu } } - protected def testCovarPop(): Unit = { - test("scan with aggregate push-down: COVAR_POP") { + protected def testCovarPop(isDistinct: Boolean = false): Unit = { + val distinct = if (isDistinct) "DISTINCT " else "" + test(s"scan with aggregate push-down: COVAR_POP with distinct: $isDistinct") { val df = sql( - s"SELECT COVAR_POP(bonus, bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + - " WHERE dept > 0 GROUP BY dept ORDER BY dept") + s"SELECT COVAR_POP(${distinct}bonus, bonus) FROM $catalogAndNamespace." + + s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept") checkFilterPushed(df) checkAggregateRemoved(df) checkAggregatePushed(df, "COVAR_POP") @@ -465,11 +470,12 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu } } - protected def testCovarSamp(): Unit = { - test("scan with aggregate push-down: COVAR_SAMP") { + protected def testCovarSamp(isDistinct: Boolean = false): Unit = { + val distinct = if (isDistinct) "DISTINCT " else "" + test(s"scan with aggregate push-down: COVAR_SAMP with distinct: $isDistinct") { val df = sql( - s"SELECT COVAR_SAMP(bonus, bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + - " WHERE dept > 0 GROUP BY dept ORDER BY dept") + s"SELECT COVAR_SAMP(${distinct}bonus, bonus) FROM $catalogAndNamespace." + + s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept") checkFilterPushed(df) checkAggregateRemoved(df) checkAggregatePushed(df, "COVAR_SAMP") @@ -481,11 +487,12 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu } } - protected def testCorr(): Unit = { - test("scan with aggregate push-down: CORR") { + protected def testCorr(isDistinct: Boolean = false): Unit = { + val distinct = if (isDistinct) "DISTINCT " else "" + test(s"scan with aggregate push-down: CORR with distinct: $isDistinct") { val df = sql( - s"SELECT CORR(bonus, bonus) FROM $catalogAndNamespace.${caseConvert("employee")}" + - " WHERE dept > 0 GROUP BY dept ORDER BY dept") + s"SELECT CORR(${distinct}bonus, bonus) FROM $catalogAndNamespace." 
+ + s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept") checkFilterPushed(df) checkAggregateRemoved(df) checkAggregatePushed(df, "CORR") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala index baa772f4546a4..6af5cc00ef5db 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala @@ -30,6 +30,7 @@ private object DB2Dialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:db2") + // See https://www.ibm.com/docs/en/db2/11.5?topic=functions-aggregate override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { super.compileAggregate(aggFunction).orElse( aggFunction match { @@ -37,6 +38,24 @@ private object DB2Dialect extends JdbcDialect { assert(f.inputs().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" Some(s"VARIANCE($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"VARIANCE_SAMP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDDEV($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => + assert(f.inputs().length == 1) + val distinct = if (f.isDistinct) "DISTINCT " else "" + Some(s"STDDEV_SAMP($distinct${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "COVAR_POP" && f.isDistinct == false => + assert(f.inputs().length == 2) + Some(s"COVARIANCE(${f.inputs().head}, ${f.inputs().last})") + case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" && f.isDistinct == false => + assert(f.inputs().length == 2) + Some(s"COVARIANCE_SAMP(${f.inputs().head}, ${f.inputs().last})") case _ => None } ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala index e87d4d08ae031..bf838b8ed66eb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala @@ -30,25 +30,22 @@ private object DerbyDialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:derby") + // See https://db.apache.org/derby/docs/10.15/ref/index.html override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { super.compileAggregate(aggFunction).orElse( aggFunction match { - case f: GeneralAggregateFunc if f.name() == "VAR_POP" => + case f: GeneralAggregateFunc if f.name() == "VAR_POP" && f.isDistinct == false => assert(f.inputs().length == 1) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VAR_POP($distinct${f.inputs().head})") - case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => + Some(s"VAR_POP(${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" && f.isDistinct == false => assert(f.inputs().length == 1) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VAR_SAMP($distinct${f.inputs().head})") - case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => + Some(s"VAR_SAMP(${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" && 
f.isDistinct == false => assert(f.inputs().length == 1) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV_POP($distinct${f.inputs().head})") - case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => + Some(s"STDDEV_POP(${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" && f.isDistinct == false => assert(f.inputs().length == 1) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV_SAMP($distinct${f.inputs().head})") + Some(s"STDDEV_SAMP(${f.inputs().head})") case _ => None } ) @@ -72,7 +69,7 @@ private object DerbyDialect extends JdbcDialect { override def isCascadingTruncateTable(): Option[Boolean] = Some(false) - // See https://db.apache.org/derby/docs/10.5/ref/rrefsqljrenametablestatement.html + // See https://db.apache.org/derby/docs/10.15/ref/rrefsqljrenametablestatement.html override def renameTable(oldTable: String, newTable: String): String = { s"RENAME TABLE $oldTable TO $newTable" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index 3d8a48a66ea8f..841f1c87319b5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -40,6 +40,9 @@ private object MsSqlServerDialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:sqlserver") + // scalastyle:off line.size.limit + // See https://docs.microsoft.com/en-us/sql/t-sql/functions/aggregate-functions-transact-sql?view=sql-server-ver15 + // scalastyle:on line.size.limit override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { super.compileAggregate(aggFunction).orElse( aggFunction match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index 3cca81048e812..d73721de962d7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -38,25 +38,22 @@ private case object MySQLDialect extends JdbcDialect with SQLConfHelper { override def canHandle(url : String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:mysql") + // See https://dev.mysql.com/doc/refman/8.0/en/aggregate-functions.html override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { super.compileAggregate(aggFunction).orElse( aggFunction match { - case f: GeneralAggregateFunc if f.name() == "VAR_POP" => + case f: GeneralAggregateFunc if f.name() == "VAR_POP" && f.isDistinct == false => assert(f.inputs().length == 1) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VAR_POP($distinct${f.inputs().head})") - case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => + Some(s"VAR_POP(${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" && f.isDistinct == false => assert(f.inputs().length == 1) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VAR_SAMP($distinct${f.inputs().head})") - case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => + Some(s"VAR_SAMP(${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" && f.isDistinct == false => assert(f.inputs().length == 1) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV_POP($distinct${f.inputs().head})") - case f: 
GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => + Some(s"STDDEV_POP(${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" && f.isDistinct == false => assert(f.inputs().length == 1) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV_SAMP($distinct${f.inputs().head})") + Some(s"STDDEV_SAMP(${f.inputs().head})") case _ => None } ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index 4fe7d93142c1e..71db7e9285f5e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -34,37 +34,33 @@ private case object OracleDialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:oracle") + // scalastyle:off line.size.limit + // https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Aggregate-Functions.html#GUID-62BE676B-AF18-4E63-BD14-25206FEA0848 + // scalastyle:on line.size.limit override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { super.compileAggregate(aggFunction).orElse( aggFunction match { - case f: GeneralAggregateFunc if f.name() == "VAR_POP" => + case f: GeneralAggregateFunc if f.name() == "VAR_POP" && f.isDistinct == false => assert(f.inputs().length == 1) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VAR_POP($distinct${f.inputs().head})") - case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => + Some(s"VAR_POP(${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" && f.isDistinct == false => assert(f.inputs().length == 1) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VAR_SAMP($distinct${f.inputs().head})") - case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => + Some(s"VAR_SAMP(${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" && f.isDistinct == false => assert(f.inputs().length == 1) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV_POP($distinct${f.inputs().head})") - case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => + Some(s"STDDEV_POP(${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" && f.isDistinct == false => assert(f.inputs().length == 1) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV_SAMP($distinct${f.inputs().head})") - case f: GeneralAggregateFunc if f.name() == "COVAR_POP" => + Some(s"STDDEV_SAMP(${f.inputs().head})") + case f: GeneralAggregateFunc if f.name() == "COVAR_POP" && f.isDistinct == false => assert(f.inputs().length == 2) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"COVAR_POP($distinct${f.inputs().head}, ${f.inputs().last})") - case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" => + Some(s"COVAR_POP(${f.inputs().head}, ${f.inputs().last})") + case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" && f.isDistinct == false => assert(f.inputs().length == 2) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"COVAR_SAMP($distinct${f.inputs().head}, ${f.inputs().last})") - case f: GeneralAggregateFunc if f.name() == "CORR" => + Some(s"COVAR_SAMP(${f.inputs().head}, ${f.inputs().last})") + case f: GeneralAggregateFunc if f.name() == "CORR" && f.isDistinct == false => assert(f.inputs().length == 2) - val distinct = if (f.isDistinct) "DISTINCT " else "" - 
Some(s"CORR($distinct${f.inputs().head}, ${f.inputs().last})") + Some(s"CORR(${f.inputs().head}, ${f.inputs().last})") case _ => None } ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index 46e79404f3e54..e2023d110ae4b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -36,6 +36,7 @@ private object PostgresDialect extends JdbcDialect with SQLConfHelper { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:postgresql") + // See https://www.postgresql.org/docs/8.4/functions-aggregate.html override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { super.compileAggregate(aggFunction).orElse( aggFunction match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala index 6344667b3180e..13e16d24d048d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala @@ -28,6 +28,9 @@ private case object TeradataDialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:teradata") + // scalastyle:off line.size.limit + // See https://docs.teradata.com/r/Teradata-VantageTM-SQL-Functions-Expressions-and-Predicates/March-2019/Aggregate-Functions + // scalastyle:on line.size.limit override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { super.compileAggregate(aggFunction).orElse( aggFunction match { @@ -47,18 +50,15 @@ private case object TeradataDialect extends JdbcDialect { assert(f.inputs().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" Some(s"STDDEV_SAMP($distinct${f.inputs().head})") - case f: GeneralAggregateFunc if f.name() == "COVAR_POP" => + case f: GeneralAggregateFunc if f.name() == "COVAR_POP" && f.isDistinct == false => assert(f.inputs().length == 2) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"COVAR_POP($distinct${f.inputs().head}, ${f.inputs().last})") - case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" => + Some(s"COVAR_POP(${f.inputs().head}, ${f.inputs().last})") + case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" && f.isDistinct == false => assert(f.inputs().length == 2) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"COVAR_SAMP($distinct${f.inputs().head}, ${f.inputs().last})") - case f: GeneralAggregateFunc if f.name() == "CORR" => + Some(s"COVAR_SAMP(${f.inputs().head}, ${f.inputs().last})") + case f: GeneralAggregateFunc if f.name() == "CORR" && f.isDistinct == false => assert(f.inputs().length == 2) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"CORR($distinct${f.inputs().head}, ${f.inputs().last})") + Some(s"CORR(${f.inputs().head}, ${f.inputs().last})") case _ => None } ) From b5eae59c5548a22cf9a1d67115e59600fb6cb9d4 Mon Sep 17 00:00:00 2001 From: jackierwzhang Date: Fri, 18 Feb 2022 23:12:05 +0800 Subject: [PATCH 272/513] [SPARK-38094] Enable matching schema column names by field ids ### What changes were proposed in this pull request? 
Field ID is a native field in the Parquet schema (https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift#L398). After this PR, when the requested schema contains field IDs, Parquet readers will first use those field IDs to determine which Parquet columns to read, and fall back to matching by column name for fields without an ID. This PR supports: - Vectorized reader - parquet-mr reader ### Why are the changes needed? It enables matching columns by field ID for table formats such as Iceberg and Delta. Specifically, it enables easy conversion from Iceberg (which tracks columns by field ID rather than by name) to Delta, and allows `id` mode for Delta [column mapping](https://docs.databricks.com/delta/delta-column-mapping.html). ### Does this PR introduce _any_ user-facing change? This PR introduces three new configurations (a usage sketch follows below): `spark.sql.parquet.fieldId.write.enabled`: If enabled, Spark writes the field IDs stored in StructField metadata under the key `parquet.field.id` out to Parquet files. This configuration defaults to `true`. `spark.sql.parquet.fieldId.read.enabled`: If enabled, Spark attempts to read field IDs from Parquet files and use them to match columns. This configuration defaults to `false`, so Spark keeps its existing behavior by default. `spark.sql.parquet.fieldId.read.ignoreMissing`: If enabled, Spark reads Parquet files that do not contain any field IDs while still attempting to match columns by ID against the Spark schema; nulls are returned for Spark columns without a match. This configuration defaults to `false`, so Spark can alert the user when field ID matching is expected but the Parquet files do not contain any IDs. ### How was this patch tested? Existing tests + new unit tests. Closes #35385 from jackierwzhang/SPARK-38094-field-ids.
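For illustration only (not part of this patch), the following is a minimal Scala sketch of how the three configurations and the `parquet.field.id` StructField metadata key fit together; the output path, column names, and IDs are invented for the example.
```scala
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types._

val spark = SparkSession.builder().master("local[*]").appName("field-id-sketch").getOrCreate()

// Attach a Parquet field ID to a column via StructField metadata.
def withId(id: Int): Metadata =
  new MetadataBuilder().putLong("parquet.field.id", id).build()

// The write schema carries IDs 1 and 2; with spark.sql.parquet.fieldId.write.enabled
// left at its default (true), the IDs are written into the Parquet file schema.
val writeSchema = new StructType()
  .add("name", StringType, nullable = true, withId(1))
  .add("score", IntegerType, nullable = true, withId(2))
val data = spark.createDataFrame(
  spark.sparkContext.parallelize(Seq(Row("a", 10), Row("b", 20))), writeSchema)
data.write.mode("overwrite").parquet("/tmp/field_id_sketch")

// The read schema uses different column names but the same IDs; with field-ID reading
// switched on, columns are resolved by ID instead of by name.
val readSchema = new StructType()
  .add("renamed_name", StringType, nullable = true, withId(1))
  .add("renamed_score", IntegerType, nullable = true, withId(2))
spark.conf.set("spark.sql.parquet.fieldId.read.enabled", "true")
spark.read.schema(readSchema).parquet("/tmp/field_id_sketch").show()
```
The metadata key string used above matches the `ParquetUtils.FIELD_ID_METADATA_KEY` constant introduced by this patch.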
Authored-by: jackierwzhang Signed-off-by: Wenchen Fan --- .../sql/errors/QueryExecutionErrors.scala | 9 + .../apache/spark/sql/internal/SQLConf.scala | 33 ++ .../parquet/ParquetFileFormat.scala | 4 + .../parquet/ParquetReadSupport.scala | 249 +++++++-- .../parquet/ParquetRowConverter.scala | 34 +- .../parquet/ParquetSchemaConverter.scala | 18 +- .../datasources/parquet/ParquetUtils.scala | 44 +- .../datasources/v2/parquet/ParquetWrite.scala | 4 + .../parquet/ParquetFieldIdIOSuite.scala | 213 +++++++ .../parquet/ParquetFieldIdSchemaSuite.scala | 528 ++++++++++++++++++ .../parquet/ParquetSchemaSuite.scala | 10 +- .../datasources/parquet/ParquetTest.scala | 12 +- .../spark/sql/test/TestSQLContext.scala | 8 +- 13 files changed, 1088 insertions(+), 78 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdSchemaSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 1d87e9f0a992b..d1db0177dfd23 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -808,6 +808,15 @@ object QueryExecutionErrors { """.stripMargin.replaceAll("\n", " ")) } + def foundDuplicateFieldInFieldIdLookupModeError( + requiredId: Int, matchedFields: String): Throwable = { + new RuntimeException( + s""" + |Found duplicate field(s) "$requiredId": $matchedFields + |in id mapping mode + """.stripMargin.replaceAll("\n", " ")) + } + def failedToMergeIncompatibleSchemasError( left: StructType, right: StructType, e: Throwable): Throwable = { new SparkException(s"Failed to merge incompatible schemas $left and $right", e) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 59a896a29b6f2..3a7ce650ea633 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -934,6 +934,33 @@ object SQLConf { .intConf .createWithDefault(4096) + val PARQUET_FIELD_ID_WRITE_ENABLED = + buildConf("spark.sql.parquet.fieldId.write.enabled") + .doc("Field ID is a native field of the Parquet schema spec. When enabled, " + + "Parquet writers will populate the field Id " + + "metadata (if present) in the Spark schema to the Parquet schema.") + .version("3.3.0") + .booleanConf + .createWithDefault(true) + + val PARQUET_FIELD_ID_READ_ENABLED = + buildConf("spark.sql.parquet.fieldId.read.enabled") + .doc("Field ID is a native field of the Parquet schema spec. 
When enabled, Parquet readers " + + "will use field IDs (if present) in the requested Spark schema to look up Parquet " + + "fields instead of using column names") + .version("3.3.0") + .booleanConf + .createWithDefault(false) + + val IGNORE_MISSING_PARQUET_FIELD_ID = + buildConf("spark.sql.parquet.fieldId.read.ignoreMissing") + .doc("When the Parquet file doesn't have any field IDs but the " + + "Spark read schema is using field IDs to read, we will silently return nulls " + + "when this flag is enabled, or error otherwise.") + .version("3.3.0") + .booleanConf + .createWithDefault(false) + val ORC_COMPRESSION = buildConf("spark.sql.orc.compression.codec") .doc("Sets the compression codec used when writing ORC files. If either `compression` or " + "`orc.compress` is specified in the table-specific options/properties, the precedence " + @@ -4253,6 +4280,12 @@ class SQLConf extends Serializable with Logging { def inferDictAsStruct: Boolean = getConf(SQLConf.INFER_NESTED_DICT_AS_STRUCT) + def parquetFieldIdReadEnabled: Boolean = getConf(SQLConf.PARQUET_FIELD_ID_READ_ENABLED) + + def parquetFieldIdWriteEnabled: Boolean = getConf(SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED) + + def ignoreMissingParquetFieldId: Boolean = getConf(SQLConf.IGNORE_MISSING_PARQUET_FIELD_ID) + def useV1Command: Boolean = getConf(SQLConf.LEGACY_USE_V1_COMMAND) /** ********************** SQLConf functionality methods ************ */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index aa6f9ee91656d..18876dedb951e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -119,6 +119,10 @@ class ParquetFileFormat SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key, sparkSession.sessionState.conf.parquetOutputTimestampType.toString) + conf.set( + SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED.key, + sparkSession.sessionState.conf.parquetFieldIdWriteEnabled.toString) + // Sets compression scheme conf.set(ParquetOutputFormat.COMPRESSION, parquetOptions.compressionCodecClassName) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala index bdab0f7892f00..97e691ff7c66c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.execution.datasources.parquet import java.time.ZoneId -import java.util.{Locale, Map => JMap} +import java.util +import java.util.{Locale, Map => JMap, UUID} import scala.collection.JavaConverters._ @@ -85,13 +86,71 @@ class ParquetReadSupport( StructType.fromString(schemaString) } + val parquetRequestedSchema = ParquetReadSupport.getRequestedSchema( + context.getFileSchema, catalystRequestedSchema, conf, enableVectorizedReader) + new ReadContext(parquetRequestedSchema, new util.HashMap[String, String]()) + } + + /** + * Called on executor side after [[init()]], before instantiating actual Parquet record readers. 
+ * Responsible for instantiating [[RecordMaterializer]], which is used for converting Parquet + * records to Catalyst [[InternalRow]]s. + */ + override def prepareForRead( + conf: Configuration, + keyValueMetaData: JMap[String, String], + fileSchema: MessageType, + readContext: ReadContext): RecordMaterializer[InternalRow] = { + val parquetRequestedSchema = readContext.getRequestedSchema + new ParquetRecordMaterializer( + parquetRequestedSchema, + ParquetReadSupport.expandUDT(catalystRequestedSchema), + new ParquetToSparkSchemaConverter(conf), + convertTz, + datetimeRebaseSpec, + int96RebaseSpec) + } +} + +object ParquetReadSupport extends Logging { + val SPARK_ROW_REQUESTED_SCHEMA = "org.apache.spark.sql.parquet.row.requested_schema" + + val SPARK_METADATA_KEY = "org.apache.spark.sql.parquet.row.metadata" + + def generateFakeColumnName: String = s"_fake_name_${UUID.randomUUID()}" + + def getRequestedSchema( + parquetFileSchema: MessageType, + catalystRequestedSchema: StructType, + conf: Configuration, + enableVectorizedReader: Boolean): MessageType = { val caseSensitive = conf.getBoolean(SQLConf.CASE_SENSITIVE.key, SQLConf.CASE_SENSITIVE.defaultValue.get) val schemaPruningEnabled = conf.getBoolean(SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key, SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.defaultValue.get) - val parquetFileSchema = context.getFileSchema + val useFieldId = conf.getBoolean(SQLConf.PARQUET_FIELD_ID_READ_ENABLED.key, + SQLConf.PARQUET_FIELD_ID_READ_ENABLED.defaultValue.get) + val ignoreMissingIds = conf.getBoolean(SQLConf.IGNORE_MISSING_PARQUET_FIELD_ID.key, + SQLConf.IGNORE_MISSING_PARQUET_FIELD_ID.defaultValue.get) + + if (!ignoreMissingIds && + !containsFieldIds(parquetFileSchema) && + ParquetUtils.hasFieldIds(catalystRequestedSchema)) { + throw new RuntimeException( + "Spark read schema expects field Ids, " + + "but Parquet file schema doesn't contain any field Ids.\n" + + "Please remove the field ids from Spark schema or ignore missing ids by " + + "setting `spark.sql.parquet.fieldId.ignoreMissing = true`\n" + + s""" + |Spark read schema: + |${catalystRequestedSchema.prettyJson} + | + |Parquet file schema: + |${parquetFileSchema.toString} + |""".stripMargin) + } val parquetClippedSchema = ParquetReadSupport.clipParquetSchema(parquetFileSchema, - catalystRequestedSchema, caseSensitive) + catalystRequestedSchema, caseSensitive, useFieldId) // We pass two schema to ParquetRecordMaterializer: // - parquetRequestedSchema: the schema of the file data we want to read @@ -109,6 +168,7 @@ class ParquetReadSupport( // in parquetRequestedSchema which are not present in the file. parquetClippedSchema } + logDebug( s"""Going to read the following fields from the Parquet file with the following schema: |Parquet file schema: @@ -120,34 +180,20 @@ class ParquetReadSupport( |Catalyst requested schema: |${catalystRequestedSchema.treeString} """.stripMargin) - new ReadContext(parquetRequestedSchema, Map.empty[String, String].asJava) + + parquetRequestedSchema } /** - * Called on executor side after [[init()]], before instantiating actual Parquet record readers. - * Responsible for instantiating [[RecordMaterializer]], which is used for converting Parquet - * records to Catalyst [[InternalRow]]s. 
+ * Overloaded method for backward compatibility with + * `caseSensitive` default to `true` and `useFieldId` default to `false` */ - override def prepareForRead( - conf: Configuration, - keyValueMetaData: JMap[String, String], - fileSchema: MessageType, - readContext: ReadContext): RecordMaterializer[InternalRow] = { - val parquetRequestedSchema = readContext.getRequestedSchema - new ParquetRecordMaterializer( - parquetRequestedSchema, - ParquetReadSupport.expandUDT(catalystRequestedSchema), - new ParquetToSparkSchemaConverter(conf), - convertTz, - datetimeRebaseSpec, - int96RebaseSpec) + def clipParquetSchema( + parquetSchema: MessageType, + catalystSchema: StructType, + caseSensitive: Boolean = true): MessageType = { + clipParquetSchema(parquetSchema, catalystSchema, caseSensitive, useFieldId = false) } -} - -object ParquetReadSupport { - val SPARK_ROW_REQUESTED_SCHEMA = "org.apache.spark.sql.parquet.row.requested_schema" - - val SPARK_METADATA_KEY = "org.apache.spark.sql.parquet.row.metadata" /** * Tailors `parquetSchema` according to `catalystSchema` by removing column paths don't exist @@ -156,9 +202,10 @@ object ParquetReadSupport { def clipParquetSchema( parquetSchema: MessageType, catalystSchema: StructType, - caseSensitive: Boolean = true): MessageType = { + caseSensitive: Boolean, + useFieldId: Boolean): MessageType = { val clippedParquetFields = clipParquetGroupFields( - parquetSchema.asGroupType(), catalystSchema, caseSensitive) + parquetSchema.asGroupType(), catalystSchema, caseSensitive, useFieldId) if (clippedParquetFields.isEmpty) { ParquetSchemaConverter.EMPTY_MESSAGE } else { @@ -170,26 +217,36 @@ object ParquetReadSupport { } private def clipParquetType( - parquetType: Type, catalystType: DataType, caseSensitive: Boolean): Type = { - catalystType match { + parquetType: Type, + catalystType: DataType, + caseSensitive: Boolean, + useFieldId: Boolean): Type = { + val newParquetType = catalystType match { case t: ArrayType if !isPrimitiveCatalystType(t.elementType) => // Only clips array types with nested type as element type. - clipParquetListType(parquetType.asGroupType(), t.elementType, caseSensitive) + clipParquetListType(parquetType.asGroupType(), t.elementType, caseSensitive, useFieldId) case t: MapType if !isPrimitiveCatalystType(t.keyType) || !isPrimitiveCatalystType(t.valueType) => // Only clips map types with nested key type or value type - clipParquetMapType(parquetType.asGroupType(), t.keyType, t.valueType, caseSensitive) + clipParquetMapType( + parquetType.asGroupType(), t.keyType, t.valueType, caseSensitive, useFieldId) case t: StructType => - clipParquetGroup(parquetType.asGroupType(), t, caseSensitive) + clipParquetGroup(parquetType.asGroupType(), t, caseSensitive, useFieldId) case _ => // UDTs and primitive types are not clipped. For UDTs, a clipped version might not be able // to be mapped to desired user-space types. So UDTs shouldn't participate schema merging. parquetType } + + if (useFieldId && parquetType.getId != null) { + newParquetType.withId(parquetType.getId.intValue()) + } else { + newParquetType + } } /** @@ -210,7 +267,10 @@ object ParquetReadSupport { * [[StructType]]. */ private def clipParquetListType( - parquetList: GroupType, elementType: DataType, caseSensitive: Boolean): Type = { + parquetList: GroupType, + elementType: DataType, + caseSensitive: Boolean, + useFieldId: Boolean): Type = { // Precondition of this method, should only be called for lists with nested element types. 
assert(!isPrimitiveCatalystType(elementType)) @@ -218,7 +278,7 @@ object ParquetReadSupport { // list element type is just the group itself. Clip it. if (parquetList.getLogicalTypeAnnotation == null && parquetList.isRepetition(Repetition.REPEATED)) { - clipParquetType(parquetList, elementType, caseSensitive) + clipParquetType(parquetList, elementType, caseSensitive, useFieldId) } else { assert( parquetList.getLogicalTypeAnnotation.isInstanceOf[ListLogicalTypeAnnotation], @@ -250,19 +310,28 @@ object ParquetReadSupport { Types .buildGroup(parquetList.getRepetition) .as(LogicalTypeAnnotation.listType()) - .addField(clipParquetType(repeatedGroup, elementType, caseSensitive)) + .addField(clipParquetType(repeatedGroup, elementType, caseSensitive, useFieldId)) .named(parquetList.getName) } else { + val newRepeatedGroup = Types + .repeatedGroup() + .addField( + clipParquetType( + repeatedGroup.getType(0), elementType, caseSensitive, useFieldId)) + .named(repeatedGroup.getName) + + val newElementType = if (useFieldId && repeatedGroup.getId != null) { + newRepeatedGroup.withId(repeatedGroup.getId.intValue()) + } else { + newRepeatedGroup + } + // Otherwise, the repeated field's type is the element type with the repeated field's // repetition. Types .buildGroup(parquetList.getRepetition) .as(LogicalTypeAnnotation.listType()) - .addField( - Types - .repeatedGroup() - .addField(clipParquetType(repeatedGroup.getType(0), elementType, caseSensitive)) - .named(repeatedGroup.getName)) + .addField(newElementType) .named(parquetList.getName) } } @@ -277,7 +346,8 @@ object ParquetReadSupport { parquetMap: GroupType, keyType: DataType, valueType: DataType, - caseSensitive: Boolean): GroupType = { + caseSensitive: Boolean, + useFieldId: Boolean): GroupType = { // Precondition of this method, only handles maps with nested key types or value types. assert(!isPrimitiveCatalystType(keyType) || !isPrimitiveCatalystType(valueType)) @@ -285,13 +355,19 @@ object ParquetReadSupport { val parquetKeyType = repeatedGroup.getType(0) val parquetValueType = repeatedGroup.getType(1) - val clippedRepeatedGroup = - Types + val clippedRepeatedGroup = { + val newRepeatedGroup = Types .repeatedGroup() .as(repeatedGroup.getLogicalTypeAnnotation) - .addField(clipParquetType(parquetKeyType, keyType, caseSensitive)) - .addField(clipParquetType(parquetValueType, valueType, caseSensitive)) + .addField(clipParquetType(parquetKeyType, keyType, caseSensitive, useFieldId)) + .addField(clipParquetType(parquetValueType, valueType, caseSensitive, useFieldId)) .named(repeatedGroup.getName) + if (useFieldId && repeatedGroup.getId != null) { + newRepeatedGroup.withId(repeatedGroup.getId.intValue()) + } else { + newRepeatedGroup + } + } Types .buildGroup(parquetMap.getRepetition) @@ -309,8 +385,12 @@ object ParquetReadSupport { * pruning. */ private def clipParquetGroup( - parquetRecord: GroupType, structType: StructType, caseSensitive: Boolean): GroupType = { - val clippedParquetFields = clipParquetGroupFields(parquetRecord, structType, caseSensitive) + parquetRecord: GroupType, + structType: StructType, + caseSensitive: Boolean, + useFieldId: Boolean): GroupType = { + val clippedParquetFields = + clipParquetGroupFields(parquetRecord, structType, caseSensitive, useFieldId) Types .buildGroup(parquetRecord.getRepetition) .as(parquetRecord.getLogicalTypeAnnotation) @@ -324,23 +404,29 @@ object ParquetReadSupport { * @return A list of clipped [[GroupType]] fields, which can be empty. 
*/ private def clipParquetGroupFields( - parquetRecord: GroupType, structType: StructType, caseSensitive: Boolean): Seq[Type] = { - val toParquet = new SparkToParquetSchemaConverter(writeLegacyParquetFormat = false) - if (caseSensitive) { - val caseSensitiveParquetFieldMap = + parquetRecord: GroupType, + structType: StructType, + caseSensitive: Boolean, + useFieldId: Boolean): Seq[Type] = { + val toParquet = new SparkToParquetSchemaConverter( + writeLegacyParquetFormat = false, useFieldId = useFieldId) + lazy val caseSensitiveParquetFieldMap = parquetRecord.getFields.asScala.map(f => f.getName -> f).toMap - structType.map { f => - caseSensitiveParquetFieldMap + lazy val caseInsensitiveParquetFieldMap = + parquetRecord.getFields.asScala.groupBy(_.getName.toLowerCase(Locale.ROOT)) + lazy val idToParquetFieldMap = + parquetRecord.getFields.asScala.filter(_.getId != null).groupBy(f => f.getId.intValue()) + + def matchCaseSensitiveField(f: StructField): Type = { + caseSensitiveParquetFieldMap .get(f.name) - .map(clipParquetType(_, f.dataType, caseSensitive)) + .map(clipParquetType(_, f.dataType, caseSensitive, useFieldId)) .getOrElse(toParquet.convertField(f)) - } - } else { + } + + def matchCaseInsensitiveField(f: StructField): Type = { // Do case-insensitive resolution only if in case-insensitive mode - val caseInsensitiveParquetFieldMap = - parquetRecord.getFields.asScala.groupBy(_.getName.toLowerCase(Locale.ROOT)) - structType.map { f => - caseInsensitiveParquetFieldMap + caseInsensitiveParquetFieldMap .get(f.name.toLowerCase(Locale.ROOT)) .map { parquetTypes => if (parquetTypes.size > 1) { @@ -349,9 +435,39 @@ object ParquetReadSupport { throw QueryExecutionErrors.foundDuplicateFieldInCaseInsensitiveModeError( f.name, parquetTypesString) } else { - clipParquetType(parquetTypes.head, f.dataType, caseSensitive) + clipParquetType(parquetTypes.head, f.dataType, caseSensitive, useFieldId) } }.getOrElse(toParquet.convertField(f)) + } + + def matchIdField(f: StructField): Type = { + val fieldId = ParquetUtils.getFieldId(f) + idToParquetFieldMap + .get(fieldId) + .map { parquetTypes => + if (parquetTypes.size > 1) { + // Need to fail if there is ambiguity, i.e. more than one field is matched + val parquetTypesString = parquetTypes.map(_.getName).mkString("[", ", ", "]") + throw QueryExecutionErrors.foundDuplicateFieldInFieldIdLookupModeError( + fieldId, parquetTypesString) + } else { + clipParquetType(parquetTypes.head, f.dataType, caseSensitive, useFieldId) + } + }.getOrElse { + // When there is no ID match, we use a fake name to avoid a name match by accident + // We need this name to be unique as well, otherwise there will be type conflicts + toParquet.convertField(f.copy(name = generateFakeColumnName)) + } + } + + val shouldMatchById = useFieldId && ParquetUtils.hasFieldIds(structType) + structType.map { f => + if (shouldMatchById && ParquetUtils.hasFieldId(f)) { + matchIdField(f) + } else if (caseSensitive) { + matchCaseSensitiveField(f) + } else { + matchCaseInsensitiveField(f) } } } @@ -410,4 +526,13 @@ object ParquetReadSupport { expand(schema).asInstanceOf[StructType] } + + /** + * Whether the parquet schema contains any field IDs. + */ + def containsFieldIds(schema: Type): Boolean = schema match { + case p: PrimitiveType => p.getId != null + // We don't require all fields to have IDs, so we use `exists` here. 
+ case g: GroupType => g.getId != null || g.getFields.asScala.exists(containsFieldIds) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index b12898360dcf4..63ad5ed6db82e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -203,16 +203,38 @@ private[parquet] class ParquetRowConverter( private[this] val fieldConverters: Array[Converter with HasParentContainerUpdater] = { // (SPARK-31116) Use case insensitive map if spark.sql.caseSensitive is false // to prevent throwing IllegalArgumentException when searching catalyst type's field index - val catalystFieldNameToIndex = if (SQLConf.get.caseSensitiveAnalysis) { - catalystType.fieldNames.zipWithIndex.toMap + def nameToIndex: Map[String, Int] = catalystType.fieldNames.zipWithIndex.toMap + + val catalystFieldIdxByName = if (SQLConf.get.caseSensitiveAnalysis) { + nameToIndex } else { - CaseInsensitiveMap(catalystType.fieldNames.zipWithIndex.toMap) + CaseInsensitiveMap(nameToIndex) } + + // (SPARK-38094) parquet field ids, if exist, should be prioritized for matching + val catalystFieldIdxByFieldId = + if (SQLConf.get.parquetFieldIdReadEnabled && ParquetUtils.hasFieldIds(catalystType)) { + catalystType.fields + .zipWithIndex + .filter { case (f, _) => ParquetUtils.hasFieldId(f) } + .map { case (f, idx) => (ParquetUtils.getFieldId(f), idx) } + .toMap + } else { + Map.empty[Int, Int] + } + parquetType.getFields.asScala.map { parquetField => - val fieldIndex = catalystFieldNameToIndex(parquetField.getName) - val catalystField = catalystType(fieldIndex) + val catalystFieldIndex = Option(parquetField.getId).flatMap { fieldId => + // field has id, try to match by id first before falling back to match by name + catalystFieldIdxByFieldId.get(fieldId.intValue()) + }.getOrElse { + // field doesn't have id, just match by name + catalystFieldIdxByName(parquetField.getName) + } + val catalystField = catalystType(catalystFieldIndex) // Converted field value should be set to the `fieldIndex`-th cell of `currentRow` - newConverter(parquetField, catalystField.dataType, new RowUpdater(currentRow, fieldIndex)) + newConverter(parquetField, + catalystField.dataType, new RowUpdater(currentRow, catalystFieldIndex)) }.toArray } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala index cb5d646f85e9e..34a4eb8c002d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala @@ -434,20 +434,25 @@ class ParquetToSparkSchemaConverter( * When set to false, use standard format defined in parquet-format spec. This argument only * affects Parquet write path. * @param outputTimestampType which parquet timestamp type to use when writing. + * @param useFieldId whether we should include write field id to Parquet schema. Set this to false + * via `spark.sql.parquet.fieldId.write.enabled = false` to disable writing field ids. 
*/ class SparkToParquetSchemaConverter( writeLegacyParquetFormat: Boolean = SQLConf.PARQUET_WRITE_LEGACY_FORMAT.defaultValue.get, outputTimestampType: SQLConf.ParquetOutputTimestampType.Value = - SQLConf.ParquetOutputTimestampType.INT96) { + SQLConf.ParquetOutputTimestampType.INT96, + useFieldId: Boolean = SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED.defaultValue.get) { def this(conf: SQLConf) = this( writeLegacyParquetFormat = conf.writeLegacyParquetFormat, - outputTimestampType = conf.parquetOutputTimestampType) + outputTimestampType = conf.parquetOutputTimestampType, + useFieldId = conf.parquetFieldIdWriteEnabled) def this(conf: Configuration) = this( writeLegacyParquetFormat = conf.get(SQLConf.PARQUET_WRITE_LEGACY_FORMAT.key).toBoolean, outputTimestampType = SQLConf.ParquetOutputTimestampType.withName( - conf.get(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key))) + conf.get(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key)), + useFieldId = conf.get(SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED.key).toBoolean) /** * Converts a Spark SQL [[StructType]] to a Parquet [[MessageType]]. @@ -463,7 +468,12 @@ class SparkToParquetSchemaConverter( * Converts a Spark SQL [[StructField]] to a Parquet [[Type]]. */ def convertField(field: StructField): Type = { - convertField(field, if (field.nullable) OPTIONAL else REQUIRED) + val converted = convertField(field, if (field.nullable) OPTIONAL else REQUIRED) + if (useFieldId && ParquetUtils.hasFieldId(field)) { + converted.withId(ParquetUtils.getFieldId(field)) + } else { + converted + } } private def convertField(field: StructField, repetition: Type.Repetition): Type = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala index 63c529e3542f2..2c565c8890e70 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, Count, import org.apache.spark.sql.execution.datasources.AggregatePushDownUtils import org.apache.spark.sql.execution.datasources.v2.V2ColumnUtils import org.apache.spark.sql.internal.SQLConf.{LegacyBehaviorPolicy, PARQUET_AGGREGATE_PUSHDOWN_ENABLED} -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructField, StructType} object ParquetUtils { def inferSchema( @@ -145,6 +145,48 @@ object ParquetUtils { file.getName == ParquetFileWriter.PARQUET_METADATA_FILE } + /** + * A StructField metadata key used to set the field id of a column in the Parquet schema. + */ + val FIELD_ID_METADATA_KEY = "parquet.field.id" + + /** + * Whether there exists a field in the schema, whether inner or leaf, has the parquet field + * ID metadata. 
+ */ + def hasFieldIds(schema: StructType): Boolean = { + def recursiveCheck(schema: DataType): Boolean = { + schema match { + case st: StructType => + st.exists(field => hasFieldId(field) || recursiveCheck(field.dataType)) + + case at: ArrayType => recursiveCheck(at.elementType) + + case mt: MapType => recursiveCheck(mt.keyType) || recursiveCheck(mt.valueType) + + case _ => + // No need to really check primitive types, just to terminate the recursion + false + } + } + if (schema.isEmpty) false else recursiveCheck(schema) + } + + def hasFieldId(field: StructField): Boolean = + field.metadata.contains(FIELD_ID_METADATA_KEY) + + def getFieldId(field: StructField): Int = { + require(hasFieldId(field), + s"The key `$FIELD_ID_METADATA_KEY` doesn't exist in the metadata of " + field) + try { + Math.toIntExact(field.metadata.getLong(FIELD_ID_METADATA_KEY)) + } catch { + case _: ArithmeticException | _: ClassCastException => + throw new IllegalArgumentException( + s"The key `$FIELD_ID_METADATA_KEY` must be a 32-bit integer") + } + } + /** * When the partial aggregates (Max/Min/Count) are pushed down to Parquet, we don't need to * createRowBaseReader to read data from Parquet and aggregate at Spark layer. Instead we want diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala index 0316d91f40732..d84acedb962e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala @@ -81,6 +81,10 @@ case class ParquetWrite( conf.set(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key, sqlConf.parquetOutputTimestampType.toString) + conf.set( + SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED.key, + sqlConf.parquetFieldIdWriteEnabled.toString) + // Sets compression scheme conf.set(ParquetOutputFormat.COMPRESSION, parquetOptions.compressionCodecClassName) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala new file mode 100644 index 0000000000000..ff0bb2f92d208 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.parquet + +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkException +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, Metadata, MetadataBuilder, StringType, StructType} + +class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkSession { + + private def withId(id: Int): Metadata = + new MetadataBuilder().putLong(ParquetUtils.FIELD_ID_METADATA_KEY, id).build() + + test("Parquet reads infer fields using field ids correctly") { + withTempDir { dir => + val readSchema = + new StructType() + .add("a", StringType, true, withId(0)) + .add("b", IntegerType, true, withId(1)) + + val readSchemaMixed = + new StructType() + .add("name", StringType, true) + .add("b", IntegerType, true, withId(1)) + + val readSchemaMixedHalfMatched = + new StructType() + .add("unmatched", StringType, true) + .add("b", IntegerType, true, withId(1)) + + val writeSchema = + new StructType() + .add("random", IntegerType, true, withId(1)) + .add("name", StringType, true, withId(0)) + + val readData = Seq(Row("text", 100), Row("more", 200)) + val readDataHalfMatched = Seq(Row(null, 100), Row(null, 200)) + val writeData = Seq(Row(100, "text"), Row(200, "more")) + spark.createDataFrame(writeData.asJava, writeSchema) + .write.mode("overwrite").parquet(dir.getCanonicalPath) + + withAllParquetReaders { + // read with schema + checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath), readData) + checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath) + .where("b < 50"), Seq.empty) + checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath) + .where("a >= 'oh'"), Row("text", 100) :: Nil) + // read with mixed field-id/name schema + checkAnswer(spark.read.schema(readSchemaMixed).parquet(dir.getCanonicalPath), readData) + checkAnswer(spark.read.schema(readSchemaMixedHalfMatched) + .parquet(dir.getCanonicalPath), readDataHalfMatched) + + // schema inference should pull into the schema with ids + val reader = spark.read.parquet(dir.getCanonicalPath) + assert(reader.schema == writeSchema) + checkAnswer(reader.where("name >= 'oh'"), Row(100, "text") :: Nil) + } + } + } + + test("absence of field ids") { + withTempDir { dir => + val readSchema = + new StructType() + .add("a", IntegerType, true, withId(1)) + .add("b", StringType, true, withId(2)) + .add("c", IntegerType, true, withId(3)) + + val writeSchema = + new StructType() + .add("a", IntegerType, true, withId(3)) + .add("randomName", StringType, true) + + val writeData = Seq(Row(100, "text"), Row(200, "more")) + + spark.createDataFrame(writeData.asJava, writeSchema) + .write.mode("overwrite").parquet(dir.getCanonicalPath) + + withAllParquetReaders { + checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath), + // 3 different cases for the 3 columns to read: + // - a: ID 1 is not found, but there is column with name `a`, still return null + // - b: ID 2 is not found, return null + // - c: ID 3 is found, read it + Row(null, null, 100) :: Row(null, null, 200) :: Nil) + } + } + } + + test("multiple id matches") { + withTempDir { dir => + val readSchema = + new StructType() + .add("a", IntegerType, true, withId(1)) + + val writeSchema = + new StructType() + .add("a", IntegerType, true, withId(1)) + .add("rand1", StringType, true, withId(2)) + .add("rand2", StringType, true, withId(1)) 
+ + val writeData = Seq(Row(100, "text", "txt"), Row(200, "more", "mr")) + + spark.createDataFrame(writeData.asJava, writeSchema) + .write.mode("overwrite").parquet(dir.getCanonicalPath) + + withAllParquetReaders { + val cause = intercept[SparkException] { + spark.read.schema(readSchema).parquet(dir.getCanonicalPath).collect() + }.getCause + assert(cause.isInstanceOf[RuntimeException] && + cause.getMessage.contains("Found duplicate field(s)")) + } + } + } + + test("read parquet file without ids") { + withTempDir { dir => + val readSchema = + new StructType() + .add("a", IntegerType, true, withId(1)) + + val writeSchema = + new StructType() + .add("a", IntegerType, true) + .add("rand1", StringType, true) + .add("rand2", StringType, true) + + val writeData = Seq(Row(100, "text", "txt"), Row(200, "more", "mr")) + spark.createDataFrame(writeData.asJava, writeSchema) + .write.mode("overwrite").parquet(dir.getCanonicalPath) + withAllParquetReaders { + Seq(readSchema, readSchema.add("b", StringType, true)).foreach { schema => + val cause = intercept[SparkException] { + spark.read.schema(schema).parquet(dir.getCanonicalPath).collect() + }.getCause + assert(cause.isInstanceOf[RuntimeException] && + cause.getMessage.contains("Parquet file schema doesn't contain any field Ids")) + val expectedValues = (1 to schema.length).map(_ => null) + withSQLConf(SQLConf.IGNORE_MISSING_PARQUET_FIELD_ID.key -> "true") { + checkAnswer( + spark.read.schema(schema).parquet(dir.getCanonicalPath), + Row(expectedValues: _*) :: Row(expectedValues: _*) :: Nil) + } + } + } + } + } + + test("global read/write flag should work correctly") { + withTempDir { dir => + val readSchema = + new StructType() + .add("some", IntegerType, true, withId(1)) + .add("other", StringType, true, withId(2)) + .add("name", StringType, true, withId(3)) + + val writeSchema = + new StructType() + .add("a", IntegerType, true, withId(1)) + .add("rand1", StringType, true, withId(2)) + .add("rand2", StringType, true, withId(3)) + + val writeData = Seq(Row(100, "text", "txt"), Row(200, "more", "mr")) + + val expectedResult = Seq(Row(null, null, null), Row(null, null, null)) + + withSQLConf(SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED.key -> "false", + SQLConf.PARQUET_FIELD_ID_READ_ENABLED.key -> "true") { + spark.createDataFrame(writeData.asJava, writeSchema) + .write.mode("overwrite").parquet(dir.getCanonicalPath) + withAllParquetReaders { + // no field id found exception + val cause = intercept[SparkException] { + spark.read.schema(readSchema).parquet(dir.getCanonicalPath).collect() + }.getCause + assert(cause.isInstanceOf[RuntimeException] && + cause.getMessage.contains("Parquet file schema doesn't contain any field Ids")) + } + } + + withSQLConf(SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED.key -> "true", + SQLConf.PARQUET_FIELD_ID_READ_ENABLED.key -> "false") { + spark.createDataFrame(writeData.asJava, writeSchema) + .write.mode("overwrite").parquet(dir.getCanonicalPath) + withAllParquetReaders { + // ids are there, but we don't use id for matching, so no results would be returned + checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath), expectedResult) + } + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdSchemaSuite.scala new file mode 100644 index 0000000000000..b3babdd3a0cff --- /dev/null +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdSchemaSuite.scala @@ -0,0 +1,528 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.parquet + +import scala.collection.JavaConverters._ + +import org.apache.parquet.schema.{MessageType, MessageTypeParser} + +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ + +class ParquetFieldIdSchemaSuite extends ParquetSchemaTest { + + private val FAKE_COLUMN_NAME = "_fake_name_" + private val UUID_REGEX = + "[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}".r + + private def withId(id: Int) = + new MetadataBuilder().putLong(ParquetUtils.FIELD_ID_METADATA_KEY, id).build() + + private def testSchemaClipping( + testName: String, + parquetSchema: String, + catalystSchema: StructType, + expectedSchema: String, + caseSensitive: Boolean = true, + useFieldId: Boolean = true): Unit = { + test(s"Clipping with field id - $testName") { + val fileSchema = MessageTypeParser.parseMessageType(parquetSchema) + val actual = ParquetReadSupport.clipParquetSchema( + fileSchema, + catalystSchema, + caseSensitive = caseSensitive, + useFieldId = useFieldId) + + // each fake name should be uniquely generated + val fakeColumnNames = actual.getPaths.asScala.flatten.filter(_.startsWith(FAKE_COLUMN_NAME)) + assert( + fakeColumnNames.distinct == fakeColumnNames, "Should generate unique fake column names") + + // replace the random part of all fake names with a fixed id generator + val ids1 = (1 to 100).iterator + val actualNormalized = MessageTypeParser.parseMessageType( + UUID_REGEX.replaceAllIn(actual.toString, _ => ids1.next().toString) + ) + val ids2 = (1 to 100).iterator + val expectedNormalized = MessageTypeParser.parseMessageType( + FAKE_COLUMN_NAME.r.replaceAllIn(expectedSchema, _ => s"$FAKE_COLUMN_NAME${ids2.next()}") + ) + + try { + expectedNormalized.checkContains(actualNormalized) + actualNormalized.checkContains(expectedNormalized) + } catch { case cause: Throwable => + fail( + s"""Expected clipped schema: + |$expectedSchema + |Actual clipped schema: + |$actual + """.stripMargin, + cause) + } + checkEqual(actualNormalized, expectedNormalized) + // might be redundant but just to have some free tests for the utils + assert(ParquetReadSupport.containsFieldIds(fileSchema)) + assert(ParquetUtils.hasFieldIds(catalystSchema)) + } + } + + private def testSqlToParquet( + testName: String, + sqlSchema: StructType, + parquetSchema: String): Unit = { + val converter = new SparkToParquetSchemaConverter( + writeLegacyParquetFormat = false, + outputTimestampType = SQLConf.ParquetOutputTimestampType.INT96, + useFieldId = true) + + test(s"sql => parquet: $testName") { + val actual = 
converter.convert(sqlSchema) + val expected = MessageTypeParser.parseMessageType(parquetSchema) + checkEqual(actual, expected) + } + } + + private def checkEqual(actual: MessageType, expected: MessageType): Unit = { + actual.checkContains(expected) + expected.checkContains(actual) + assert(actual.toString == expected.toString, + s""" + |Schema mismatch. + |Expected schema: + |${expected.toString} + |Actual schema: + |${actual.toString} + """.stripMargin + ) + } + + test("check hasFieldIds for schema") { + val simpleSchemaMissingId = new StructType() + .add("f010", DoubleType, nullable = true, withId(7)) + .add("f012", LongType, nullable = true) + + assert(ParquetUtils.hasFieldIds(simpleSchemaMissingId)) + + val f01ElementType = new StructType() + .add("f010", DoubleType, nullable = true, withId(7)) + .add("f012", LongType, nullable = true, withId(8)) + + assert(ParquetUtils.hasFieldIds(f01ElementType)) + + val f0Type = new StructType() + .add("f00", ArrayType(StringType, containsNull = false), nullable = true, withId(2)) + .add("f01", ArrayType(f01ElementType, containsNull = false), nullable = true) + + assert(ParquetUtils.hasFieldIds(f0Type)) + + assert(ParquetUtils.hasFieldIds( + new StructType().add("f0", f0Type, nullable = false, withId(1)))) + + assert(!ParquetUtils.hasFieldIds(new StructType().add("f0", IntegerType, nullable = true))) + assert(!ParquetUtils.hasFieldIds(new StructType())); + } + + test("check getFieldId for schema") { + val schema = new StructType() + .add("overflowId", DoubleType, nullable = true, + new MetadataBuilder() + .putLong(ParquetUtils.FIELD_ID_METADATA_KEY, 12345678987654321L).build()) + .add("stringId", StringType, nullable = true, + new MetadataBuilder() + .putString(ParquetUtils.FIELD_ID_METADATA_KEY, "lol").build()) + .add("negativeId", LongType, nullable = true, withId(-20)) + .add("noId", LongType, nullable = true) + + assert(intercept[IllegalArgumentException] { + ParquetUtils.getFieldId(schema.findNestedField(Seq("noId")).get._2) + }.getMessage.contains("doesn't exist")) + + assert(intercept[IllegalArgumentException] { + ParquetUtils.getFieldId(schema.findNestedField(Seq("overflowId")).get._2) + }.getMessage.contains("must be a 32-bit integer")) + + assert(intercept[IllegalArgumentException] { + ParquetUtils.getFieldId(schema.findNestedField(Seq("stringId")).get._2) + }.getMessage.contains("must be a 32-bit integer")) + + // negative id allowed + assert(ParquetUtils.getFieldId(schema.findNestedField(Seq("negativeId")).get._2) == -20) + } + + test("check containsFieldIds for parquet schema") { + + // empty Parquet schema fails too + assert( + !ParquetReadSupport.containsFieldIds( + MessageTypeParser.parseMessageType( + """message root { + |} + """.stripMargin))) + + assert( + !ParquetReadSupport.containsFieldIds( + MessageTypeParser.parseMessageType( + """message root { + | required group f0 { + | optional int32 f00; + | } + |} + """.stripMargin))) + + assert( + ParquetReadSupport.containsFieldIds( + MessageTypeParser.parseMessageType( + """message root { + | required group f0 = 1 { + | optional int32 f00; + | optional binary f01; + | } + |} + """.stripMargin))) + + assert( + ParquetReadSupport.containsFieldIds( + MessageTypeParser.parseMessageType( + """message root { + | required group f0 { + | optional int32 f00 = 1; + | optional binary f01; + | } + |} + """.stripMargin))) + + assert( + !ParquetReadSupport.containsFieldIds( + MessageTypeParser.parseMessageType( + """message spark_schema { + | required group f0 { + | optional group f00 (LIST) { + | 
repeated group list { + | required binary element (UTF8); + | } + | } + | } + |} + """.stripMargin))) + + assert( + ParquetReadSupport.containsFieldIds( + MessageTypeParser.parseMessageType( + """message spark_schema { + | required group f0 { + | optional group f00 (LIST) { + | repeated group list = 1 { + | required binary element (UTF8); + | } + | } + | } + |} + """.stripMargin))) + } + + test("ID in Parquet Types is read as null when not set") { + val parquetSchemaString = + """message root { + | required group f0 { + | optional int32 f00; + | } + |} + """.stripMargin + + val parquetSchema = MessageTypeParser.parseMessageType(parquetSchemaString) + val f0 = parquetSchema.getFields().get(0) + assert(f0.getId() == null) + assert(f0.asGroupType().getFields.get(0).getId == null) + } + + testSqlToParquet( + "standard array", + sqlSchema = { + val f01ElementType = new StructType() + .add("f010", DoubleType, nullable = true, withId(7)) + .add("f012", LongType, nullable = true, withId(9)) + + val f0Type = new StructType() + .add("f00", ArrayType(StringType, containsNull = false), nullable = true, withId(2)) + .add("f01", ArrayType(f01ElementType, containsNull = false), nullable = true, withId(5)) + + new StructType().add("f0", f0Type, nullable = false, withId(1)) + }, + parquetSchema = + """message spark_schema { + | required group f0 = 1 { + | optional group f00 (LIST) = 2 { + | repeated group list { + | required binary element (UTF8); + | } + | } + | + | optional group f01 (LIST) = 5 { + | repeated group list { + | required group element { + | optional double f010 = 7; + | optional int64 f012 = 9; + | } + | } + | } + | } + |} + """.stripMargin) + + testSchemaClipping( + "simple nested struct", + + parquetSchema = + """message root { + | required group f0 = 1 { + | optional int32 f00 = 2; + | optional int32 f01 = 3; + | } + |} + """.stripMargin, + + catalystSchema = { + val f0Type = new StructType().add( + "g00", IntegerType, nullable = true, withId(2)) + new StructType() + .add("g0", f0Type, nullable = false, withId(1)) + .add("g1", IntegerType, nullable = true, withId(4)) + }, + + expectedSchema = + s"""message spark_schema { + | required group f0 = 1 { + | optional int32 f00 = 2; + | } + | optional int32 $FAKE_COLUMN_NAME = 4; + |} + """.stripMargin) + + testSchemaClipping( + "standard array", + + parquetSchema = + """message root { + | required group f0 = 1 { + | optional group f00 (LIST) = 2 { + | repeated group list { + | required binary element (UTF8); + | } + | } + | + | optional group f01 (LIST) = 5 { + | repeated group list { + | required group element { + | optional int32 f010 = 7; + | optional double f011 = 8; + | } + | } + | } + | } + |} + """.stripMargin, + + catalystSchema = { + val f01ElementType = new StructType() + .add("g011", DoubleType, nullable = true, withId(8)) + .add("g012", LongType, nullable = true, withId(9)) + + val f0Type = new StructType() + .add("g00", ArrayType(StringType, containsNull = false), nullable = true, withId(2)) + .add("g01", ArrayType(f01ElementType, containsNull = false), nullable = true, withId(5)) + + new StructType().add("g0", f0Type, nullable = false, withId(1)) + }, + + expectedSchema = + s"""message spark_schema { + | required group f0 = 1 { + | optional group f00 (LIST) = 2 { + | repeated group list { + | required binary element (UTF8); + | } + | } + | + | optional group f01 (LIST) = 5 { + | repeated group list { + | required group element { + | optional double f011 = 8; + | optional int64 $FAKE_COLUMN_NAME = 9; + | } + | } + | } + | } + |} + 
""".stripMargin) + + testSchemaClipping( + "standard map with complex key", + + parquetSchema = + """message root { + | required group f0 (MAP) = 3 { + | repeated group key_value = 1 { + | required group key = 2 { + | required int32 value_f0 = 4; + | required int64 value_f1 = 6; + | } + | required int32 value = 5; + | } + | } + |} + """.stripMargin, + + catalystSchema = { + val keyType = + new StructType() + .add("value_g1", LongType, nullable = false, withId(6)) + .add("value_g2", DoubleType, nullable = false, withId(7)) + + val f0Type = MapType(keyType, IntegerType, valueContainsNull = false) + + new StructType().add("g0", f0Type, nullable = false, withId(3)) + }, + + expectedSchema = + s"""message spark_schema { + | required group f0 (MAP) = 3 { + | repeated group key_value = 1 { + | required group key = 2 { + | required int64 value_f1 = 6; + | required double $FAKE_COLUMN_NAME = 7; + | } + | required int32 value = 5; + | } + | } + |} + """.stripMargin) + + testSchemaClipping( + "won't match field id if structure is different", + + parquetSchema = + """message root { + | required group f0 = 1 { + | optional int32 f00 = 2; + | } + | optional int32 f1 = 3; + |} + """.stripMargin, + + catalystSchema = { + val f0Type = new StructType() + .add("g00", IntegerType, nullable = true, withId(2)) + // parquet has id 3, but won't use because structure is different + .add("g01", IntegerType, nullable = true, withId(3)) + new StructType() + .add("g0", f0Type, nullable = false, withId(1)) + }, + + // note that f1 is not picked up, even though it's Id is 3 + expectedSchema = + s"""message spark_schema { + | required group f0 = 1 { + | optional int32 f00 = 2; + | optional int32 $FAKE_COLUMN_NAME = 3; + | } + |} + """.stripMargin) + + testSchemaClipping( + "Complex type with multiple mismatches should work", + + parquetSchema = + """message root { + | required group f0 = 1 { + | optional int32 f00 = 2; + | } + | optional int32 f1 = 3; + | optional int32 f2 = 4; + |} + """.stripMargin, + + catalystSchema = { + val f0Type = new StructType() + .add("g00", IntegerType, nullable = true, withId(2)) + + new StructType() + .add("g0", f0Type, nullable = false, withId(999)) + .add("g1", IntegerType, nullable = true, withId(3)) + .add("g2", IntegerType, nullable = true, withId(888)) + }, + + expectedSchema = + s"""message spark_schema { + | required group $FAKE_COLUMN_NAME = 999 { + | optional int32 g00 = 2; + | } + | optional int32 f1 = 3; + | optional int32 $FAKE_COLUMN_NAME = 888; + |} + """.stripMargin) + + testSchemaClipping( + "Should allow fall-back to name matching if id not found", + + parquetSchema = + """message root { + | required group f0 = 1 { + | optional int32 f00 = 2; + | } + | optional int32 f1 = 3; + | optional int32 f2 = 4; + | required group f4 = 5 { + | optional int32 f40 = 6; + | } + |} + """.stripMargin, + + catalystSchema = { + val f0Type = new StructType() + // nested f00 without id should also work + .add("f00", IntegerType, nullable = true) + + val f4Type = new StructType() + .add("g40", IntegerType, nullable = true, withId(6)) + + new StructType() + .add("g0", f0Type, nullable = false, withId(1)) + .add("g1", IntegerType, nullable = true, withId(3)) + // f2 without id should be matched using name matching + .add("f2", IntegerType, nullable = true) + // name is not matched + .add("g2", IntegerType, nullable = true) + // f4 without id will do name matching, but g40 will be matched using id + .add("f4", f4Type, nullable = true) + }, + + expectedSchema = + s"""message spark_schema { + | 
required group f0 = 1 { + | optional int32 f00 = 2; + | } + | optional int32 f1 = 3; + | optional int32 f2 = 4; + | optional int32 g2; + | required group f4 = 5 { + | optional int32 f40 = 6; + | } + |} + """.stripMargin) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala index 272f12e138b68..2feea41d15656 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala @@ -2257,7 +2257,10 @@ class ParquetSchemaSuite extends ParquetSchemaTest { caseSensitive: Boolean): Unit = { test(s"Clipping - $testName") { val actual = ParquetReadSupport.clipParquetSchema( - MessageTypeParser.parseMessageType(parquetSchema), catalystSchema, caseSensitive) + MessageTypeParser.parseMessageType(parquetSchema), + catalystSchema, + caseSensitive, + useFieldId = false) try { expectedSchema.checkContains(actual) @@ -2821,7 +2824,10 @@ class ParquetSchemaSuite extends ParquetSchemaTest { } assertThrows[RuntimeException] { ParquetReadSupport.clipParquetSchema( - MessageTypeParser.parseMessageType(parquetSchema), catalystSchema, caseSensitive = false) + MessageTypeParser.parseMessageType(parquetSchema), + catalystSchema, + caseSensitive = false, + useFieldId = false) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala index 7a7957c67dce1..18690844d484c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala @@ -165,9 +165,17 @@ private[sql] trait ParquetTest extends FileBasedDataSourceTest { def withAllParquetReaders(code: => Unit): Unit = { // test the row-based reader - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false")(code) + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { + withClue("Parquet-mr reader") { + code + } + } // test the vectorized reader - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true")(code) + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") { + withClue("Vectorized reader") { + code + } + } } def withAllParquetWriters(code: => Unit): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index 47a6f3617da63..fb3d38f3b7b18 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -61,7 +61,13 @@ private[sql] object TestSQLContext { val overrideConfs: Map[String, String] = Map( // Fewer shuffle partitions to speed up testing. - SQLConf.SHUFFLE_PARTITIONS.key -> "5") + SQLConf.SHUFFLE_PARTITIONS.key -> "5", + // Enable parquet read field id for tests to ensure correctness + // By default, if Spark schema doesn't contain the `parquet.field.id` metadata, + // the underlying matching mechanism should behave exactly like name matching + // which is the existing behavior. Therefore, turning this on ensures that we didn't + // introduce any regression for such mixed matching mode. 
+ SQLConf.PARQUET_FIELD_ID_READ_ENABLED.key -> "true") } private[sql] class TestSQLSessionStateBuilder( From 42d8c509c868fe3aca13a3e93fb86bc698560623 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 18 Feb 2022 23:37:50 +0800 Subject: [PATCH 273/513] [SPARK-38251][SQL] Change Cast.toString as "cast" instead of "ansi_cast" under ANSI mode ### What changes were proposed in this pull request? Change Cast.toString as "cast" instead of "ansi_cast" under ANSI mode. This is to restore the behavior before https://github.com/apache/spark/pull/27608 ### Why are the changes needed? 1. There is no such a function "ansi_cast" in Spark SQL 2. Add/Divide/.. has different behavior under ANSI mode as well, but they don't have this special string representation. 3. As we are setting up new Github job for ANSI mode, this can avoid test failures from TPCDS plan stability test suites ### Does this PR introduce _any_ user-facing change? Yes but quite minor, the string output of `Cast` under ANSI mode becomes "cast" instead of "ansi_cast" again. ### How was this patch tested? Existing UT Closes #35570 from gengliangwang/revert-SPARK-30863. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 5 +---- .../sql-tests/results/ansi/string-functions.sql.out | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 06a148f063201..ef054e77707ac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -293,10 +293,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit */ def typeCheckFailureMessage: String - override def toString: String = { - val ansi = if (ansiEnabled) "ansi_" else "" - s"${ansi}cast($child as ${dataType.simpleString})" - } + override def toString: String = s"cast($child as ${dataType.simpleString})" override def checkInputDataTypes(): TypeCheckResult = { if (canCast(child.dataType, dataType)) { diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 4c0aa8c948334..ec7f41dcf4bff 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -928,7 +928,7 @@ select to_binary(null, cast(null as int)) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'to_binary(NULL, CAST(NULL AS INT))' due to data type mismatch: Unsupported encoding format: Some(ansi_cast(null as int)). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 +cannot resolve 'to_binary(NULL, CAST(NULL AS INT))' due to data type mismatch: Unsupported encoding format: Some(cast(null as int)). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 -- !query From 439975590cf4f21c2a548a2ac6231eb234e1a2f3 Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 18 Feb 2022 11:08:33 -0600 Subject: [PATCH 274/513] [SPARK-38243][PYTHON][ML] Fix pyspark.ml.LogisticRegression.getThreshold error message logic ### What changes were proposed in this pull request? 
This PR replaces incorrect usage of `str.join` on a `List[float]` in `LogisticRegression.getThreshold`. ### Why are the changes needed? To avoid an unexpected failure if the method is used for multi-class classification. After this change, the following code: ```python from pyspark.ml.classification import LogisticRegression LogisticRegression(thresholds=[1.0, 2.0, 3.0]).getThreshold() ``` raises ```python Traceback (most recent call last): Input In [4] in model.getThreshold() File /path/to/spark/python/pyspark/ml/classification.py:999 in getThreshold raise ValueError( ValueError: Logistic Regression getThreshold only applies to binary classification, but thresholds has length != 2. thresholds: [1.0, 2.0, 3.0] ``` instead of the current ```python Traceback (most recent call last): Input In [7] in model.getThreshold() File /path/to/spark/python/pyspark/ml/classification.py:1003 in getThreshold + ",".join(ts) TypeError: sequence item 0: expected str instance, float found ``` ### Does this PR introduce _any_ user-facing change? No. Bugfix. ### How was this patch tested? Manual testing. Closes #35558 from zero323/SPARK-38243. Authored-by: zero323 Signed-off-by: Sean Owen --- python/pyspark/ml/classification.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 058740e820542..b791e6f169d44 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -999,8 +999,7 @@ def getThreshold(self): raise ValueError( "Logistic Regression getThreshold only applies to" + " binary classification, but thresholds has length != 2." - + " thresholds: " - + ",".join(ts) + + " thresholds: {ts}".format(ts=ts) ) return 1.0 / (1.0 + ts[0] / ts[1]) else: From 8a70aeccf96de04cc122a30f50ab752b9b9c85ed Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 18 Feb 2022 18:46:50 -0800 Subject: [PATCH 275/513] [SPARK-38246][CORE][SQL][SS][WEBUI] Refactor `KVUtils` and add UTs related to RocksDB ### What changes were proposed in this pull request? The main changes of this pr are as follows: 1. Refactor `KVUtils` to let the `open` method use the passed `conf` to construct the corresponding `KVStore` 2. Use the new `KVUtils#open` to add UTs related to `RocksDB`; the new UTs cover the scenarios already tested with `LevelDB`. ### Why are the changes needed? Add more test scenarios related to `RocksDB`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA and add new UTs Closes #35563 from LuciferYang/kvutils-open.
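For example, with this change a test suite can pick the disk backend explicitly and pass the conf through to `KVUtils#open` (a minimal sketch mirroring the updated suites in this patch; `testDir` is a stand-in for a temporary directory owned by the test):

```scala
import java.io.File

import org.apache.spark.SparkConf
import org.apache.spark.internal.config.History.{HYBRID_STORE_DISK_BACKEND, HybridStoreDiskBackend}
import org.apache.spark.status.KVUtils

// Choose the backend through the passed-in SparkConf rather than through a
// SparkConf created inside KVUtils (the behavior before this refactor).
val conf = new SparkConf()
  .set(HYBRID_STORE_DISK_BACKEND, HybridStoreDiskBackend.ROCKSDB.toString)

// `testDir` is assumed to be a temp directory created by the test, e.g. via Utils.createTempDir().
val store = KVUtils.open(new File(testDir, "listing"), "test", conf)
try {
  // exercise the store ...
} finally {
  store.close()
}
```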
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../deploy/history/FsHistoryProvider.scala | 16 ++++----- .../org/apache/spark/status/KVUtils.scala | 9 ++--- .../HistoryServerDiskManagerSuite.scala | 35 ++++++++++++++----- .../spark/status/AppStatusListenerSuite.scala | 27 ++++++++++---- .../spark/status/AppStatusStoreSuite.scala | 10 ++++-- .../StreamingQueryStatusListenerSuite.scala | 12 +++++-- 6 files changed, 75 insertions(+), 34 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 55f648a4a05c8..faa7033a147d9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -148,7 +148,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) AppStatusStore.CURRENT_VERSION, logDir.toString()) try { - open(dbPath, metadata) + open(dbPath, metadata, conf) } catch { // If there's an error, remove the listing database and any existing UI database // from the store directory, since it's extremely likely that they'll all contain @@ -156,12 +156,12 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) case _: UnsupportedStoreVersionException | _: MetadataMismatchException => logInfo("Detected incompatible DB versions, deleting...") path.listFiles().foreach(Utils.deleteRecursively) - open(dbPath, metadata) + open(dbPath, metadata, conf) case dbExc @ (_: NativeDB.DBException | _: RocksDBException) => // Get rid of the corrupted data and re-create it. logWarning(s"Failed to load disk store $dbPath :", dbExc) Utils.deleteRecursively(dbPath) - open(dbPath, metadata) + open(dbPath, metadata, conf) } }.getOrElse(new InMemoryStore()) @@ -1218,7 +1218,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) // the existing data. 
dm.openStore(appId, attempt.info.attemptId).foreach { path => try { - return KVUtils.open(path, metadata) + return KVUtils.open(path, metadata, conf) } catch { case e: Exception => logInfo(s"Failed to open existing store for $appId/${attempt.info.attemptId}.", e) @@ -1284,14 +1284,14 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) try { logInfo(s"Leasing disk manager space for app $appId / ${attempt.info.attemptId}...") lease = dm.lease(reader.totalSize, reader.compressionCodec.isDefined) - val diskStore = KVUtils.open(lease.tmpPath, metadata) + val diskStore = KVUtils.open(lease.tmpPath, metadata, conf) hybridStore.setDiskStore(diskStore) hybridStore.switchToDiskStore(new HybridStore.SwitchToDiskStoreListener { override def onSwitchToDiskStoreSuccess: Unit = { logInfo(s"Completely switched to diskStore for app $appId / ${attempt.info.attemptId}.") diskStore.close() val newStorePath = lease.commit(appId, attempt.info.attemptId) - hybridStore.setDiskStore(KVUtils.open(newStorePath, metadata)) + hybridStore.setDiskStore(KVUtils.open(newStorePath, metadata, conf)) memoryManager.release(appId, attempt.info.attemptId) } override def onSwitchToDiskStoreFail(e: Exception): Unit = { @@ -1327,7 +1327,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) logInfo(s"Leasing disk manager space for app $appId / ${attempt.info.attemptId}...") val lease = dm.lease(reader.totalSize, isCompressed) try { - Utils.tryWithResource(KVUtils.open(lease.tmpPath, metadata)) { store => + Utils.tryWithResource(KVUtils.open(lease.tmpPath, metadata, conf)) { store => rebuildAppStore(store, reader, attempt.info.lastUpdated.getTime()) } newStorePath = lease.commit(appId, attempt.info.attemptId) @@ -1345,7 +1345,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } } - KVUtils.open(newStorePath, metadata) + KVUtils.open(newStorePath, metadata, conf) } private def createInMemoryStore(attempt: AttemptInfoWrapper): KVStore = { diff --git a/core/src/main/scala/org/apache/spark/status/KVUtils.scala b/core/src/main/scala/org/apache/spark/status/KVUtils.scala index ddee539eb9eb4..7a4b613ac0696 100644 --- a/core/src/main/scala/org/apache/spark/status/KVUtils.scala +++ b/core/src/main/scala/org/apache/spark/status/KVUtils.scala @@ -38,8 +38,8 @@ private[spark] object KVUtils extends Logging { /** Use this to annotate constructor params to be used as KVStore indices. */ type KVIndexParam = KVIndex @getter - private lazy val backend = - HybridStoreDiskBackend.withName(new SparkConf().get(HYBRID_STORE_DISK_BACKEND)) + private def backend(conf: SparkConf) = + HybridStoreDiskBackend.withName(conf.get(HYBRID_STORE_DISK_BACKEND)) /** * A KVStoreSerializer that provides Scala types serialization too, and uses the same options as @@ -59,11 +59,12 @@ private[spark] object KVUtils extends Logging { * @param metadata Metadata value to compare to the data in the store. If the store does not * contain any metadata (e.g. it's a new store), this value is written as * the store's metadata. 
+ * @param conf SparkConf use to get `HYBRID_STORE_DISK_BACKEND` */ - def open[M: ClassTag](path: File, metadata: M): KVStore = { + def open[M: ClassTag](path: File, metadata: M, conf: SparkConf): KVStore = { require(metadata != null, "Metadata is required.") - val db = backend match { + val db = backend(conf) match { case LEVELDB => new LevelDB(path, new KVStoreScalaSerializer()) case ROCKSDB => new RocksDB(path, new KVStoreScalaSerializer()) } diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerDiskManagerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerDiskManagerSuite.scala index de5b5187aa2fa..c534d66c1571c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerDiskManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerDiskManagerSuite.scala @@ -26,13 +26,20 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.internal.config.History._ +import org.apache.spark.internal.config.History.HybridStoreDiskBackend import org.apache.spark.status.KVUtils -import org.apache.spark.tags.ExtendedLevelDBTest +import org.apache.spark.tags.{ExtendedLevelDBTest, ExtendedRocksDBTest} import org.apache.spark.util.{ManualClock, Utils} import org.apache.spark.util.kvstore.KVStore -@ExtendedLevelDBTest -class HistoryServerDiskManagerSuite extends SparkFunSuite with BeforeAndAfter { +abstract class HistoryServerDiskManagerSuite extends SparkFunSuite with BeforeAndAfter { + + protected def backend: HybridStoreDiskBackend.Value + + protected def extension: String + + protected def conf: SparkConf = new SparkConf() + .set(HYBRID_STORE_DISK_BACKEND, backend.toString) private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) @@ -43,7 +50,7 @@ class HistoryServerDiskManagerSuite extends SparkFunSuite with BeforeAndAfter { before { testDir = Utils.createTempDir() - store = KVUtils.open(new File(testDir, "listing"), "test") + store = KVUtils.open(new File(testDir, "listing"), "test", conf) } after { @@ -213,10 +220,20 @@ class HistoryServerDiskManagerSuite extends SparkFunSuite with BeforeAndAfter { } test("SPARK-38095: appStorePath should use backend extensions") { - HybridStoreDiskBackend.values.zip(Seq(".ldb", ".rdb")).foreach { case (backend, extension) => - val conf = new SparkConf().set(HYBRID_STORE_DISK_BACKEND, backend.toString) - val manager = new HistoryServerDiskManager(conf, testDir, store, new ManualClock()) - assert(manager.appStorePath("appId", None).getName.endsWith(extension)) - } + val conf = new SparkConf().set(HYBRID_STORE_DISK_BACKEND, backend.toString) + val manager = new HistoryServerDiskManager(conf, testDir, store, new ManualClock()) + assert(manager.appStorePath("appId", None).getName.endsWith(extension)) } } + +@ExtendedLevelDBTest +class HistoryServerDiskManagerUseLevelDBSuite extends HistoryServerDiskManagerSuite { + override protected def backend: HybridStoreDiskBackend.Value = HybridStoreDiskBackend.LEVELDB + override protected def extension: String = ".ldb" +} + +@ExtendedRocksDBTest +class HistoryServerDiskManagerUseRocksDBSuite extends HistoryServerDiskManagerSuite { + override protected def backend: HybridStoreDiskBackend.Value = HybridStoreDiskBackend.ROCKSDB + override protected def extension: String = ".rdb" +} diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala index 
c6db626121fa2..5e2e931c37689 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala @@ -28,6 +28,7 @@ import org.scalatest.BeforeAndAfter import org.apache.spark._ import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics} +import org.apache.spark.internal.config.History.{HYBRID_STORE_DISK_BACKEND, HybridStoreDiskBackend} import org.apache.spark.internal.config.Status._ import org.apache.spark.metrics.ExecutorMetricType import org.apache.spark.resource.ResourceProfile @@ -36,15 +37,11 @@ import org.apache.spark.scheduler.cluster._ import org.apache.spark.status.ListenerEventsTestHelper._ import org.apache.spark.status.api.v1 import org.apache.spark.storage._ -import org.apache.spark.tags.ExtendedLevelDBTest +import org.apache.spark.tags.{ExtendedLevelDBTest, ExtendedRocksDBTest} import org.apache.spark.util.Utils import org.apache.spark.util.kvstore.{InMemoryStore, KVStore} -@ExtendedLevelDBTest -class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { - private val conf = new SparkConf() - .set(LIVE_ENTITY_UPDATE_PERIOD, 0L) - .set(ASYNC_TRACKING_ENABLED, false) +abstract class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { private val twoReplicaMemAndDiskLevel = StorageLevel(true, true, false, true, 2) @@ -53,7 +50,11 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { private var store: ElementTrackingStore = _ private var taskIdTracker = -1L - protected def createKVStore: KVStore = KVUtils.open(testDir, getClass().getName()) + protected def conf: SparkConf = new SparkConf() + .set(LIVE_ENTITY_UPDATE_PERIOD, 0L) + .set(ASYNC_TRACKING_ENABLED, false) + + protected def createKVStore: KVStore = KVUtils.open(testDir, getClass().getName(), conf) before { time = 0L @@ -1891,3 +1892,15 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { class AppStatusListenerWithInMemoryStoreSuite extends AppStatusListenerSuite { override def createKVStore: KVStore = new InMemoryStore() } + +@ExtendedLevelDBTest +class AppStatusListenerWithLevelDBSuite extends AppStatusListenerSuite { + override def conf: SparkConf = super.conf + .set(HYBRID_STORE_DISK_BACKEND, HybridStoreDiskBackend.LEVELDB.toString) +} + +@ExtendedRocksDBTest +class AppStatusListenerWithRocksDBSuite extends AppStatusListenerSuite { + override def conf: SparkConf = super.conf + .set(HYBRID_STORE_DISK_BACKEND, HybridStoreDiskBackend.ROCKSDB.toString) +} diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala index 422d80976867d..798cff8d60fcd 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.status import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.executor.TaskMetrics +import org.apache.spark.internal.config.History.{HYBRID_STORE_DISK_BACKEND, HybridStoreDiskBackend} import org.apache.spark.internal.config.Status.LIVE_ENTITY_UPDATE_PERIOD import org.apache.spark.resource.ResourceProfile import org.apache.spark.scheduler.{SparkListenerStageSubmitted, SparkListenerTaskStart, StageInfo, TaskInfo, TaskLocality} @@ -81,7 +82,8 @@ class AppStatusStoreSuite extends SparkFunSuite { assert(store.count(classOf[CachedQuantile]) === 2) } - private def createAppStore(disk: Boolean, live: 
Boolean): AppStatusStore = { + private def createAppStore(disk: Boolean, diskStoreType: HybridStoreDiskBackend.Value = null, + live: Boolean): AppStatusStore = { val conf = new SparkConf() if (live) { return AppStatusStore.createLiveStore(conf) @@ -92,8 +94,9 @@ class AppStatusStoreSuite extends SparkFunSuite { } val store: KVStore = if (disk) { + conf.set(HYBRID_STORE_DISK_BACKEND, diskStoreType.toString) val testDir = Utils.createTempDir() - val diskStore = KVUtils.open(testDir, getClass.getName) + val diskStore = KVUtils.open(testDir, getClass.getName, conf) new ElementTrackingStore(diskStore, conf) } else { new ElementTrackingStore(new InMemoryStore, conf) @@ -102,7 +105,8 @@ class AppStatusStoreSuite extends SparkFunSuite { } Seq( - "disk" -> createAppStore(disk = true, live = false), + "disk leveldb" -> createAppStore(disk = true, HybridStoreDiskBackend.LEVELDB, live = false), + "disk rocksdb" -> createAppStore(disk = true, HybridStoreDiskBackend.ROCKSDB, live = false), "in memory" -> createAppStore(disk = false, live = false), "in memory live" -> createAppStore(disk = false, live = true) ).foreach { case (hint, appStore) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala index eee1a7c5ff3cd..1d1b51354f8d8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala @@ -23,6 +23,8 @@ import java.util.{Date, UUID} import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} import org.scalatest.time.SpanSugar._ +import org.apache.spark.SparkConf +import org.apache.spark.internal.config.History.{HYBRID_STORE_DISK_BACKEND, HybridStoreDiskBackend} import org.apache.spark.sql.catalyst.util.DateTimeUtils.getTimeZone import org.apache.spark.sql.execution.ui.StreamingQueryStatusStore import org.apache.spark.sql.internal.StaticSQLConf @@ -30,7 +32,7 @@ import org.apache.spark.sql.streaming.{StreamingQueryListener, StreamingQueryPro import org.apache.spark.sql.streaming import org.apache.spark.status.{ElementTrackingStore, KVUtils} import org.apache.spark.util.Utils -import org.apache.spark.util.kvstore.{InMemoryStore, KVStore, RocksDB} +import org.apache.spark.util.kvstore.{InMemoryStore, KVStore} class StreamingQueryStatusListenerSuite extends StreamTest { @@ -221,8 +223,10 @@ class StreamingQueryStatusListenerSuite extends StreamTest { test("SPARK-38056: test writing StreamingQueryData to a LevelDB store") { assume(!Utils.isMacOnAppleSilicon) + val conf = new SparkConf() + .set(HYBRID_STORE_DISK_BACKEND, HybridStoreDiskBackend.LEVELDB.toString) val testDir = Utils.createTempDir() - val kvStore = KVUtils.open(testDir, getClass.getName) + val kvStore = KVUtils.open(testDir, getClass.getName, conf) try { testStreamingQueryData(kvStore) } finally { @@ -233,8 +237,10 @@ class StreamingQueryStatusListenerSuite extends StreamTest { test("SPARK-38056: test writing StreamingQueryData to a RocksDB store") { assume(!Utils.isMacOnAppleSilicon) + val conf = new SparkConf() + .set(HYBRID_STORE_DISK_BACKEND, HybridStoreDiskBackend.ROCKSDB.toString) val testDir = Utils.createTempDir() - val kvStore = new RocksDB(testDir) + val kvStore = KVUtils.open(testDir, getClass.getName, conf) try { testStreamingQueryData(kvStore) } finally { From bc6eb920b99acfd9903da84b7f1f5ce2fef8fbff Mon Sep 17 00:00:00 
2001 From: Dongjoon Hyun Date: Fri, 18 Feb 2022 23:22:16 -0800 Subject: [PATCH 276/513] Revert "[SPARK-38244][K8S][BUILD] Upgrade kubernetes-client to 5.12.1" This reverts commit f33e371a2759e797351743f85df94ea27243b656. --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 42 +++++++++++++-------------- dev/deps/spark-deps-hadoop-3-hive-2.3 | 42 +++++++++++++-------------- pom.xml | 2 +- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 90a4ce07e8d8b..b4fd14b30a4dd 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -162,27 +162,27 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/5.12.1//kubernetes-client-5.12.1.jar -kubernetes-model-admissionregistration/5.12.1//kubernetes-model-admissionregistration-5.12.1.jar -kubernetes-model-apiextensions/5.12.1//kubernetes-model-apiextensions-5.12.1.jar -kubernetes-model-apps/5.12.1//kubernetes-model-apps-5.12.1.jar -kubernetes-model-autoscaling/5.12.1//kubernetes-model-autoscaling-5.12.1.jar -kubernetes-model-batch/5.12.1//kubernetes-model-batch-5.12.1.jar -kubernetes-model-certificates/5.12.1//kubernetes-model-certificates-5.12.1.jar -kubernetes-model-common/5.12.1//kubernetes-model-common-5.12.1.jar -kubernetes-model-coordination/5.12.1//kubernetes-model-coordination-5.12.1.jar -kubernetes-model-core/5.12.1//kubernetes-model-core-5.12.1.jar -kubernetes-model-discovery/5.12.1//kubernetes-model-discovery-5.12.1.jar -kubernetes-model-events/5.12.1//kubernetes-model-events-5.12.1.jar -kubernetes-model-extensions/5.12.1//kubernetes-model-extensions-5.12.1.jar -kubernetes-model-flowcontrol/5.12.1//kubernetes-model-flowcontrol-5.12.1.jar -kubernetes-model-metrics/5.12.1//kubernetes-model-metrics-5.12.1.jar -kubernetes-model-networking/5.12.1//kubernetes-model-networking-5.12.1.jar -kubernetes-model-node/5.12.1//kubernetes-model-node-5.12.1.jar -kubernetes-model-policy/5.12.1//kubernetes-model-policy-5.12.1.jar -kubernetes-model-rbac/5.12.1//kubernetes-model-rbac-5.12.1.jar -kubernetes-model-scheduling/5.12.1//kubernetes-model-scheduling-5.12.1.jar -kubernetes-model-storageclass/5.12.1//kubernetes-model-storageclass-5.12.1.jar +kubernetes-client/5.12.0//kubernetes-client-5.12.0.jar +kubernetes-model-admissionregistration/5.12.0//kubernetes-model-admissionregistration-5.12.0.jar +kubernetes-model-apiextensions/5.12.0//kubernetes-model-apiextensions-5.12.0.jar +kubernetes-model-apps/5.12.0//kubernetes-model-apps-5.12.0.jar +kubernetes-model-autoscaling/5.12.0//kubernetes-model-autoscaling-5.12.0.jar +kubernetes-model-batch/5.12.0//kubernetes-model-batch-5.12.0.jar +kubernetes-model-certificates/5.12.0//kubernetes-model-certificates-5.12.0.jar +kubernetes-model-common/5.12.0//kubernetes-model-common-5.12.0.jar +kubernetes-model-coordination/5.12.0//kubernetes-model-coordination-5.12.0.jar +kubernetes-model-core/5.12.0//kubernetes-model-core-5.12.0.jar +kubernetes-model-discovery/5.12.0//kubernetes-model-discovery-5.12.0.jar +kubernetes-model-events/5.12.0//kubernetes-model-events-5.12.0.jar +kubernetes-model-extensions/5.12.0//kubernetes-model-extensions-5.12.0.jar +kubernetes-model-flowcontrol/5.12.0//kubernetes-model-flowcontrol-5.12.0.jar +kubernetes-model-metrics/5.12.0//kubernetes-model-metrics-5.12.0.jar +kubernetes-model-networking/5.12.0//kubernetes-model-networking-5.12.0.jar 
+kubernetes-model-node/5.12.0//kubernetes-model-node-5.12.0.jar +kubernetes-model-policy/5.12.0//kubernetes-model-policy-5.12.0.jar +kubernetes-model-rbac/5.12.0//kubernetes-model-rbac-5.12.0.jar +kubernetes-model-scheduling/5.12.0//kubernetes-model-scheduling-5.12.0.jar +kubernetes-model-storageclass/5.12.0//kubernetes-model-storageclass-5.12.0.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index b052f1a6aa275..96bd2663df60a 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -148,27 +148,27 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/5.12.1//kubernetes-client-5.12.1.jar -kubernetes-model-admissionregistration/5.12.1//kubernetes-model-admissionregistration-5.12.1.jar -kubernetes-model-apiextensions/5.12.1//kubernetes-model-apiextensions-5.12.1.jar -kubernetes-model-apps/5.12.1//kubernetes-model-apps-5.12.1.jar -kubernetes-model-autoscaling/5.12.1//kubernetes-model-autoscaling-5.12.1.jar -kubernetes-model-batch/5.12.1//kubernetes-model-batch-5.12.1.jar -kubernetes-model-certificates/5.12.1//kubernetes-model-certificates-5.12.1.jar -kubernetes-model-common/5.12.1//kubernetes-model-common-5.12.1.jar -kubernetes-model-coordination/5.12.1//kubernetes-model-coordination-5.12.1.jar -kubernetes-model-core/5.12.1//kubernetes-model-core-5.12.1.jar -kubernetes-model-discovery/5.12.1//kubernetes-model-discovery-5.12.1.jar -kubernetes-model-events/5.12.1//kubernetes-model-events-5.12.1.jar -kubernetes-model-extensions/5.12.1//kubernetes-model-extensions-5.12.1.jar -kubernetes-model-flowcontrol/5.12.1//kubernetes-model-flowcontrol-5.12.1.jar -kubernetes-model-metrics/5.12.1//kubernetes-model-metrics-5.12.1.jar -kubernetes-model-networking/5.12.1//kubernetes-model-networking-5.12.1.jar -kubernetes-model-node/5.12.1//kubernetes-model-node-5.12.1.jar -kubernetes-model-policy/5.12.1//kubernetes-model-policy-5.12.1.jar -kubernetes-model-rbac/5.12.1//kubernetes-model-rbac-5.12.1.jar -kubernetes-model-scheduling/5.12.1//kubernetes-model-scheduling-5.12.1.jar -kubernetes-model-storageclass/5.12.1//kubernetes-model-storageclass-5.12.1.jar +kubernetes-client/5.12.0//kubernetes-client-5.12.0.jar +kubernetes-model-admissionregistration/5.12.0//kubernetes-model-admissionregistration-5.12.0.jar +kubernetes-model-apiextensions/5.12.0//kubernetes-model-apiextensions-5.12.0.jar +kubernetes-model-apps/5.12.0//kubernetes-model-apps-5.12.0.jar +kubernetes-model-autoscaling/5.12.0//kubernetes-model-autoscaling-5.12.0.jar +kubernetes-model-batch/5.12.0//kubernetes-model-batch-5.12.0.jar +kubernetes-model-certificates/5.12.0//kubernetes-model-certificates-5.12.0.jar +kubernetes-model-common/5.12.0//kubernetes-model-common-5.12.0.jar +kubernetes-model-coordination/5.12.0//kubernetes-model-coordination-5.12.0.jar +kubernetes-model-core/5.12.0//kubernetes-model-core-5.12.0.jar +kubernetes-model-discovery/5.12.0//kubernetes-model-discovery-5.12.0.jar +kubernetes-model-events/5.12.0//kubernetes-model-events-5.12.0.jar +kubernetes-model-extensions/5.12.0//kubernetes-model-extensions-5.12.0.jar +kubernetes-model-flowcontrol/5.12.0//kubernetes-model-flowcontrol-5.12.0.jar +kubernetes-model-metrics/5.12.0//kubernetes-model-metrics-5.12.0.jar +kubernetes-model-networking/5.12.0//kubernetes-model-networking-5.12.0.jar 
+kubernetes-model-node/5.12.0//kubernetes-model-node-5.12.0.jar +kubernetes-model-policy/5.12.0//kubernetes-model-policy-5.12.0.jar +kubernetes-model-rbac/5.12.0//kubernetes-model-rbac-5.12.0.jar +kubernetes-model-scheduling/5.12.0//kubernetes-model-scheduling-5.12.0.jar +kubernetes-model-storageclass/5.12.0//kubernetes-model-storageclass-5.12.0.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar diff --git a/pom.xml b/pom.xml index a2b98e93c062a..7165cb5229821 100644 --- a/pom.xml +++ b/pom.xml @@ -204,7 +204,7 @@ 7.0.0 org.fusesource.leveldbjni - 5.12.1 + 5.12.0 ${java.home} From 6ff760d483124b121d79c3a2d5fdc3ee3f27dd00 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 19 Feb 2022 11:32:13 +0100 Subject: [PATCH 277/513] [SPARK-37154][PYTHON] Inline hints for pyspark.rdd ### What changes were proposed in this pull request? This PR proposes migration of type hints for `pyspark.rdd` from stub file to inline annotation. ### Why are the changes needed? As a part of ongoing process of migration of stubs to inline hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests + new data tests. Closes #35252 from zero323/SPARK-37154. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/_typing.pyi | 14 +- python/pyspark/conf.py | 12 + python/pyspark/context.py | 3 +- python/pyspark/rdd.py | 987 +++++++++++++----- python/pyspark/rdd.pyi | 481 --------- python/pyspark/serializers.py | 7 + python/pyspark/sql/_typing.pyi | 4 +- .../pyspark/sql/pandas/_typing/__init__.pyi | 5 +- python/pyspark/tests/typing/test_rdd.yml | 77 +- 9 files changed, 814 insertions(+), 776 deletions(-) delete mode 100644 python/pyspark/rdd.pyi diff --git a/python/pyspark/_typing.pyi b/python/pyspark/_typing.pyi index 9a36c8945bf96..6cc09263684d5 100644 --- a/python/pyspark/_typing.pyi +++ b/python/pyspark/_typing.pyi @@ -17,17 +17,27 @@ # under the License. from typing import Callable, Iterable, Sized, TypeVar, Union -from typing_extensions import Protocol +from typing_extensions import Literal, Protocol + +from numpy import int32, int64, float32, float64, ndarray F = TypeVar("F", bound=Callable) T_co = TypeVar("T_co", covariant=True) PrimitiveType = Union[bool, float, int, str] +NonUDFType = Literal[0] + class SupportsIAdd(Protocol): def __iadd__(self, other: SupportsIAdd) -> SupportsIAdd: ... class SupportsOrdering(Protocol): - def __le__(self, other: SupportsOrdering) -> bool: ... + def __lt__(self, other: SupportsOrdering) -> bool: ... class SizedIterable(Protocol, Sized, Iterable[T_co]): ... + +S = TypeVar("S", bound=SupportsOrdering) + +NumberOrArray = TypeVar( + "NumberOrArray", float, int, complex, int32, int64, float32, float64, ndarray +) diff --git a/python/pyspark/conf.py b/python/pyspark/conf.py index 536e1f89cff3f..a9e26966b0611 100644 --- a/python/pyspark/conf.py +++ b/python/pyspark/conf.py @@ -203,6 +203,18 @@ def setAll(self, pairs: List[Tuple[str, str]]) -> "SparkConf": self.set(k, v) return self + @overload + def get(self, key: str) -> Optional[str]: + ... + + @overload + def get(self, key: str, defaultValue: None) -> Optional[str]: + ... + + @overload + def get(self, key: str, defaultValue: str) -> str: + ... 
+ def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]: """Get the configured value for some key, or return a default otherwise.""" if defaultValue is None: # Py4J doesn't call the right get() if we pass None diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 3db9630898af7..68f748e68faad 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -35,6 +35,7 @@ List, NoReturn, Optional, + Sequence, Tuple, Type, TYPE_CHECKING, @@ -1421,7 +1422,7 @@ def runJob( self, rdd: "RDD[T]", partitionFunc: Callable[[Iterable[T]], Iterable[U]], - partitions: Optional[List[int]] = None, + partitions: Optional[Sequence[int]] = None, allowLocal: bool = False, ) -> List[U]: """ diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 97b87ea87e834..7cb887fe35606 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -30,6 +30,26 @@ from itertools import chain from functools import reduce from math import sqrt, log, isinf, isnan, pow, ceil +from typing import ( + Any, + Callable, + Dict, + Generic, + Hashable, + Iterable, + Iterator, + IO, + List, + NoReturn, + Optional, + Sequence, + Tuple, + Union, + TypeVar, + cast, + overload, + TYPE_CHECKING, +) from pyspark.java_gateway import local_connect_and_auth from pyspark.serializers import ( @@ -40,6 +60,7 @@ CloudPickleSerializer, PairDeserializer, CPickleSerializer, + Serializer, pack_long, read_int, write_int, @@ -67,6 +88,41 @@ from pyspark.traceback_utils import SCCallSiteSync from pyspark.util import fail_on_stopiteration, _parse_memory + +if TYPE_CHECKING: + import socket + import io + + from pyspark._typing import NonUDFType + from pyspark._typing import S, NumberOrArray + from pyspark.context import SparkContext + from pyspark.sql.pandas._typing import ( + PandasScalarUDFType, + PandasGroupedMapUDFType, + PandasGroupedAggUDFType, + PandasWindowAggUDFType, + PandasScalarIterUDFType, + PandasMapIterUDFType, + PandasCogroupedMapUDFType, + ArrowMapIterUDFType, + ) + from pyspark.sql.dataframe import DataFrame + from pyspark.sql.types import AtomicType, StructType + from pyspark.sql._typing import AtomicValue, RowLike, SQLBatchedUDFType + + from py4j.java_gateway import JavaObject # type: ignore[import] + from py4j.java_collections import JavaArray # type: ignore[import] + +T = TypeVar("T") +T_co = TypeVar("T_co", covariant=True) +U = TypeVar("U") +K = TypeVar("K", bound=Hashable) +V = TypeVar("V") +V1 = TypeVar("V1") +V2 = TypeVar("V2") +V3 = TypeVar("V3") + + __all__ = ["RDD"] @@ -79,21 +135,21 @@ class PythonEvalType: These values should match values in org.apache.spark.api.python.PythonEvalType. 
""" - NON_UDF = 0 + NON_UDF: "NonUDFType" = 0 - SQL_BATCHED_UDF = 100 + SQL_BATCHED_UDF: "SQLBatchedUDFType" = 100 - SQL_SCALAR_PANDAS_UDF = 200 - SQL_GROUPED_MAP_PANDAS_UDF = 201 - SQL_GROUPED_AGG_PANDAS_UDF = 202 - SQL_WINDOW_AGG_PANDAS_UDF = 203 - SQL_SCALAR_PANDAS_ITER_UDF = 204 - SQL_MAP_PANDAS_ITER_UDF = 205 - SQL_COGROUPED_MAP_PANDAS_UDF = 206 - SQL_MAP_ARROW_ITER_UDF = 207 + SQL_SCALAR_PANDAS_UDF: "PandasScalarUDFType" = 200 + SQL_GROUPED_MAP_PANDAS_UDF: "PandasGroupedMapUDFType" = 201 + SQL_GROUPED_AGG_PANDAS_UDF: "PandasGroupedAggUDFType" = 202 + SQL_WINDOW_AGG_PANDAS_UDF: "PandasWindowAggUDFType" = 203 + SQL_SCALAR_PANDAS_ITER_UDF: "PandasScalarIterUDFType" = 204 + SQL_MAP_PANDAS_ITER_UDF: "PandasMapIterUDFType" = 205 + SQL_COGROUPED_MAP_PANDAS_UDF: "PandasCogroupedMapUDFType" = 206 + SQL_MAP_ARROW_ITER_UDF: "ArrowMapIterUDFType" = 207 -def portable_hash(x): +def portable_hash(x: Hashable) -> int: """ This function returns consistent hash code for builtin types, especially for None and tuple with None. @@ -137,7 +193,11 @@ class BoundedFloat(float): 100.0 """ - def __new__(cls, mean, confidence, low, high): + confidence: float + low: float + high: float + + def __new__(cls, mean: float, confidence: float, low: float, high: float) -> "BoundedFloat": obj = float.__new__(cls, mean) obj.confidence = confidence obj.low = low @@ -145,7 +205,7 @@ def __new__(cls, mean, confidence, low, high): return obj -def _create_local_socket(sock_info): +def _create_local_socket(sock_info: "JavaArray") -> "io.BufferedRWPair": """ Create a local socket that can be used to load deserialized data from the JVM @@ -158,8 +218,10 @@ def _create_local_socket(sock_info): ------- sockfile file descriptor of the local socket """ - port = sock_info[0] - auth_secret = sock_info[1] + sockfile: "io.BufferedRWPair" + sock: "socket.socket" + port: int = sock_info[0] + auth_secret: str = sock_info[1] sockfile, sock = local_connect_and_auth(port, auth_secret) # The RDD materialization time is unpredictable, if we set a timeout for socket reading # operation, it will very possibly fail. See SPARK-18281. 
@@ -167,7 +229,7 @@ def _create_local_socket(sock_info): return sockfile -def _load_from_socket(sock_info, serializer): +def _load_from_socket(sock_info: "JavaArray", serializer: Serializer) -> Iterator[Any]: """ Connect to a local socket described by sock_info and use the given serializer to yield data @@ -188,18 +250,21 @@ def _load_from_socket(sock_info, serializer): return serializer.load_stream(sockfile) -def _local_iterator_from_socket(sock_info, serializer): +def _local_iterator_from_socket(sock_info: "JavaArray", serializer: Serializer) -> Iterator[Any]: class PyLocalIterable: """Create a synchronous local iterable over a socket""" - def __init__(self, _sock_info, _serializer): + def __init__(self, _sock_info: "JavaArray", _serializer: Serializer): + port: int + auth_secret: str + jsocket_auth_server: "JavaObject" port, auth_secret, self.jsocket_auth_server = _sock_info self._sockfile = _create_local_socket((port, auth_secret)) self._serializer = _serializer - self._read_iter = iter([]) # Initialize as empty iterator + self._read_iter: Iterator[Any] = iter([]) # Initialize as empty iterator self._read_status = 1 - def __iter__(self): + def __iter__(self) -> Iterator[Any]: while self._read_status == 1: # Request next partition data from Java write_int(1, self._sockfile) @@ -218,7 +283,7 @@ def __iter__(self): elif self._read_status == -1: self.jsocket_auth_server.getResult() - def __del__(self): + def __del__(self) -> None: # If local iterator is not fully consumed, if self._read_status == 1: try: @@ -236,22 +301,22 @@ def __del__(self): class Partitioner: - def __init__(self, numPartitions, partitionFunc): + def __init__(self, numPartitions: int, partitionFunc: Callable[[Any], int]): self.numPartitions = numPartitions self.partitionFunc = partitionFunc - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: return ( isinstance(other, Partitioner) and self.numPartitions == other.numPartitions and self.partitionFunc == other.partitionFunc ) - def __call__(self, k): + def __call__(self, k: Any) -> int: return self.partitionFunc(k) % self.numPartitions -class RDD: +class RDD(Generic[T_co]): """ A Resilient Distributed Dataset (RDD), the basic abstraction in Spark. @@ -259,7 +324,12 @@ class RDD: operated on in parallel. """ - def __init__(self, jrdd, ctx, jrdd_deserializer=AutoBatchedSerializer(CPickleSerializer())): + def __init__( + self, + jrdd: "JavaObject", + ctx: "SparkContext", + jrdd_deserializer: Serializer = AutoBatchedSerializer(CPickleSerializer()), + ): self._jrdd = jrdd self.is_cached = False self.is_checkpointed = False @@ -267,21 +337,21 @@ def __init__(self, jrdd, ctx, jrdd_deserializer=AutoBatchedSerializer(CPickleSer self.ctx = ctx self._jrdd_deserializer = jrdd_deserializer self._id = jrdd.id() - self.partitioner = None + self.partitioner: Optional[Partitioner] = None - def _pickled(self): + def _pickled(self: "RDD[T]") -> "RDD[T]": return self._reserialize(AutoBatchedSerializer(CPickleSerializer())) - def id(self): + def id(self) -> int: """ A unique ID for this RDD (within its SparkContext). 
""" return self._id - def __repr__(self): + def __repr__(self) -> str: return self._jrdd.toString() - def __getnewargs__(self): + def __getnewargs__(self) -> NoReturn: # This method is called when attempting to pickle an RDD, which is always an error: raise RuntimeError( "It appears that you are attempting to broadcast an RDD or reference an RDD from an " @@ -293,13 +363,13 @@ def __getnewargs__(self): ) @property - def context(self): + def context(self) -> "SparkContext": """ The :class:`SparkContext` that this RDD was created on. """ return self.ctx - def cache(self): + def cache(self: "RDD[T]") -> "RDD[T]": """ Persist this RDD with the default storage level (`MEMORY_ONLY`). """ @@ -307,7 +377,7 @@ def cache(self): self.persist(StorageLevel.MEMORY_ONLY) return self - def persist(self, storageLevel=StorageLevel.MEMORY_ONLY): + def persist(self: "RDD[T]", storageLevel: StorageLevel = StorageLevel.MEMORY_ONLY) -> "RDD[T]": """ Set this RDD's storage level to persist its values across operations after the first time it is computed. This can only be used to assign @@ -325,7 +395,7 @@ def persist(self, storageLevel=StorageLevel.MEMORY_ONLY): self._jrdd.persist(javaStorageLevel) return self - def unpersist(self, blocking=False): + def unpersist(self: "RDD[T]", blocking: bool = False) -> "RDD[T]": """ Mark the RDD as non-persistent, and remove all blocks for it from memory and disk. @@ -338,7 +408,7 @@ def unpersist(self, blocking=False): self._jrdd.unpersist(blocking) return self - def checkpoint(self): + def checkpoint(self) -> None: """ Mark this RDD for checkpointing. It will be saved to a file inside the checkpoint directory set with :meth:`SparkContext.setCheckpointDir` and @@ -350,13 +420,13 @@ def checkpoint(self): self.is_checkpointed = True self._jrdd.rdd().checkpoint() - def isCheckpointed(self): + def isCheckpointed(self) -> bool: """ Return whether this RDD is checkpointed and materialized, either reliably or locally. """ return self._jrdd.rdd().isCheckpointed() - def localCheckpoint(self): + def localCheckpoint(self) -> None: """ Mark this RDD for local checkpointing using Spark's existing caching layer. @@ -377,7 +447,7 @@ def localCheckpoint(self): """ self._jrdd.rdd().localCheckpoint() - def isLocallyCheckpointed(self): + def isLocallyCheckpointed(self) -> bool: """ Return whether this RDD is marked for local checkpointing. @@ -385,17 +455,17 @@ def isLocallyCheckpointed(self): """ return self._jrdd.rdd().isLocallyCheckpointed() - def getCheckpointFile(self): + def getCheckpointFile(self) -> Optional[str]: """ Gets the name of the file to which this RDD was checkpointed Not defined if RDD is checkpointed locally. """ checkpointFile = self._jrdd.rdd().getCheckpointFile() - if checkpointFile.isDefined(): - return checkpointFile.get() - def map(self, f, preservesPartitioning=False): + return checkpointFile.get() if checkpointFile.isDefined() else None + + def map(self: "RDD[T]", f: Callable[[T], U], preservesPartitioning: bool = False) -> "RDD[U]": """ Return a new RDD by applying a function to each element of this RDD. 
@@ -406,12 +476,14 @@ def map(self, f, preservesPartitioning=False): [('a', 1), ('b', 1), ('c', 1)] """ - def func(_, iterator): + def func(_: int, iterator: Iterable[T]) -> Iterable[U]: return map(fail_on_stopiteration(f), iterator) return self.mapPartitionsWithIndex(func, preservesPartitioning) - def flatMap(self, f, preservesPartitioning=False): + def flatMap( + self: "RDD[T]", f: Callable[[T], Iterable[U]], preservesPartitioning: bool = False + ) -> "RDD[U]": """ Return a new RDD by first applying a function to all elements of this RDD, and then flattening the results. @@ -425,12 +497,14 @@ def flatMap(self, f, preservesPartitioning=False): [(2, 2), (2, 2), (3, 3), (3, 3), (4, 4), (4, 4)] """ - def func(s, iterator): + def func(_: int, iterator: Iterable[T]) -> Iterable[U]: return chain.from_iterable(map(fail_on_stopiteration(f), iterator)) return self.mapPartitionsWithIndex(func, preservesPartitioning) - def mapPartitions(self, f, preservesPartitioning=False): + def mapPartitions( + self: "RDD[T]", f: Callable[[Iterable[T]], Iterable[U]], preservesPartitioning: bool = False + ) -> "RDD[U]": """ Return a new RDD by applying a function to each partition of this RDD. @@ -442,12 +516,16 @@ def mapPartitions(self, f, preservesPartitioning=False): [3, 7] """ - def func(s, iterator): + def func(_: int, iterator: Iterable[T]) -> Iterable[U]: return f(iterator) return self.mapPartitionsWithIndex(func, preservesPartitioning) - def mapPartitionsWithIndex(self, f, preservesPartitioning=False): + def mapPartitionsWithIndex( + self: "RDD[T]", + f: Callable[[int, Iterable[T]], Iterable[U]], + preservesPartitioning: bool = False, + ) -> "RDD[U]": """ Return a new RDD by applying a function to each partition of this RDD, while tracking the index of the original partition. @@ -461,7 +539,11 @@ def mapPartitionsWithIndex(self, f, preservesPartitioning=False): """ return PipelinedRDD(self, f, preservesPartitioning) - def mapPartitionsWithSplit(self, f, preservesPartitioning=False): + def mapPartitionsWithSplit( + self: "RDD[T]", + f: Callable[[int, Iterable[T]], Iterable[U]], + preservesPartitioning: bool = False, + ) -> "RDD[U]": """ Return a new RDD by applying a function to each partition of this RDD, @@ -484,7 +566,7 @@ def mapPartitionsWithSplit(self, f, preservesPartitioning=False): ) return self.mapPartitionsWithIndex(f, preservesPartitioning) - def getNumPartitions(self): + def getNumPartitions(self) -> int: """ Returns the number of partitions in RDD @@ -496,7 +578,7 @@ def getNumPartitions(self): """ return self._jrdd.partitions().size() - def filter(self, f): + def filter(self: "RDD[T]", f: Callable[[T], bool]) -> "RDD[T]": """ Return a new RDD containing only the elements that satisfy a predicate. @@ -507,12 +589,12 @@ def filter(self, f): [2, 4] """ - def func(iterator): + def func(iterator: Iterable[T]) -> Iterable[T]: return filter(fail_on_stopiteration(f), iterator) return self.mapPartitions(func, True) - def distinct(self, numPartitions=None): + def distinct(self: "RDD[T]", numPartitions: Optional[int] = None) -> "RDD[T]": """ Return a new RDD containing the distinct elements in this RDD. @@ -527,7 +609,9 @@ def distinct(self, numPartitions=None): .map(lambda x: x[0]) ) - def sample(self, withReplacement, fraction, seed=None): + def sample( + self: "RDD[T]", withReplacement: bool, fraction: float, seed: Optional[int] = None + ) -> "RDD[T]": """ Return a sampled subset of this RDD. 
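# A short sketch of the (index, iterator) contract that mapPartitionsWithIndex now
# annotates as Callable[[int, Iterable[T]], Iterable[U]]. Assumes a live SparkContext
# bound to `sc`, as in this module's doctests.
from typing import Iterable

def tag(idx: int, it: Iterable[int]) -> Iterable[str]:
    for x in it:
        yield f"p{idx}:{x}"

print(sc.parallelize(range(4), 2).mapPartitionsWithIndex(tag).collect())
# ['p0:0', 'p0:1', 'p1:2', 'p1:3']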
@@ -556,7 +640,9 @@ def sample(self, withReplacement, fraction, seed=None): assert fraction >= 0.0, "Negative fraction value: %s" % fraction return self.mapPartitionsWithIndex(RDDSampler(withReplacement, fraction, seed).func, True) - def randomSplit(self, weights, seed=None): + def randomSplit( + self: "RDD[T]", weights: Sequence[Union[int, float]], seed: Optional[int] = None + ) -> "List[RDD[T]]": """ Randomly splits this RDD with the provided weights. @@ -593,7 +679,9 @@ def randomSplit(self, weights, seed=None): ] # this is ported from scala/spark/RDD.scala - def takeSample(self, withReplacement, num, seed=None): + def takeSample( + self: "RDD[T]", withReplacement: bool, num: int, seed: Optional[int] = None + ) -> List[T]: """ Return a fixed-size sampled subset of this RDD. @@ -651,7 +739,9 @@ def takeSample(self, withReplacement, num, seed=None): return samples[0:num] @staticmethod - def _computeFractionForSampleSize(sampleSizeLowerBound, total, withReplacement): + def _computeFractionForSampleSize( + sampleSizeLowerBound: int, total: int, withReplacement: bool + ) -> float: """ Returns a sampling rate that guarantees a sample of size >= sampleSizeLowerBound 99.99% of the time. @@ -683,7 +773,7 @@ def _computeFractionForSampleSize(sampleSizeLowerBound, total, withReplacement): gamma = -log(delta) / total return min(1, fraction + gamma + sqrt(gamma * gamma + 2 * gamma * fraction)) - def union(self, other): + def union(self: "RDD[T]", other: "RDD[U]") -> "RDD[Union[T, U]]": """ Return the union of this RDD and another one. @@ -694,7 +784,9 @@ def union(self, other): [1, 1, 2, 3, 1, 1, 2, 3] """ if self._jrdd_deserializer == other._jrdd_deserializer: - rdd = RDD(self._jrdd.union(other._jrdd), self.ctx, self._jrdd_deserializer) + rdd: "RDD[Union[T, U]]" = RDD( + self._jrdd.union(other._jrdd), self.ctx, self._jrdd_deserializer + ) else: # These RDDs contain data in different serialized formats, so we # must normalize them to the default serializer. @@ -708,7 +800,7 @@ def union(self, other): rdd.partitioner = self.partitioner return rdd - def intersection(self, other): + def intersection(self: "RDD[T]", other: "RDD[T]") -> "RDD[T]": """ Return the intersection of this RDD and another one. The output will not contain any duplicate elements, even if the input RDDs did. @@ -731,14 +823,14 @@ def intersection(self, other): .keys() ) - def _reserialize(self, serializer=None): + def _reserialize(self: "RDD[T]", serializer: Optional[Serializer] = None) -> "RDD[T]": serializer = serializer or self.ctx.serializer if self._jrdd_deserializer != serializer: self = self.map(lambda x: x, preservesPartitioning=True) self._jrdd_deserializer = serializer return self - def __add__(self, other): + def __add__(self: "RDD[T]", other: "RDD[U]") -> "RDD[Union[T, U]]": """ Return the union of this RDD and another one. @@ -752,9 +844,43 @@ def __add__(self, other): raise TypeError return self.union(other) + @overload def repartitionAndSortWithinPartitions( - self, numPartitions=None, partitionFunc=portable_hash, ascending=True, keyfunc=lambda x: x - ): + self: "RDD[Tuple[S, V]]", + numPartitions: Optional[int] = ..., + partitionFunc: Callable[["S"], int] = ..., + ascending: bool = ..., + ) -> "RDD[Tuple[S, V]]": + ... + + @overload + def repartitionAndSortWithinPartitions( + self: "RDD[Tuple[K, V]]", + numPartitions: Optional[int], + partitionFunc: Callable[[K], int], + ascending: bool, + keyfunc: Callable[[K], "S"], + ) -> "RDD[Tuple[K, V]]": + ... 
+ + @overload + def repartitionAndSortWithinPartitions( + self: "RDD[Tuple[K, V]]", + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ascending: bool = ..., + *, + keyfunc: Callable[[K], "S"], + ) -> "RDD[Tuple[K, V]]": + ... + + def repartitionAndSortWithinPartitions( + self: "RDD[Tuple[Any, Any]]", + numPartitions: Optional[int] = None, + partitionFunc: Callable[[Any], int] = portable_hash, + ascending: bool = True, + keyfunc: Callable[[Any], Any] = lambda x: x, + ) -> "RDD[Tuple[Any, Any]]": """ Repartition the RDD according to the given partitioner and, within each resulting partition, sort records by their keys. @@ -772,13 +898,45 @@ def repartitionAndSortWithinPartitions( memory = self._memory_limit() serializer = self._jrdd_deserializer - def sortPartition(iterator): + def sortPartition(iterator: Iterable[Tuple[K, V]]) -> Iterable[Tuple[K, V]]: sort = ExternalSorter(memory * 0.9, serializer).sorted return iter(sort(iterator, key=lambda k_v: keyfunc(k_v[0]), reverse=(not ascending))) return self.partitionBy(numPartitions, partitionFunc).mapPartitions(sortPartition, True) - def sortByKey(self, ascending=True, numPartitions=None, keyfunc=lambda x: x): + @overload + def sortByKey( + self: "RDD[Tuple[S, V]]", + ascending: bool = ..., + numPartitions: Optional[int] = ..., + ) -> "RDD[Tuple[K, V]]": + ... + + @overload + def sortByKey( + self: "RDD[Tuple[K, V]]", + ascending: bool, + numPartitions: int, + keyfunc: Callable[[K], "S"], + ) -> "RDD[Tuple[K, V]]": + ... + + @overload + def sortByKey( + self: "RDD[Tuple[K, V]]", + ascending: bool = ..., + numPartitions: Optional[int] = ..., + *, + keyfunc: Callable[[K], "S"], + ) -> "RDD[Tuple[K, V]]": + ... + + def sortByKey( + self: "RDD[Tuple[K, V]]", + ascending: Optional[bool] = True, + numPartitions: Optional[int] = None, + keyfunc: Callable[[Any], Any] = lambda x: x, + ) -> "RDD[Tuple[K, V]]": """ Sorts this RDD, which is assumed to consist of (key, value) pairs. 
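# Sketch of the keyword-only keyfunc form captured by the sortByKey overloads above.
# Assumes a live SparkContext bound to `sc`.
pairs = sc.parallelize([("Mary", 1), ("had", 2), ("a", 3), ("Lamb", 4)])
print(pairs.sortByKey(keyfunc=lambda k: k.lower()).collect())
# [('a', 3), ('had', 2), ('Lamb', 4), ('Mary', 1)]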
@@ -802,7 +960,7 @@ def sortByKey(self, ascending=True, numPartitions=None, keyfunc=lambda x: x): memory = self._memory_limit() serializer = self._jrdd_deserializer - def sortPartition(iterator): + def sortPartition(iterator: Iterable[Tuple[K, V]]) -> Iterable[Tuple[K, V]]: sort = ExternalSorter(memory * 0.9, serializer).sorted return iter(sort(iterator, key=lambda kv: keyfunc(kv[0]), reverse=(not ascending))) @@ -829,16 +987,21 @@ def sortPartition(iterator): for i in range(0, numPartitions - 1) ] - def rangePartitioner(k): + def rangePartitioner(k: K) -> int: p = bisect.bisect_left(bounds, keyfunc(k)) if ascending: return p else: - return numPartitions - 1 - p + return numPartitions - 1 - p # type: ignore[operator] return self.partitionBy(numPartitions, rangePartitioner).mapPartitions(sortPartition, True) - def sortBy(self, keyfunc, ascending=True, numPartitions=None): + def sortBy( + self: "RDD[T]", + keyfunc: Callable[[T], "S"], + ascending: bool = True, + numPartitions: Optional[int] = None, + ) -> "RDD[T]": """ Sorts this RDD by the given keyfunc @@ -850,9 +1013,13 @@ def sortBy(self, keyfunc, ascending=True, numPartitions=None): >>> sc.parallelize(tmp).sortBy(lambda x: x[1]).collect() [('a', 1), ('b', 2), ('1', 3), ('d', 4), ('2', 5)] """ - return self.keyBy(keyfunc).sortByKey(ascending, numPartitions).values() + return ( + self.keyBy(keyfunc) # type: ignore[type-var] + .sortByKey(ascending, numPartitions) + .values() + ) - def glom(self): + def glom(self: "RDD[T]") -> "RDD[List[T]]": """ Return an RDD created by coalescing all elements within each partition into a list. @@ -864,12 +1031,12 @@ def glom(self): [[1, 2], [3, 4]] """ - def func(iterator): + def func(iterator: Iterable[T]) -> Iterable[List[T]]: yield list(iterator) return self.mapPartitions(func) - def cartesian(self, other): + def cartesian(self: "RDD[T]", other: "RDD[U]") -> "RDD[Tuple[T, U]]": """ Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of elements ``(a, b)`` where ``a`` is in `self` and @@ -885,7 +1052,12 @@ def cartesian(self, other): deserializer = CartesianDeserializer(self._jrdd_deserializer, other._jrdd_deserializer) return RDD(self._jrdd.cartesian(other._jrdd), self.ctx, deserializer) - def groupBy(self, f, numPartitions=None, partitionFunc=portable_hash): + def groupBy( + self: "RDD[T]", + f: Callable[[T], K], + numPartitions: Optional[int] = None, + partitionFunc: Callable[[K], int] = portable_hash, + ) -> "RDD[Tuple[K, Iterable[T]]]": """ Return an RDD of grouped items. @@ -898,7 +1070,9 @@ def groupBy(self, f, numPartitions=None, partitionFunc=portable_hash): """ return self.map(lambda x: (f(x), x)).groupByKey(numPartitions, partitionFunc) - def pipe(self, command, env=None, checkCode=False): + def pipe( + self, command: str, env: Optional[Dict[str, str]] = None, checkCode: bool = False + ) -> "RDD[str]": """ Return an RDD created by piping elements to a forked external process. 
@@ -919,10 +1093,10 @@ def pipe(self, command, env=None, checkCode=False): if env is None: env = dict() - def func(iterator): + def func(iterator: Iterable[T]) -> Iterable[str]: pipe = Popen(shlex.split(command), env=env, stdin=PIPE, stdout=PIPE) - def pipe_objs(out): + def pipe_objs(out: IO[bytes]) -> None: for obj in iterator: s = str(obj).rstrip("\n") + "\n" out.write(s.encode("utf-8")) @@ -930,7 +1104,7 @@ def pipe_objs(out): Thread(target=pipe_objs, args=[pipe.stdin]).start() - def check_return_code(): + def check_return_code() -> Iterable[int]: pipe.wait() if checkCode and pipe.returncode: raise RuntimeError( @@ -942,13 +1116,15 @@ def check_return_code(): yield i return ( - x.rstrip(b"\n").decode("utf-8") - for x in chain(iter(pipe.stdout.readline, b""), check_return_code()) + cast(bytes, x).rstrip(b"\n").decode("utf-8") + for x in chain( + iter(cast(IO[bytes], pipe.stdout).readline, b""), check_return_code() + ) ) return self.mapPartitions(func) - def foreach(self, f): + def foreach(self: "RDD[T]", f: Callable[[T], None]) -> None: """ Applies a function to all elements of this RDD. @@ -959,14 +1135,14 @@ def foreach(self, f): """ f = fail_on_stopiteration(f) - def processPartition(iterator): + def processPartition(iterator: Iterable[T]) -> Iterable[Any]: for x in iterator: f(x) return iter([]) self.mapPartitions(processPartition).count() # Force evaluation - def foreachPartition(self, f): + def foreachPartition(self: "RDD[T]", f: Callable[[Iterable[T]], None]) -> None: """ Applies a function to each partition of this RDD. @@ -978,16 +1154,16 @@ def foreachPartition(self, f): >>> sc.parallelize([1, 2, 3, 4, 5]).foreachPartition(f) """ - def func(it): + def func(it: Iterable[T]) -> Iterable[Any]: r = f(it) try: - return iter(r) + return iter(r) # type: ignore[call-overload] except TypeError: return iter([]) self.mapPartitions(func).count() # Force evaluation - def collect(self): + def collect(self: "RDD[T]") -> List[T]: """ Return a list that contains all of the elements in this RDD. @@ -997,10 +1173,13 @@ def collect(self): to be small, as all the data is loaded into the driver's memory. """ with SCCallSiteSync(self.context): + assert self.ctx._jvm is not None sock_info = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd()) return list(_load_from_socket(sock_info, self._jrdd_deserializer)) - def collectWithJobGroup(self, groupId, description, interruptOnCancel=False): + def collectWithJobGroup( + self: "RDD[T]", groupId: str, description: str, interruptOnCancel: bool = False + ) -> "List[T]": """ When collect rdd, use this method to specify job group. @@ -1015,12 +1194,13 @@ def collectWithJobGroup(self, groupId, description, interruptOnCancel=False): ) with SCCallSiteSync(self.context): + assert self.ctx._jvm is not None sock_info = self.ctx._jvm.PythonRDD.collectAndServeWithJobGroup( self._jrdd.rdd(), groupId, description, interruptOnCancel ) return list(_load_from_socket(sock_info, self._jrdd_deserializer)) - def reduce(self, f): + def reduce(self: "RDD[T]", f: Callable[[T, T], T]) -> T: """ Reduces the elements of this RDD using the specified commutative and associative binary operator. Currently reduces partitions locally. 
@@ -1039,7 +1219,7 @@ def reduce(self, f): """ f = fail_on_stopiteration(f) - def func(iterator): + def func(iterator: Iterable[T]) -> Iterable[T]: iterator = iter(iterator) try: initial = next(iterator) @@ -1052,7 +1232,7 @@ def func(iterator): return reduce(f, vals) raise ValueError("Can not reduce() empty RDD") - def treeReduce(self, f, depth=2): + def treeReduce(self: "RDD[T]", f: Callable[[T, T], T], depth: int = 2) -> T: """ Reduces the elements of this RDD in a multi-level tree pattern. @@ -1080,22 +1260,26 @@ def treeReduce(self, f, depth=2): if depth < 1: raise ValueError("Depth cannot be smaller than 1 but got %d." % depth) - zeroValue = None, True # Use the second entry to indicate whether this is a dummy value. + # Use the second entry to indicate whether this is a dummy value. + zeroValue: Tuple[T, bool] = ( # type: ignore[assignment] + None, + True, + ) - def op(x, y): + def op(x: Tuple[T, bool], y: Tuple[T, bool]) -> Tuple[T, bool]: if x[1]: return y elif y[1]: return x else: - return f(x[0], y[0]), False + return f(x[0], y[0]), False # type: ignore[arg-type] reduced = self.map(lambda x: (x, False)).treeAggregate(zeroValue, op, op, depth) if reduced[1]: raise ValueError("Cannot reduce empty RDD.") return reduced[0] - def fold(self, zeroValue, op): + def fold(self: "RDD[T]", zeroValue: T, op: Callable[[T, T], T]) -> T: """ Aggregate the elements of each partition, and then the results for all the partitions, using a given associative function and a neutral "zero value." @@ -1120,7 +1304,7 @@ def fold(self, zeroValue, op): """ op = fail_on_stopiteration(op) - def func(iterator): + def func(iterator: Iterable[T]) -> Iterable[T]: acc = zeroValue for obj in iterator: acc = op(acc, obj) @@ -1132,7 +1316,9 @@ def func(iterator): vals = self.mapPartitions(func).collect() return reduce(op, vals, zeroValue) - def aggregate(self, zeroValue, seqOp, combOp): + def aggregate( + self: "RDD[T]", zeroValue: U, seqOp: Callable[[U, T], U], combOp: Callable[[U, U], U] + ) -> U: """ Aggregate the elements of each partition, and then the results for all the partitions, using a given combine functions and a neutral "zero @@ -1158,7 +1344,7 @@ def aggregate(self, zeroValue, seqOp, combOp): seqOp = fail_on_stopiteration(seqOp) combOp = fail_on_stopiteration(combOp) - def func(iterator): + def func(iterator: Iterable[T]) -> Iterable[U]: acc = zeroValue for obj in iterator: acc = seqOp(acc, obj) @@ -1170,7 +1356,13 @@ def func(iterator): vals = self.mapPartitions(func).collect() return reduce(combOp, vals, zeroValue) - def treeAggregate(self, zeroValue, seqOp, combOp, depth=2): + def treeAggregate( + self: "RDD[T]", + zeroValue: U, + seqOp: Callable[[U, T], U], + combOp: Callable[[U, U], U], + depth: int = 2, + ) -> U: """ Aggregates the elements of this RDD in a multi-level tree pattern. @@ -1199,7 +1391,7 @@ def treeAggregate(self, zeroValue, seqOp, combOp, depth=2): if self.getNumPartitions() == 0: return zeroValue - def aggregatePartition(iterator): + def aggregatePartition(iterator: Iterable[T]) -> Iterable[U]: acc = zeroValue for obj in iterator: acc = seqOp(acc, obj) @@ -1211,10 +1403,10 @@ def aggregatePartition(iterator): # If creating an extra level doesn't help reduce the wall-clock time, we stop the tree # aggregation. 
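# Sketch of aggregate's two-function contract as annotated above: seqOp folds an element
# of type T into the accumulator U, combOp merges two Us, so the result type can differ
# from the element type -- here a (sum, count) pair feeding a mean.
# Assumes a live SparkContext bound to `sc`.
from typing import Tuple

def seq_op(acc: Tuple[int, int], x: int) -> Tuple[int, int]:
    return acc[0] + x, acc[1] + 1

def comb_op(a: Tuple[int, int], b: Tuple[int, int]) -> Tuple[int, int]:
    return a[0] + b[0], a[1] + b[1]

total, count = sc.parallelize([1, 2, 3, 4]).aggregate((0, 0), seq_op, comb_op)
print(total / count)  # 2.5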
while numPartitions > scale + numPartitions / scale: - numPartitions /= scale + numPartitions /= scale # type: ignore[assignment] curNumPartitions = int(numPartitions) - def mapPartition(i, iterator): + def mapPartition(i: int, iterator: Iterable[U]) -> Iterable[Tuple[int, U]]: for obj in iterator: yield (i % curNumPartitions, obj) @@ -1226,7 +1418,15 @@ def mapPartition(i, iterator): return partiallyAggregated.reduce(combOp) - def max(self, key=None): + @overload + def max(self: "RDD[S]") -> "S": + ... + + @overload + def max(self: "RDD[T]", key: Callable[[T], "S"]) -> T: + ... + + def max(self: "RDD[T]", key: Optional[Callable[[T], "S"]] = None) -> T: """ Find the maximum item in this RDD. @@ -1244,10 +1444,18 @@ def max(self, key=None): 5.0 """ if key is None: - return self.reduce(max) - return self.reduce(lambda a, b: max(a, b, key=key)) + return self.reduce(max) # type: ignore[arg-type] + return self.reduce(lambda a, b: max(a, b, key=key)) # type: ignore[arg-type] - def min(self, key=None): + @overload + def min(self: "RDD[S]") -> "S": + ... + + @overload + def min(self: "RDD[T]", key: Callable[[T], "S"]) -> T: + ... + + def min(self: "RDD[T]", key: Optional[Callable[[T], "S"]] = None) -> T: """ Find the minimum item in this RDD. @@ -1265,10 +1473,10 @@ def min(self, key=None): 10.0 """ if key is None: - return self.reduce(min) - return self.reduce(lambda a, b: min(a, b, key=key)) + return self.reduce(min) # type: ignore[arg-type] + return self.reduce(lambda a, b: min(a, b, key=key)) # type: ignore[arg-type] - def sum(self): + def sum(self: "RDD[NumberOrArray]") -> "NumberOrArray": """ Add up the elements in this RDD. @@ -1277,9 +1485,11 @@ def sum(self): >>> sc.parallelize([1.0, 2.0, 3.0]).sum() 6.0 """ - return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add) + return self.mapPartitions(lambda x: [sum(x)]).fold( # type: ignore[return-value] + 0, operator.add + ) - def count(self): + def count(self) -> int: """ Return the number of elements in this RDD. @@ -1290,18 +1500,22 @@ def count(self): """ return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum() - def stats(self): + def stats(self: "RDD[NumberOrArray]") -> StatCounter: """ Return a :class:`StatCounter` object that captures the mean, variance and count of the RDD's elements in one operation. """ - def redFunc(left_counter, right_counter): + def redFunc(left_counter: StatCounter, right_counter: StatCounter) -> StatCounter: return left_counter.mergeStats(right_counter) - return self.mapPartitions(lambda i: [StatCounter(i)]).reduce(redFunc) + return self.mapPartitions(lambda i: [StatCounter(i)]).reduce( # type: ignore[arg-type] + redFunc + ) - def histogram(self, buckets): + def histogram( + self: "RDD[S]", buckets: Union[int, List["S"], Tuple["S", ...]] + ) -> Tuple[Sequence["S"], List[int]]: """ Compute a histogram using the provided buckets. The buckets are all open to the right except for the last which is closed. 
@@ -1345,7 +1559,7 @@ def histogram(self, buckets): raise ValueError("number of buckets must be >= 1") # filter out non-comparable elements - def comparable(x): + def comparable(x: Any) -> bool: if x is None: return False if type(x) is float and isnan(x): @@ -1355,7 +1569,7 @@ def comparable(x): filtered = self.filter(comparable) # faster than stats() - def minmax(a, b): + def minmax(a: Tuple["S", "S"], b: Tuple["S", "S"]) -> Tuple["S", "S"]: return min(a[0], b[0]), max(a[1], b[1]) try: @@ -1369,7 +1583,7 @@ def minmax(a, b): return [minv, maxv], [filtered.count()] try: - inc = (maxv - minv) / buckets + inc = (maxv - minv) / buckets # type: ignore[operator] except TypeError: raise TypeError("Can not generate buckets with non-number in RDD") @@ -1378,8 +1592,8 @@ def minmax(a, b): # keep them as integer if possible inc = int(inc) - if inc * buckets != maxv - minv: - inc = (maxv - minv) * 1.0 / buckets + if inc * buckets != maxv - minv: # type: ignore[operator] + inc = (maxv - minv) * 1.0 / buckets # type: ignore[operator] buckets = [i * inc + minv for i in range(buckets)] buckets.append(maxv) # fix accumulated error @@ -1403,35 +1617,47 @@ def minmax(a, b): even = False inc = None try: - steps = [buckets[i + 1] - buckets[i] for i in range(len(buckets) - 1)] + steps = [ + buckets[i + 1] - buckets[i] # type: ignore[operator] + for i in range(len(buckets) - 1) + ] except TypeError: pass # objects in buckets do not support '-' else: if max(steps) - min(steps) < 1e-10: # handle precision errors even = True - inc = (maxv - minv) / (len(buckets) - 1) + inc = (maxv - minv) / (len(buckets) - 1) # type: ignore[operator] else: raise TypeError("buckets should be a list or tuple or number(int or long)") - def histogram(iterator): - counters = [0] * len(buckets) + def histogram(iterator: Iterable["S"]) -> Iterable[List[int]]: + counters = [0] * len(buckets) # type: ignore[arg-type] for i in iterator: - if i is None or (type(i) is float and isnan(i)) or i > maxv or i < minv: + if ( + i is None + or (isinstance(i, float) and isnan(i)) # type: ignore[arg-type] + or i > maxv + or i < minv + ): continue - t = int((i - minv) / inc) if even else bisect.bisect_right(buckets, i) - 1 + t = ( + int((i - minv) / inc) # type: ignore[operator] + if even + else bisect.bisect_right(buckets, i) - 1 # type: ignore[arg-type] + ) counters[t] += 1 # add last two together last = counters.pop() counters[-1] += last return [counters] - def mergeCounters(a, b): + def mergeCounters(a: List[int], b: List[int]) -> List[int]: return [i + j for i, j in zip(a, b)] return buckets, self.mapPartitions(histogram).reduce(mergeCounters) - def mean(self): + def mean(self: "RDD[NumberOrArray]") -> "NumberOrArray": """ Compute the mean of this RDD's elements. @@ -1440,9 +1666,9 @@ def mean(self): >>> sc.parallelize([1, 2, 3]).mean() 2.0 """ - return self.stats().mean() + return self.stats().mean() # type: ignore[return-value] - def variance(self): + def variance(self: "RDD[NumberOrArray]") -> "NumberOrArray": """ Compute the variance of this RDD's elements. @@ -1451,9 +1677,9 @@ def variance(self): >>> sc.parallelize([1, 2, 3]).variance() 0.666... """ - return self.stats().variance() + return self.stats().variance() # type: ignore[return-value] - def stdev(self): + def stdev(self: "RDD[NumberOrArray]") -> "NumberOrArray": """ Compute the standard deviation of this RDD's elements. @@ -1462,9 +1688,9 @@ def stdev(self): >>> sc.parallelize([1, 2, 3]).stdev() 0.816... 
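# Sketch of the two bucket forms histogram accepts, per the Union[int, List["S"], Tuple["S", ...]]
# annotation above: an int for evenly spaced buckets derived from min/max, or an explicit
# sorted sequence of boundaries. Assumes a live SparkContext bound to `sc`.
rdd = sc.parallelize(range(51))
print(rdd.histogram(2))                 # ([0, 25, 50], [25, 26])
print(rdd.histogram([0, 15, 30, 50]))   # ([0, 15, 30, 50], [15, 15, 21])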
""" - return self.stats().stdev() + return self.stats().stdev() # type: ignore[return-value] - def sampleStdev(self): + def sampleStdev(self: "RDD[NumberOrArray]") -> "NumberOrArray": """ Compute the sample standard deviation of this RDD's elements (which corrects for bias in estimating the standard deviation by dividing by @@ -1475,9 +1701,9 @@ def sampleStdev(self): >>> sc.parallelize([1, 2, 3]).sampleStdev() 1.0 """ - return self.stats().sampleStdev() + return self.stats().sampleStdev() # type: ignore[return-value] - def sampleVariance(self): + def sampleVariance(self: "RDD[NumberOrArray]") -> "NumberOrArray": """ Compute the sample variance of this RDD's elements (which corrects for bias in estimating the variance by dividing by N-1 instead of N). @@ -1487,9 +1713,9 @@ def sampleVariance(self): >>> sc.parallelize([1, 2, 3]).sampleVariance() 1.0 """ - return self.stats().sampleVariance() + return self.stats().sampleVariance() # type: ignore[return-value] - def countByValue(self): + def countByValue(self: "RDD[K]") -> Dict[K, int]: """ Return the count of each unique value in this RDD as a dictionary of (value, count) pairs. @@ -1500,20 +1726,28 @@ def countByValue(self): [(1, 2), (2, 3)] """ - def countPartition(iterator): - counts = defaultdict(int) + def countPartition(iterator: Iterable[K]) -> Iterable[Dict[K, int]]: + counts: Dict[K, int] = defaultdict(int) for obj in iterator: counts[obj] += 1 yield counts - def mergeMaps(m1, m2): + def mergeMaps(m1: Dict[K, int], m2: Dict[K, int]) -> Dict[K, int]: for k, v in m2.items(): m1[k] += v return m1 return self.mapPartitions(countPartition).reduce(mergeMaps) - def top(self, num, key=None): + @overload + def top(self: "RDD[S]", num: int) -> List["S"]: + ... + + @overload + def top(self: "RDD[T]", num: int, key: Callable[[T], "S"]) -> List[T]: + ... + + def top(self: "RDD[T]", num: int, key: Optional[Callable[[T], "S"]] = None) -> List[T]: """ Get the top N elements from an RDD. @@ -1534,15 +1768,23 @@ def top(self, num, key=None): [4, 3, 2] """ - def topIterator(iterator): + def topIterator(iterator: Iterable[T]) -> Iterable[List[T]]: yield heapq.nlargest(num, iterator, key=key) - def merge(a, b): + def merge(a: List[T], b: List[T]) -> List[T]: return heapq.nlargest(num, a + b, key=key) return self.mapPartitions(topIterator).reduce(merge) - def takeOrdered(self, num, key=None): + @overload + def takeOrdered(self: "RDD[S]", num: int) -> List["S"]: + ... + + @overload + def takeOrdered(self: "RDD[T]", num: int, key: Callable[[T], "S"]) -> List[T]: + ... + + def takeOrdered(self: "RDD[T]", num: int, key: Optional[Callable[[T], "S"]] = None) -> List[T]: """ Get the N elements from an RDD ordered in ascending order or as specified by the optional key function. @@ -1560,12 +1802,12 @@ def takeOrdered(self, num, key=None): [10, 9, 7, 6, 5, 4] """ - def merge(a, b): + def merge(a: List[T], b: List[T]) -> List[T]: return heapq.nsmallest(num, a + b, key) return self.mapPartitions(lambda it: [heapq.nsmallest(num, it, key)]).reduce(merge) - def take(self, num): + def take(self: "RDD[T]", num: int) -> List[T]: """ Take the first num elements of the RDD. 
@@ -1589,7 +1831,7 @@ def take(self, num): >>> sc.parallelize(range(100), 100).filter(lambda x: x > 90).take(3) [91, 92, 93] """ - items = [] + items: List[T] = [] totalParts = self.getNumPartitions() partsScanned = 0 @@ -1612,7 +1854,7 @@ def take(self, num): left = num - len(items) - def takeUpToNumLeft(iterator): + def takeUpToNumLeft(iterator: Iterable[T]) -> Iterable[T]: iterator = iter(iterator) taken = 0 while taken < left: @@ -1630,7 +1872,7 @@ def takeUpToNumLeft(iterator): return items[:num] - def first(self): + def first(self: "RDD[T]") -> T: """ Return the first element in this RDD. @@ -1648,7 +1890,7 @@ def first(self): return rs[0] raise ValueError("RDD is empty") - def isEmpty(self): + def isEmpty(self) -> bool: """ Returns true if and only if the RDD contains no elements at all. @@ -1665,7 +1907,12 @@ def isEmpty(self): """ return self.getNumPartitions() == 0 or len(self.take(1)) == 0 - def saveAsNewAPIHadoopDataset(self, conf, keyConverter=None, valueConverter=None): + def saveAsNewAPIHadoopDataset( + self: "RDD[Tuple[K, V]]", + conf: Dict[str, str], + keyConverter: Optional[str] = None, + valueConverter: Optional[str] = None, + ) -> None: """ Output a Python RDD of key-value pairs (of form ``RDD[(K, V)]``) to any Hadoop file system, using the new Hadoop OutputFormat API (mapreduce package). Keys/values are @@ -1683,20 +1930,22 @@ def saveAsNewAPIHadoopDataset(self, conf, keyConverter=None, valueConverter=None """ jconf = self.ctx._dictToJavaMap(conf) pickledRDD = self._pickled() + assert self.ctx._jvm is not None + self.ctx._jvm.PythonRDD.saveAsHadoopDataset( pickledRDD._jrdd, True, jconf, keyConverter, valueConverter, True ) def saveAsNewAPIHadoopFile( - self, - path, - outputFormatClass, - keyClass=None, - valueClass=None, - keyConverter=None, - valueConverter=None, - conf=None, - ): + self: "RDD[Tuple[K, V]]", + path: str, + outputFormatClass: str, + keyClass: Optional[str] = None, + valueClass: Optional[str] = None, + keyConverter: Optional[str] = None, + valueConverter: Optional[str] = None, + conf: Optional[Dict[str, str]] = None, + ) -> None: """ Output a Python RDD of key-value pairs (of form ``RDD[(K, V)]``) to any Hadoop file system, using the new Hadoop OutputFormat API (mapreduce package). Key and value types @@ -1725,6 +1974,8 @@ def saveAsNewAPIHadoopFile( """ jconf = self.ctx._dictToJavaMap(conf) pickledRDD = self._pickled() + assert self.ctx._jvm is not None + self.ctx._jvm.PythonRDD.saveAsNewAPIHadoopFile( pickledRDD._jrdd, True, @@ -1737,7 +1988,12 @@ def saveAsNewAPIHadoopFile( jconf, ) - def saveAsHadoopDataset(self, conf, keyConverter=None, valueConverter=None): + def saveAsHadoopDataset( + self: "RDD[Tuple[K, V]]", + conf: Dict[str, str], + keyConverter: Optional[str] = None, + valueConverter: Optional[str] = None, + ) -> None: """ Output a Python RDD of key-value pairs (of form ``RDD[(K, V)]``) to any Hadoop file system, using the old Hadoop OutputFormat API (mapred package). 
Keys/values are @@ -1755,21 +2011,23 @@ def saveAsHadoopDataset(self, conf, keyConverter=None, valueConverter=None): """ jconf = self.ctx._dictToJavaMap(conf) pickledRDD = self._pickled() + assert self.ctx._jvm is not None + self.ctx._jvm.PythonRDD.saveAsHadoopDataset( pickledRDD._jrdd, True, jconf, keyConverter, valueConverter, False ) def saveAsHadoopFile( - self, - path, - outputFormatClass, - keyClass=None, - valueClass=None, - keyConverter=None, - valueConverter=None, - conf=None, - compressionCodecClass=None, - ): + self: "RDD[Tuple[K, V]]", + path: str, + outputFormatClass: str, + keyClass: Optional[str] = None, + valueClass: Optional[str] = None, + keyConverter: Optional[str] = None, + valueConverter: Optional[str] = None, + conf: Optional[Dict[str, str]] = None, + compressionCodecClass: Optional[str] = None, + ) -> None: """ Output a Python RDD of key-value pairs (of form ``RDD[(K, V)]``) to any Hadoop file system, using the old Hadoop OutputFormat API (mapred package). Key and value types @@ -1803,6 +2061,8 @@ def saveAsHadoopFile( """ jconf = self.ctx._dictToJavaMap(conf) pickledRDD = self._pickled() + assert self.ctx._jvm is not None + self.ctx._jvm.PythonRDD.saveAsHadoopFile( pickledRDD._jrdd, True, @@ -1816,7 +2076,9 @@ def saveAsHadoopFile( compressionCodecClass, ) - def saveAsSequenceFile(self, path, compressionCodecClass=None): + def saveAsSequenceFile( + self: "RDD[Tuple[K, V]]", path: str, compressionCodecClass: Optional[str] = None + ) -> None: """ Output a Python RDD of key-value pairs (of form ``RDD[(K, V)]``) to any Hadoop file system, using the "org.apache.hadoop.io.Writable" types that we convert from the @@ -1834,11 +2096,13 @@ def saveAsSequenceFile(self, path, compressionCodecClass=None): i.e. "org.apache.hadoop.io.compress.GzipCodec" (None by default) """ pickledRDD = self._pickled() + assert self.ctx._jvm is not None + self.ctx._jvm.PythonRDD.saveAsSequenceFile( pickledRDD._jrdd, True, path, compressionCodecClass ) - def saveAsPickleFile(self, path, batchSize=10): + def saveAsPickleFile(self, path: str, batchSize: int = 10) -> None: """ Save this RDD as a SequenceFile of serialized objects. The serializer used is :class:`pyspark.serializers.CPickleSerializer`, default batch size @@ -1853,13 +2117,14 @@ def saveAsPickleFile(self, path, batchSize=10): >>> sorted(sc.pickleFile(tmpFile.name, 5).map(str).collect()) ['1', '2', 'rdd', 'spark'] """ + ser: Serializer if batchSize == 0: ser = AutoBatchedSerializer(CPickleSerializer()) else: ser = BatchedSerializer(CPickleSerializer(), batchSize) self._reserialize(ser)._jrdd.saveAsObjectFile(path) - def saveAsTextFile(self, path, compressionCodecClass=None): + def saveAsTextFile(self, path: str, compressionCodecClass: Optional[str] = None) -> None: """ Save this RDD as a text file, using string representations of elements. 
@@ -1904,16 +2169,20 @@ def saveAsTextFile(self, path, compressionCodecClass=None): 'bar\\nfoo\\n' """ - def func(split, iterator): + def func(split: int, iterator: Iterable[Any]) -> Iterable[bytes]: for x in iterator: - if not isinstance(x, (str, bytes)): - x = str(x) - if isinstance(x, str): - x = x.encode("utf-8") - yield x + if isinstance(x, bytes): + yield x + elif isinstance(x, str): + yield x.encode("utf-8") + else: + yield str(x).encode("utf-8") keyed = self.mapPartitionsWithIndex(func) - keyed._bypass_serializer = True + keyed._bypass_serializer = True # type: ignore[attr-defined] + + assert self.ctx._jvm is not None + if compressionCodecClass: compressionCodec = self.ctx._jvm.java.lang.Class.forName(compressionCodecClass) keyed._jrdd.map(self.ctx._jvm.BytesToString()).saveAsTextFile(path, compressionCodec) @@ -1922,7 +2191,7 @@ def func(split, iterator): # Pair functions - def collectAsMap(self): + def collectAsMap(self: "RDD[Tuple[K, V]]") -> Dict[K, V]: """ Return the key-value pairs in this RDD to the master as a dictionary. @@ -1941,7 +2210,7 @@ def collectAsMap(self): """ return dict(self.collect()) - def keys(self): + def keys(self: "RDD[Tuple[K, V]]") -> "RDD[K]": """ Return an RDD with the keys of each tuple. @@ -1953,7 +2222,7 @@ def keys(self): """ return self.map(lambda x: x[0]) - def values(self): + def values(self: "RDD[Tuple[K, V]]") -> "RDD[V]": """ Return an RDD with the values of each tuple. @@ -1965,7 +2234,12 @@ def values(self): """ return self.map(lambda x: x[1]) - def reduceByKey(self, func, numPartitions=None, partitionFunc=portable_hash): + def reduceByKey( + self: "RDD[Tuple[K, V]]", + func: Callable[[V, V], V], + numPartitions: Optional[int] = None, + partitionFunc: Callable[[K], int] = portable_hash, + ) -> "RDD[Tuple[K, V]]": """ Merge the values for each key using an associative and commutative reduce function. @@ -1985,7 +2259,7 @@ def reduceByKey(self, func, numPartitions=None, partitionFunc=portable_hash): """ return self.combineByKey(lambda x: x, func, func, numPartitions, partitionFunc) - def reduceByKeyLocally(self, func): + def reduceByKeyLocally(self: "RDD[Tuple[K, V]]", func: Callable[[V, V], V]) -> Dict[K, V]: """ Merge the values for each key using an associative and commutative reduce function, but return the results immediately to the master as a dictionary. @@ -2002,20 +2276,20 @@ def reduceByKeyLocally(self, func): """ func = fail_on_stopiteration(func) - def reducePartition(iterator): - m = {} + def reducePartition(iterator: Iterable[Tuple[K, V]]) -> Iterable[Dict[K, V]]: + m: Dict[K, V] = {} for k, v in iterator: m[k] = func(m[k], v) if k in m else v yield m - def mergeMaps(m1, m2): + def mergeMaps(m1: Dict[K, V], m2: Dict[K, V]) -> Dict[K, V]: for k, v in m2.items(): m1[k] = func(m1[k], v) if k in m1 else v return m1 return self.mapPartitions(reducePartition).reduce(mergeMaps) - def countByKey(self): + def countByKey(self: "RDD[Tuple[K, V]]") -> Dict[K, int]: """ Count the number of elements for each key, and return the result to the master as a dictionary. @@ -2028,7 +2302,11 @@ def countByKey(self): """ return self.map(lambda x: x[0]).countByValue() - def join(self, other, numPartitions=None): + def join( + self: "RDD[Tuple[K, V]]", + other: "RDD[Tuple[K, U]]", + numPartitions: Optional[int] = None, + ) -> "RDD[Tuple[K, Tuple[V, U]]]": """ Return an RDD containing all pairs of elements with matching keys in `self` and `other`. 
@@ -2047,7 +2325,11 @@ def join(self, other, numPartitions=None): """ return python_join(self, other, numPartitions) - def leftOuterJoin(self, other, numPartitions=None): + def leftOuterJoin( + self: "RDD[Tuple[K, V]]", + other: "RDD[Tuple[K, U]]", + numPartitions: Optional[int] = None, + ) -> "RDD[Tuple[K, Tuple[V, Optional[U]]]]": """ Perform a left outer join of `self` and `other`. @@ -2066,7 +2348,11 @@ def leftOuterJoin(self, other, numPartitions=None): """ return python_left_outer_join(self, other, numPartitions) - def rightOuterJoin(self, other, numPartitions=None): + def rightOuterJoin( + self: "RDD[Tuple[K, V]]", + other: "RDD[Tuple[K, U]]", + numPartitions: Optional[int] = None, + ) -> "RDD[Tuple[K, Tuple[Optional[V], U]]]": """ Perform a right outer join of `self` and `other`. @@ -2085,7 +2371,11 @@ def rightOuterJoin(self, other, numPartitions=None): """ return python_right_outer_join(self, other, numPartitions) - def fullOuterJoin(self, other, numPartitions=None): + def fullOuterJoin( + self: "RDD[Tuple[K, V]]", + other: "RDD[Tuple[K, U]]", + numPartitions: Optional[int] = None, + ) -> "RDD[Tuple[K, Tuple[Optional[V], Optional[U]]]]": """ Perform a right outer join of `self` and `other`. @@ -2111,7 +2401,11 @@ def fullOuterJoin(self, other, numPartitions=None): # TODO: add option to control map-side combining # portable_hash is used as default, because builtin hash of None is different # cross machines. - def partitionBy(self, numPartitions, partitionFunc=portable_hash): + def partitionBy( + self: "RDD[Tuple[K, V]]", + numPartitions: Optional[int], + partitionFunc: Callable[[K], int] = portable_hash, + ) -> "RDD[Tuple[K, V]]": """ Return a copy of the RDD partitioned using the specified partitioner. @@ -2138,13 +2432,13 @@ def partitionBy(self, numPartitions, partitionFunc=portable_hash): limit = self._memory_limit() / 2 - def add_shuffle_key(split, iterator): + def add_shuffle_key(split: int, iterator: Iterable[Tuple[K, V]]) -> Iterable[bytes]: buckets = defaultdict(list) - c, batch = 0, min(10 * numPartitions, 1000) + c, batch = 0, min(10 * numPartitions, 1000) # type: ignore[operator] for k, v in iterator: - buckets[partitionFunc(k) % numPartitions].append((k, v)) + buckets[partitionFunc(k) % numPartitions].append((k, v)) # type: ignore[operator] c += 1 # check used memory and avg size of chunk of objects @@ -2160,7 +2454,7 @@ def add_shuffle_key(split, iterator): avg = int(size / n) >> 20 # let 1M < avg < 10M if avg < 1: - batch = min(sys.maxsize, batch * 1.5) + batch = min(sys.maxsize, batch * 1.5) # type: ignore[assignment] elif avg > 10: batch = max(int(batch / 1.5), 1) c = 0 @@ -2170,24 +2464,26 @@ def add_shuffle_key(split, iterator): yield outputSerializer.dumps(items) keyed = self.mapPartitionsWithIndex(add_shuffle_key, preservesPartitioning=True) - keyed._bypass_serializer = True + keyed._bypass_serializer = True # type: ignore[attr-defined] + assert self.ctx._jvm is not None + with SCCallSiteSync(self.context): pairRDD = self.ctx._jvm.PairwiseRDD(keyed._jrdd.rdd()).asJavaPairRDD() jpartitioner = self.ctx._jvm.PythonPartitioner(numPartitions, id(partitionFunc)) jrdd = self.ctx._jvm.PythonRDD.valueOfPair(pairRDD.partitionBy(jpartitioner)) - rdd = RDD(jrdd, self.ctx, BatchedSerializer(outputSerializer)) + rdd: "RDD[Tuple[K, V]]" = RDD(jrdd, self.ctx, BatchedSerializer(outputSerializer)) rdd.partitioner = partitioner return rdd # TODO: add control over map-side aggregation def combineByKey( - self, - createCombiner, - mergeValue, - mergeCombiners, - 
numPartitions=None, - partitionFunc=portable_hash, - ): + self: "RDD[Tuple[K, V]]", + createCombiner: Callable[[V], U], + mergeValue: Callable[[U, V], U], + mergeCombiners: Callable[[U, U], U], + numPartitions: Optional[int] = None, + partitionFunc: Callable[[K], int] = portable_hash, + ) -> "RDD[Tuple[K, U]]": """ Generic function to combine the elements for each key using a custom set of aggregation functions. @@ -2238,7 +2534,7 @@ def combineByKey( memory = self._memory_limit() agg = Aggregator(createCombiner, mergeValue, mergeCombiners) - def combineLocally(iterator): + def combineLocally(iterator: Iterable[Tuple[K, V]]) -> Iterable[Tuple[K, U]]: merger = ExternalMerger(agg, memory * 0.9, serializer) merger.mergeValues(iterator) return merger.items() @@ -2246,7 +2542,7 @@ def combineLocally(iterator): locally_combined = self.mapPartitions(combineLocally, preservesPartitioning=True) shuffled = locally_combined.partitionBy(numPartitions, partitionFunc) - def _mergeCombiners(iterator): + def _mergeCombiners(iterator: Iterable[Tuple[K, U]]) -> Iterable[Tuple[K, U]]: merger = ExternalMerger(agg, memory, serializer) merger.mergeCombiners(iterator) return merger.items() @@ -2254,8 +2550,13 @@ def _mergeCombiners(iterator): return shuffled.mapPartitions(_mergeCombiners, preservesPartitioning=True) def aggregateByKey( - self, zeroValue, seqFunc, combFunc, numPartitions=None, partitionFunc=portable_hash - ): + self: "RDD[Tuple[K, V]]", + zeroValue: U, + seqFunc: Callable[[U, V], U], + combFunc: Callable[[U, U], U], + numPartitions: Optional[int] = None, + partitionFunc: Callable[[K], int] = portable_hash, + ) -> "RDD[Tuple[K, U]]": """ Aggregate the values of each key, using given combine functions and a neutral "zero value". This function can return a different result type, U, than the type @@ -2266,14 +2567,20 @@ def aggregateByKey( allowed to modify and return their first argument instead of creating a new U. """ - def createZero(): + def createZero() -> U: return copy.deepcopy(zeroValue) return self.combineByKey( lambda v: seqFunc(createZero(), v), seqFunc, combFunc, numPartitions, partitionFunc ) - def foldByKey(self, zeroValue, func, numPartitions=None, partitionFunc=portable_hash): + def foldByKey( + self: "RDD[Tuple[K, V]]", + zeroValue: V, + func: Callable[[V, V], V], + numPartitions: Optional[int] = None, + partitionFunc: Callable[[K], int] = portable_hash, + ) -> "RDD[Tuple[K, V]]": """ Merge the values for each key using an associative function "func" and a neutral "zeroValue" which may be added to the result an @@ -2288,18 +2595,22 @@ def foldByKey(self, zeroValue, func, numPartitions=None, partitionFunc=portable_ [('a', 2), ('b', 1)] """ - def createZero(): + def createZero() -> V: return copy.deepcopy(zeroValue) return self.combineByKey( lambda v: func(createZero(), v), func, func, numPartitions, partitionFunc ) - def _memory_limit(self): + def _memory_limit(self) -> int: return _parse_memory(self.ctx._conf.get("spark.python.worker.memory", "512m")) # TODO: support variant with custom partitioner - def groupByKey(self, numPartitions=None, partitionFunc=portable_hash): + def groupByKey( + self: "RDD[Tuple[K, V]]", + numPartitions: Optional[int] = None, + partitionFunc: Callable[[K], int] = portable_hash, + ) -> "RDD[Tuple[K, Iterable[V]]]": """ Group the values for each key in the RDD into a single sequence. Hash-partitions the resulting RDD with numPartitions partitions. 
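# Sketch of the key-oriented signatures annotated above: partitionBy places a pair at
# partitionFunc(k) % numPartitions, and combineByKey builds a combiner type U (here a
# (sum, count) tuple) distinct from the value type V, giving a per-key mean.
# Assumes a live SparkContext bound to `sc`.
pairs = sc.parallelize([("a", 1), ("b", 2), ("a", 3), ("b", 4)])

by_key = pairs.partitionBy(2, lambda k: ord(k)).glom().collect()
print([sorted(p) for p in by_key])  # keys with even/odd ord() land in separate partitions

means = pairs.combineByKey(
    lambda v: (v, 1),                         # createCombiner: V -> U
    lambda acc, v: (acc[0] + v, acc[1] + 1),  # mergeValue: (U, V) -> U
    lambda a, b: (a[0] + b[0], a[1] + b[1]),  # mergeCombiners: (U, U) -> U
).mapValues(lambda s: s[0] / s[1])
print(sorted(means.collect()))  # [('a', 2.0), ('b', 3.0)]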
@@ -2319,14 +2630,14 @@ def groupByKey(self, numPartitions=None, partitionFunc=portable_hash): [('a', [1, 1]), ('b', [1])] """ - def createCombiner(x): + def createCombiner(x: V) -> List[V]: return [x] - def mergeValue(xs, x): + def mergeValue(xs: List[V], x: V) -> List[V]: xs.append(x) return xs - def mergeCombiners(a, b): + def mergeCombiners(a: List[V], b: List[V]) -> List[V]: a.extend(b) return a @@ -2334,7 +2645,7 @@ def mergeCombiners(a, b): serializer = self._jrdd_deserializer agg = Aggregator(createCombiner, mergeValue, mergeCombiners) - def combine(iterator): + def combine(iterator: Iterable[Tuple[K, V]]) -> Iterable[Tuple[K, List[V]]]: merger = ExternalMerger(agg, memory * 0.9, serializer) merger.mergeValues(iterator) return merger.items() @@ -2342,14 +2653,16 @@ def combine(iterator): locally_combined = self.mapPartitions(combine, preservesPartitioning=True) shuffled = locally_combined.partitionBy(numPartitions, partitionFunc) - def groupByKey(it): + def groupByKey(it: Iterable[Tuple[K, List[V]]]) -> Iterable[Tuple[K, List[V]]]: merger = ExternalGroupBy(agg, memory, serializer) merger.mergeCombiners(it) return merger.items() return shuffled.mapPartitions(groupByKey, True).mapValues(ResultIterable) - def flatMapValues(self, f): + def flatMapValues( + self: "RDD[Tuple[K, V]]", f: Callable[[V], Iterable[U]] + ) -> "RDD[Tuple[K, U]]": """ Pass each value in the key-value pair RDD through a flatMap function without changing the keys; this also retains the original RDD's @@ -2363,12 +2676,12 @@ def flatMapValues(self, f): [('a', 'x'), ('a', 'y'), ('a', 'z'), ('b', 'p'), ('b', 'r')] """ - def flat_map_fn(kv): + def flat_map_fn(kv: Tuple[K, V]) -> Iterable[Tuple[K, U]]: return ((kv[0], x) for x in f(kv[1])) return self.flatMap(flat_map_fn, preservesPartitioning=True) - def mapValues(self, f): + def mapValues(self: "RDD[Tuple[K, V]]", f: Callable[[V], U]) -> "RDD[Tuple[K, U]]": """ Pass each value in the key-value pair RDD through a map function without changing the keys; this also retains the original RDD's @@ -2382,12 +2695,45 @@ def mapValues(self, f): [('a', 3), ('b', 1)] """ - def map_values_fn(kv): + def map_values_fn(kv: Tuple[K, V]) -> Tuple[K, U]: return kv[0], f(kv[1]) return self.map(map_values_fn, preservesPartitioning=True) - def groupWith(self, other, *others): + @overload + def groupWith( + self: "RDD[Tuple[K, V]]", other: "RDD[Tuple[K, V1]]" + ) -> "RDD[Tuple[K, Tuple[ResultIterable[V], ResultIterable[V1]]]]": + ... + + @overload + def groupWith( + self: "RDD[Tuple[K, V]]", other: "RDD[Tuple[K, V1]]", __o1: "RDD[Tuple[K, V2]]" + ) -> "RDD[Tuple[K, Tuple[ResultIterable[V], ResultIterable[V1], ResultIterable[V2]]]]": + ... + + @overload + def groupWith( + self: "RDD[Tuple[K, V]]", + other: "RDD[Tuple[K, V1]]", + _o1: "RDD[Tuple[K, V2]]", + _o2: "RDD[Tuple[K, V3]]", + ) -> """RDD[ + Tuple[ + K, + Tuple[ + ResultIterable[V], + ResultIterable[V1], + ResultIterable[V2], + ResultIterable[V3], + ], + ] + ]""": + ... + + def groupWith( # type: ignore[misc] + self: "RDD[Tuple[Any, Any]]", other: "RDD[Tuple[Any, Any]]", *others: "RDD[Tuple[Any, Any]]" + ) -> "RDD[Tuple[Any, Tuple[ResultIterable[Any], ...]]]": """ Alias for cogroup but with support for multiple RDDs. 
@@ -2404,7 +2750,11 @@ def groupWith(self, other, *others): return python_cogroup((self, other) + others, numPartitions=None) # TODO: add variant with custom partitioner - def cogroup(self, other, numPartitions=None): + def cogroup( + self: "RDD[Tuple[K, V]]", + other: "RDD[Tuple[K, U]]", + numPartitions: Optional[int] = None, + ) -> "RDD[Tuple[K, Tuple[ResultIterable[V], ResultIterable[U]]]]": """ For each key k in `self` or `other`, return a resulting RDD that contains a tuple with the list of values for that key in `self` as @@ -2419,7 +2769,12 @@ def cogroup(self, other, numPartitions=None): """ return python_cogroup((self, other), numPartitions) - def sampleByKey(self, withReplacement, fractions, seed=None): + def sampleByKey( + self: "RDD[Tuple[K, V]]", + withReplacement: bool, + fractions: Dict[K, Union[float, int]], + seed: Optional[int] = None, + ) -> "RDD[Tuple[K, V]]": """ Return a subset of this RDD sampled by key (via stratified sampling). Create a sample of this RDD using variable sampling rates for @@ -2443,7 +2798,11 @@ def sampleByKey(self, withReplacement, fractions, seed=None): RDDStratifiedSampler(withReplacement, fractions, seed).func, True ) - def subtractByKey(self, other, numPartitions=None): + def subtractByKey( + self: "RDD[Tuple[K, V]]", + other: "RDD[Tuple[K, Any]]", + numPartitions: Optional[int] = None, + ) -> "RDD[Tuple[K, V]]": """ Return each (key, value) pair in `self` that has no pair with matching key in `other`. @@ -2456,13 +2815,17 @@ def subtractByKey(self, other, numPartitions=None): [('b', 4), ('b', 5)] """ - def filter_func(pair): + def filter_func(pair: Tuple[K, Tuple[V, Any]]) -> bool: key, (val1, val2) = pair - return val1 and not val2 + return val1 and not val2 # type: ignore[return-value] - return self.cogroup(other, numPartitions).filter(filter_func).flatMapValues(lambda x: x[0]) + return ( + self.cogroup(other, numPartitions) + .filter(filter_func) # type: ignore[arg-type] + .flatMapValues(lambda x: x[0]) + ) - def subtract(self, other, numPartitions=None): + def subtract(self: "RDD[T]", other: "RDD[T]", numPartitions: Optional[int] = None) -> "RDD[T]": """ Return each value in `self` that is not contained in `other`. @@ -2477,7 +2840,7 @@ def subtract(self, other, numPartitions=None): rdd = other.map(lambda x: (x, True)) return self.map(lambda x: (x, True)).subtractByKey(rdd, numPartitions).keys() - def keyBy(self, f): + def keyBy(self: "RDD[T]", f: Callable[[T], K]) -> "RDD[Tuple[K, T]]": """ Creates tuples of the elements in this RDD by applying `f`. @@ -2490,7 +2853,7 @@ def keyBy(self, f): """ return self.map(lambda x: (f(x), x)) - def repartition(self, numPartitions): + def repartition(self: "RDD[T]", numPartitions: int) -> "RDD[T]": """ Return a new RDD that has exactly numPartitions partitions. @@ -2511,7 +2874,7 @@ def repartition(self, numPartitions): """ return self.coalesce(numPartitions, shuffle=True) - def coalesce(self, numPartitions, shuffle=False): + def coalesce(self: "RDD[T]", numPartitions: int, shuffle: bool = False) -> "RDD[T]": """ Return a new RDD that is reduced into `numPartitions` partitions. @@ -2535,7 +2898,7 @@ def coalesce(self, numPartitions, shuffle=False): jrdd = self._jrdd.coalesce(numPartitions, shuffle) return RDD(jrdd, self.ctx, jrdd_deserializer) - def zip(self, other): + def zip(self: "RDD[T]", other: "RDD[U]") -> "RDD[Tuple[T, U]]": """ Zips this RDD with another one, returning key-value pairs with the first element in each RDD second element in each RDD, etc. 
Assumes @@ -2551,12 +2914,12 @@ def zip(self, other): [(0, 1000), (1, 1001), (2, 1002), (3, 1003), (4, 1004)] """ - def get_batch_size(ser): + def get_batch_size(ser: Serializer) -> int: if isinstance(ser, BatchedSerializer): return ser.batchSize return 1 # not batched - def batch_as(rdd, batchSize): + def batch_as(rdd: "RDD[V]", batchSize: int) -> "RDD[V]": return rdd._reserialize(BatchedSerializer(CPickleSerializer(), batchSize)) my_batch = get_batch_size(self._jrdd_deserializer) @@ -2579,7 +2942,7 @@ def batch_as(rdd, batchSize): deserializer = PairDeserializer(self._jrdd_deserializer, other._jrdd_deserializer) return RDD(pairRDD, self.ctx, deserializer) - def zipWithIndex(self): + def zipWithIndex(self: "RDD[T]") -> "RDD[Tuple[T, int]]": """ Zips this RDD with its element indices. @@ -2602,13 +2965,13 @@ def zipWithIndex(self): for i in range(len(nums) - 1): starts.append(starts[-1] + nums[i]) - def func(k, it): + def func(k: int, it: Iterable[T]) -> Iterable[Tuple[T, int]]: for i, v in enumerate(it, starts[k]): yield v, i return self.mapPartitionsWithIndex(func) - def zipWithUniqueId(self): + def zipWithUniqueId(self: "RDD[T]") -> "RDD[Tuple[T, int]]": """ Zips this RDD with generated unique Long ids. @@ -2624,21 +2987,20 @@ def zipWithUniqueId(self): """ n = self.getNumPartitions() - def func(k, it): + def func(k: int, it: Iterable[T]) -> Iterable[Tuple[T, int]]: for i, v in enumerate(it): yield v, i * n + k return self.mapPartitionsWithIndex(func) - def name(self): + def name(self) -> Optional[str]: """ Return the name of this RDD. """ n = self._jrdd.name() - if n: - return n + return n if n else None - def setName(self, name): + def setName(self: "RDD[T]", name: str) -> "RDD[T]": """ Assign a name to this RDD. @@ -2651,15 +3013,15 @@ def setName(self, name): self._jrdd.setName(name) return self - def toDebugString(self): + def toDebugString(self) -> Optional[bytes]: """ A description of this RDD and its recursive dependencies for debugging. """ debug_string = self._jrdd.toDebugString() - if debug_string: - return debug_string.encode("utf-8") - def getStorageLevel(self): + return debug_string.encode("utf-8") if debug_string else None + + def getStorageLevel(self) -> StorageLevel: """ Get the RDD's current storage level. @@ -2681,7 +3043,7 @@ def getStorageLevel(self): ) return storage_level - def _defaultReducePartitions(self): + def _defaultReducePartitions(self) -> int: """ Returns the default number of partitions to use during reduce tasks (e.g., groupBy). If spark.default.parallelism is set, then we'll use the value from SparkContext @@ -2696,7 +3058,7 @@ def _defaultReducePartitions(self): else: return self.getNumPartitions() - def lookup(self, key): + def lookup(self: "RDD[Tuple[K, V]]", key: K) -> List[V]: """ Return the list of values in the RDD for key `key`. This operation is done efficiently if the RDD has a known partitioner by only @@ -2724,16 +3086,18 @@ def lookup(self, key): return values.collect() - def _to_java_object_rdd(self): + def _to_java_object_rdd(self) -> "JavaObject": """Return a JavaRDD of Object by unpickling It will convert each Python object into Java object by Pickle, whenever the RDD is serialized in batch or not. 
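# Sketch contrasting zipWithIndex (consecutive indices; triggers an extra job when the RDD
# has more than one partition) with zipWithUniqueId (id = i * n + k, no extra job), per the
# annotations above. Assumes a live SparkContext bound to `sc`.
rdd = sc.parallelize(["a", "b", "c", "d"], 2)
print(rdd.zipWithIndex().collect())     # [('a', 0), ('b', 1), ('c', 2), ('d', 3)]
print(rdd.zipWithUniqueId().collect())  # [('a', 0), ('b', 2), ('c', 1), ('d', 3)]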
""" rdd = self._pickled() + assert self.ctx._jvm is not None + return self.ctx._jvm.SerDeUtil.pythonToJava(rdd._jrdd, True) - def countApprox(self, timeout, confidence=0.95): + def countApprox(self, timeout: int, confidence: float = 0.95) -> int: """ Approximate version of count() that returns a potentially incomplete result within a timeout, even if not all tasks have finished. @@ -2747,7 +3111,9 @@ def countApprox(self, timeout, confidence=0.95): drdd = self.mapPartitions(lambda it: [float(sum(1 for i in it))]) return int(drdd.sumApprox(timeout, confidence)) - def sumApprox(self, timeout, confidence=0.95): + def sumApprox( + self: "RDD[Union[float, int]]", timeout: int, confidence: float = 0.95 + ) -> BoundedFloat: """ Approximate operation to return the sum within a timeout or meet the confidence. @@ -2760,11 +3126,14 @@ def sumApprox(self, timeout, confidence=0.95): True """ jrdd = self.mapPartitions(lambda it: [float(sum(it))])._to_java_object_rdd() + assert self.ctx._jvm is not None jdrdd = self.ctx._jvm.JavaDoubleRDD.fromRDD(jrdd.rdd()) r = jdrdd.sumApprox(timeout, confidence).getFinalValue() return BoundedFloat(r.mean(), r.confidence(), r.low(), r.high()) - def meanApprox(self, timeout, confidence=0.95): + def meanApprox( + self: "RDD[Union[float, int]]", timeout: int, confidence: float = 0.95 + ) -> BoundedFloat: """ Approximate operation to return the mean within a timeout or meet the confidence. @@ -2777,11 +3146,12 @@ def meanApprox(self, timeout, confidence=0.95): True """ jrdd = self.map(float)._to_java_object_rdd() + assert self.ctx._jvm is not None jdrdd = self.ctx._jvm.JavaDoubleRDD.fromRDD(jrdd.rdd()) r = jdrdd.meanApprox(timeout, confidence).getFinalValue() return BoundedFloat(r.mean(), r.confidence(), r.low(), r.high()) - def countApproxDistinct(self, relativeSD=0.05): + def countApproxDistinct(self: "RDD[T]", relativeSD: float = 0.05) -> int: """ Return approximate number of distinct elements in the RDD. @@ -2814,7 +3184,7 @@ def countApproxDistinct(self, relativeSD=0.05): hashRDD = self.map(lambda x: portable_hash(x) & 0xFFFFFFFF) return hashRDD._to_java_object_rdd().countApproxDistinct(relativeSD) - def toLocalIterator(self, prefetchPartitions=False): + def toLocalIterator(self: "RDD[T]", prefetchPartitions: bool = False) -> Iterator[T]: """ Return an iterator that contains all of the elements in this RDD. The iterator will consume as much memory as the largest partition in this RDD. @@ -2832,13 +3202,15 @@ def toLocalIterator(self, prefetchPartitions=False): >>> [x for x in rdd.toLocalIterator()] [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] """ + assert self.ctx._jvm is not None + with SCCallSiteSync(self.context): sock_info = self.ctx._jvm.PythonRDD.toLocalIteratorAndServe( self._jrdd.rdd(), prefetchPartitions ) return _local_iterator_from_socket(sock_info, self._jrdd_deserializer) - def barrier(self): + def barrier(self: "RDD[T]") -> "RDDBarrier[T]": """ Marks the current stage as a barrier stage, where Spark must launch all tasks together. In case of a task failure, instead of only restarting the failed task, Spark will abort the @@ -2868,13 +3240,13 @@ def barrier(self): """ return RDDBarrier(self) - def _is_barrier(self): + def _is_barrier(self) -> bool: """ Whether this RDD is in a barrier stage. """ return self._jrdd.rdd().isBarrier() - def withResources(self, profile): + def withResources(self: "RDD[T]", profile: ResourceProfile) -> "RDD[T]": """ Specify a :class:`pyspark.resource.ResourceProfile` to use when calculating this RDD. 
This is only supported on certain cluster managers and currently requires dynamic @@ -2891,6 +3263,8 @@ def withResources(self, profile): if profile._java_resource_profile is not None: jrp = profile._java_resource_profile else: + assert self.ctx._jvm is not None + builder = self.ctx._jvm.org.apache.spark.resource.ResourceProfileBuilder() ereqs = ExecutorResourceRequests(self.ctx._jvm, profile._executor_resource_requests) treqs = TaskResourceRequests(self.ctx._jvm, profile._task_resource_requests) @@ -2901,7 +3275,7 @@ def withResources(self, profile): self._jrdd.withResources(jrp) return self - def getResourceProfile(self): + def getResourceProfile(self) -> Optional[ResourceProfile]: """ Get the :class:`pyspark.resource.ResourceProfile` specified with this RDD or None if it wasn't specified. @@ -2923,11 +3297,38 @@ def getResourceProfile(self): else: return None + @overload + def toDF( + self: "RDD[RowLike]", + schema: Optional[Union[List[str], Tuple[str, ...]]] = None, + sampleRatio: Optional[float] = None, + ) -> "DataFrame": + ... + + @overload + def toDF( + self: "RDD[RowLike]", schema: Optional[Union["StructType", str]] = None + ) -> "DataFrame": + ... + + @overload + def toDF( + self: "RDD[AtomicValue]", + schema: Union["AtomicType", str], + ) -> "DataFrame": + ... + + def toDF( + self: "RDD[Any]", schema: Optional[Any] = None, sampleRatio: Optional[float] = None + ) -> "DataFrame": + raise RuntimeError("""RDD.toDF was called before SparkSession was initialized.""") -def _prepare_for_python_RDD(sc, command): + +def _prepare_for_python_RDD(sc: "SparkContext", command: Any) -> Tuple[bytes, Any, Any, Any]: # the serialized command will be compressed by broadcast ser = CloudPickleSerializer() pickled_command = ser.dumps(command) + assert sc._jvm is not None if len(pickled_command) > sc._jvm.PythonUtils.getBroadcastThreshold(sc._jsc): # Default 1M # The broadcast will have same life cycle as created PythonRDD broadcast = sc.broadcast(pickled_command) @@ -2937,11 +3338,14 @@ def _prepare_for_python_RDD(sc, command): return pickled_command, broadcast_vars, sc.environment, sc._python_includes -def _wrap_function(sc, func, deserializer, serializer, profiler=None): +def _wrap_function( + sc: "SparkContext", func: Callable, deserializer: Any, serializer: Any, profiler: Any = None +) -> "JavaObject": assert deserializer, "deserializer should not be empty" assert serializer, "serializer should not be empty" command = (func, profiler, deserializer, serializer) pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command) + assert sc._jvm is not None return sc._jvm.PythonFunction( bytearray(pickled_command), env, @@ -2953,7 +3357,7 @@ def _wrap_function(sc, func, deserializer, serializer, profiler=None): ) -class RDDBarrier: +class RDDBarrier(Generic[T]): """ Wraps an RDD in a barrier stage, which forces Spark to launch tasks of this stage together. @@ -2966,10 +3370,12 @@ class RDDBarrier: This API is experimental """ - def __init__(self, rdd): + def __init__(self, rdd: RDD[T]): self.rdd = rdd - def mapPartitions(self, f, preservesPartitioning=False): + def mapPartitions( + self, f: Callable[[Iterable[T]], Iterable[U]], preservesPartitioning: bool = False + ) -> RDD[U]: """ Returns a new RDD by applying a function to each partition of the wrapped RDD, where tasks are launched together in a barrier stage. 
@@ -2983,12 +3389,16 @@ def mapPartitions(self, f, preservesPartitioning=False): This API is experimental """ - def func(s, iterator): + def func(s: int, iterator: Iterable[T]) -> Iterable[U]: return f(iterator) return PipelinedRDD(self.rdd, func, preservesPartitioning, isFromBarrier=True) - def mapPartitionsWithIndex(self, f, preservesPartitioning=False): + def mapPartitionsWithIndex( + self, + f: Callable[[int, Iterable[T]], Iterable[U]], + preservesPartitioning: bool = False, + ) -> RDD[U]: """ Returns a new RDD by applying a function to each partition of the wrapped RDD, while tracking the index of the original partition. And all tasks are launched together @@ -3005,7 +3415,7 @@ def mapPartitionsWithIndex(self, f, preservesPartitioning=False): return PipelinedRDD(self.rdd, f, preservesPartitioning, isFromBarrier=True) -class PipelinedRDD(RDD): +class PipelinedRDD(RDD[U], Generic[T, U]): """ Examples @@ -3027,7 +3437,13 @@ class PipelinedRDD(RDD): 20 """ - def __init__(self, prev, func, preservesPartitioning=False, isFromBarrier=False): + def __init__( + self, + prev: RDD[T], + func: Callable[[int, Iterable[T]], Iterable[U]], + preservesPartitioning: bool = False, + isFromBarrier: bool = False, + ): if not isinstance(prev, PipelinedRDD) or not prev._is_pipelinable(): # This transformation is the first in its stage: self.func = func @@ -3035,9 +3451,9 @@ def __init__(self, prev, func, preservesPartitioning=False, isFromBarrier=False) self._prev_jrdd = prev._jrdd self._prev_jrdd_deserializer = prev._jrdd_deserializer else: - prev_func = prev.func + prev_func: Callable[[int, Iterable[V]], Iterable[T]] = prev.func - def pipeline_func(split, iterator): + def pipeline_func(split: int, iterator: Iterable[V]) -> Iterable[U]: return func(split, prev_func(split, iterator)) self.func = pipeline_func @@ -3049,18 +3465,18 @@ def pipeline_func(split, iterator): self.is_checkpointed = False self.ctx = prev.ctx self.prev = prev - self._jrdd_val = None + self._jrdd_val: Optional["JavaObject"] = None self._id = None self._jrdd_deserializer = self.ctx.serializer self._bypass_serializer = False self.partitioner = prev.partitioner if self.preservesPartitioning else None self.is_barrier = isFromBarrier or prev._is_barrier() - def getNumPartitions(self): + def getNumPartitions(self) -> int: return self._prev_jrdd.partitions().size() @property - def _jrdd(self): + def _jrdd(self) -> "JavaObject": if self._jrdd_val: return self._jrdd_val if self._bypass_serializer: @@ -3074,29 +3490,32 @@ def _jrdd(self): wrapped_func = _wrap_function( self.ctx, self.func, self._prev_jrdd_deserializer, self._jrdd_deserializer, profiler ) + + assert self.ctx._jvm is not None python_rdd = self.ctx._jvm.PythonRDD( self._prev_jrdd.rdd(), wrapped_func, self.preservesPartitioning, self.is_barrier ) self._jrdd_val = python_rdd.asJavaRDD() if profiler: + assert self._jrdd_val is not None self._id = self._jrdd_val.id() self.ctx.profiler_collector.add_profiler(self._id, profiler) return self._jrdd_val - def id(self): + def id(self) -> int: if self._id is None: self._id = self._jrdd.id() return self._id - def _is_pipelinable(self): + def _is_pipelinable(self) -> bool: return not (self.is_cached or self.is_checkpointed or self.has_resource_profile) - def _is_barrier(self): + def _is_barrier(self) -> bool: return self.is_barrier -def _test(): +def _test() -> None: import doctest from pyspark.context import SparkContext diff --git a/python/pyspark/rdd.pyi b/python/pyspark/rdd.pyi deleted file mode 100644 index c4eddbf150423..0000000000000 
--- a/python/pyspark/rdd.pyi +++ /dev/null @@ -1,481 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import overload -from typing import ( - Any, - Callable, - Dict, - Generic, - Hashable, - Iterable, - Iterator, - List, - Optional, - Tuple, - Union, - TypeVar, -) -from typing_extensions import Literal - -from numpy import int32, int64, float32, float64, ndarray - -from pyspark._typing import SupportsOrdering -from pyspark.sql.pandas._typing import ( - PandasScalarUDFType, - PandasScalarIterUDFType, - PandasGroupedMapUDFType, - PandasCogroupedMapUDFType, - PandasGroupedAggUDFType, - PandasMapIterUDFType, - ArrowMapIterUDFType, -) -import pyspark.context -from pyspark.resultiterable import ResultIterable -from pyspark.serializers import Serializer -from pyspark.storagelevel import StorageLevel -from pyspark.resource.requests import ( # noqa: F401 - ExecutorResourceRequests, - TaskResourceRequests, -) -from pyspark.resource.profile import ResourceProfile -from pyspark.statcounter import StatCounter -from pyspark.sql.dataframe import DataFrame -from pyspark.sql.types import AtomicType, StructType -from pyspark.sql._typing import AtomicValue, RowLike -from py4j.java_gateway import JavaObject # type: ignore[import] - -T = TypeVar("T") -T_co = TypeVar("T_co", covariant=True) -U = TypeVar("U") -K = TypeVar("K", bound=Hashable) -V = TypeVar("V") -V1 = TypeVar("V1") -V2 = TypeVar("V2") -V3 = TypeVar("V3") -O = TypeVar("O", bound=SupportsOrdering) -NumberOrArray = TypeVar( - "NumberOrArray", float, int, complex, int32, int64, float32, float64, ndarray -) - -def portable_hash(x: Hashable) -> int: ... - -class PythonEvalType: - NON_UDF: Literal[0] - SQL_BATCHED_UDF: Literal[100] - SQL_SCALAR_PANDAS_UDF: PandasScalarUDFType - SQL_GROUPED_MAP_PANDAS_UDF: PandasGroupedMapUDFType - SQL_GROUPED_AGG_PANDAS_UDF: PandasGroupedAggUDFType - SQL_WINDOW_AGG_PANDAS_UDF: Literal[203] - SQL_SCALAR_PANDAS_ITER_UDF: PandasScalarIterUDFType - SQL_MAP_PANDAS_ITER_UDF: PandasMapIterUDFType - SQL_COGROUPED_MAP_PANDAS_UDF: PandasCogroupedMapUDFType - SQL_MAP_ARROW_ITER_UDF: ArrowMapIterUDFType - -class BoundedFloat(float): - def __new__(cls, mean: float, confidence: float, low: float, high: float) -> BoundedFloat: ... - -class Partitioner: - numPartitions: int - partitionFunc: Callable[[Any], int] - def __init__(self, numPartitions: int, partitionFunc: Callable[[Any], int]) -> None: ... - def __eq__(self, other: Any) -> bool: ... - def __call__(self, k: Any) -> int: ... - -class RDD(Generic[T_co]): - is_cached: bool - is_checkpointed: bool - ctx: pyspark.context.SparkContext - partitioner: Optional[Partitioner] - def __init__( - self, - jrdd: JavaObject, - ctx: pyspark.context.SparkContext, - jrdd_deserializer: Serializer = ..., - ) -> None: ... 
- def id(self) -> int: ... - def __getnewargs__(self) -> Any: ... - @property - def context(self) -> pyspark.context.SparkContext: ... - def cache(self) -> RDD[T_co]: ... - def persist(self, storageLevel: StorageLevel = ...) -> RDD[T_co]: ... - def unpersist(self, blocking: bool = ...) -> RDD[T_co]: ... - def checkpoint(self) -> None: ... - def isCheckpointed(self) -> bool: ... - def localCheckpoint(self) -> None: ... - def isLocallyCheckpointed(self) -> bool: ... - def getCheckpointFile(self) -> Optional[str]: ... - def map(self, f: Callable[[T_co], U], preservesPartitioning: bool = ...) -> RDD[U]: ... - def flatMap( - self, f: Callable[[T_co], Iterable[U]], preservesPartitioning: bool = ... - ) -> RDD[U]: ... - def mapPartitions( - self, f: Callable[[Iterable[T_co]], Iterable[U]], preservesPartitioning: bool = ... - ) -> RDD[U]: ... - def mapPartitionsWithIndex( - self, - f: Callable[[int, Iterable[T_co]], Iterable[U]], - preservesPartitioning: bool = ..., - ) -> RDD[U]: ... - def mapPartitionsWithSplit( - self, - f: Callable[[int, Iterable[T_co]], Iterable[U]], - preservesPartitioning: bool = ..., - ) -> RDD[U]: ... - def getNumPartitions(self) -> int: ... - def filter(self, f: Callable[[T_co], bool]) -> RDD[T_co]: ... - def distinct(self, numPartitions: Optional[int] = ...) -> RDD[T_co]: ... - def sample( - self, withReplacement: bool, fraction: float, seed: Optional[int] = ... - ) -> RDD[T_co]: ... - def randomSplit( - self, weights: List[Union[int, float]], seed: Optional[int] = ... - ) -> List[RDD[T_co]]: ... - def takeSample( - self, withReplacement: bool, num: int, seed: Optional[int] = ... - ) -> List[T_co]: ... - def union(self, other: RDD[U]) -> RDD[Union[T_co, U]]: ... - def intersection(self, other: RDD[T_co]) -> RDD[T_co]: ... - def __add__(self, other: RDD[T_co]) -> RDD[T_co]: ... - @overload - def repartitionAndSortWithinPartitions( - self: RDD[Tuple[O, V]], - numPartitions: Optional[int] = ..., - partitionFunc: Callable[[O], int] = ..., - ascending: bool = ..., - ) -> RDD[Tuple[O, V]]: ... - @overload - def repartitionAndSortWithinPartitions( - self: RDD[Tuple[K, V]], - numPartitions: Optional[int], - partitionFunc: Callable[[K], int], - ascending: bool, - keyfunc: Callable[[K], O], - ) -> RDD[Tuple[K, V]]: ... - @overload - def repartitionAndSortWithinPartitions( - self: RDD[Tuple[K, V]], - numPartitions: Optional[int] = ..., - partitionFunc: Callable[[K], int] = ..., - ascending: bool = ..., - *, - keyfunc: Callable[[K], O], - ) -> RDD[Tuple[K, V]]: ... - @overload - def sortByKey( - self: RDD[Tuple[O, V]], - ascending: bool = ..., - numPartitions: Optional[int] = ..., - ) -> RDD[Tuple[K, V]]: ... - @overload - def sortByKey( - self: RDD[Tuple[K, V]], - ascending: bool, - numPartitions: int, - keyfunc: Callable[[K], O], - ) -> RDD[Tuple[K, V]]: ... - @overload - def sortByKey( - self: RDD[Tuple[K, V]], - ascending: bool = ..., - numPartitions: Optional[int] = ..., - *, - keyfunc: Callable[[K], O], - ) -> RDD[Tuple[K, V]]: ... - def sortBy( - self, - keyfunc: Callable[[T_co], O], - ascending: bool = ..., - numPartitions: Optional[int] = ..., - ) -> RDD[T_co]: ... - def glom(self) -> RDD[List[T_co]]: ... - def cartesian(self, other: RDD[U]) -> RDD[Tuple[T_co, U]]: ... - def groupBy( - self, - f: Callable[[T_co], K], - numPartitions: Optional[int] = ..., - partitionFunc: Callable[[K], int] = ..., - ) -> RDD[Tuple[K, Iterable[T_co]]]: ... - def pipe( - self, command: str, env: Optional[Dict[str, str]] = ..., checkCode: bool = ... - ) -> RDD[str]: ... 
- def foreach(self, f: Callable[[T_co], None]) -> None: ... - def foreachPartition(self, f: Callable[[Iterable[T_co]], None]) -> None: ... - def collect(self) -> List[T_co]: ... - def collectWithJobGroup( - self, groupId: str, description: str, interruptOnCancel: bool = ... - ) -> List[T_co]: ... - def reduce(self, f: Callable[[T_co, T_co], T_co]) -> T_co: ... - def treeReduce(self, f: Callable[[T_co, T_co], T_co], depth: int = ...) -> T_co: ... - def fold(self, zeroValue: T, op: Callable[[T_co, T_co], T_co]) -> T_co: ... - def aggregate( - self, zeroValue: U, seqOp: Callable[[U, T_co], U], combOp: Callable[[U, U], U] - ) -> U: ... - def treeAggregate( - self, - zeroValue: U, - seqOp: Callable[[U, T_co], U], - combOp: Callable[[U, U], U], - depth: int = ..., - ) -> U: ... - @overload - def max(self: RDD[O]) -> O: ... - @overload - def max(self, key: Callable[[T_co], O]) -> T_co: ... - @overload - def min(self: RDD[O]) -> O: ... - @overload - def min(self, key: Callable[[T_co], O]) -> T_co: ... - def sum(self: RDD[NumberOrArray]) -> NumberOrArray: ... - def count(self) -> int: ... - def stats(self: RDD[NumberOrArray]) -> StatCounter: ... - def histogram( - self, buckets: Union[int, List[T_co], Tuple[T_co, ...]] - ) -> Tuple[List[T_co], List[int]]: ... - def mean(self: RDD[NumberOrArray]) -> NumberOrArray: ... - def variance(self: RDD[NumberOrArray]) -> NumberOrArray: ... - def stdev(self: RDD[NumberOrArray]) -> NumberOrArray: ... - def sampleStdev(self: RDD[NumberOrArray]) -> NumberOrArray: ... - def sampleVariance(self: RDD[NumberOrArray]) -> NumberOrArray: ... - def countByValue(self: RDD[K]) -> Dict[K, int]: ... - @overload - def top(self: RDD[O], num: int) -> List[O]: ... - @overload - def top(self, num: int, key: Callable[[T_co], O]) -> List[T_co]: ... - @overload - def takeOrdered(self: RDD[O], num: int) -> List[O]: ... - @overload - def takeOrdered(self, num: int, key: Callable[[T_co], O]) -> List[T_co]: ... - def take(self, num: int) -> List[T_co]: ... - def first(self) -> T_co: ... - def isEmpty(self) -> bool: ... - def saveAsNewAPIHadoopDataset( - self: RDD[Tuple[K, V]], - conf: Dict[str, str], - keyConverter: Optional[str] = ..., - valueConverter: Optional[str] = ..., - ) -> None: ... - def saveAsNewAPIHadoopFile( - self: RDD[Tuple[K, V]], - path: str, - outputFormatClass: str, - keyClass: Optional[str] = ..., - valueClass: Optional[str] = ..., - keyConverter: Optional[str] = ..., - valueConverter: Optional[str] = ..., - conf: Optional[Dict[str, str]] = ..., - ) -> None: ... - def saveAsHadoopDataset( - self: RDD[Tuple[K, V]], - conf: Dict[str, str], - keyConverter: Optional[str] = ..., - valueConverter: Optional[str] = ..., - ) -> None: ... - def saveAsHadoopFile( - self: RDD[Tuple[K, V]], - path: str, - outputFormatClass: str, - keyClass: Optional[str] = ..., - valueClass: Optional[str] = ..., - keyConverter: Optional[str] = ..., - valueConverter: Optional[str] = ..., - conf: Optional[str] = ..., - compressionCodecClass: Optional[str] = ..., - ) -> None: ... - def saveAsSequenceFile( - self: RDD[Tuple[K, V]], path: str, compressionCodecClass: Optional[str] = ... - ) -> None: ... - def saveAsPickleFile(self, path: str, batchSize: int = ...) -> None: ... - def saveAsTextFile(self, path: str, compressionCodecClass: Optional[str] = ...) -> None: ... - def collectAsMap(self: RDD[Tuple[K, V]]) -> Dict[K, V]: ... - def keys(self: RDD[Tuple[K, V]]) -> RDD[K]: ... - def values(self: RDD[Tuple[K, V]]) -> RDD[V]: ... 
- def reduceByKey( - self: RDD[Tuple[K, V]], - func: Callable[[V, V], V], - numPartitions: Optional[int] = ..., - partitionFunc: Callable[[K], int] = ..., - ) -> RDD[Tuple[K, V]]: ... - def reduceByKeyLocally(self: RDD[Tuple[K, V]], func: Callable[[V, V], V]) -> Dict[K, V]: ... - def countByKey(self: RDD[Tuple[K, V]]) -> Dict[K, int]: ... - def join( - self: RDD[Tuple[K, V]], - other: RDD[Tuple[K, U]], - numPartitions: Optional[int] = ..., - ) -> RDD[Tuple[K, Tuple[V, U]]]: ... - def leftOuterJoin( - self: RDD[Tuple[K, V]], - other: RDD[Tuple[K, U]], - numPartitions: Optional[int] = ..., - ) -> RDD[Tuple[K, Tuple[V, Optional[U]]]]: ... - def rightOuterJoin( - self: RDD[Tuple[K, V]], - other: RDD[Tuple[K, U]], - numPartitions: Optional[int] = ..., - ) -> RDD[Tuple[K, Tuple[Optional[V], U]]]: ... - def fullOuterJoin( - self: RDD[Tuple[K, V]], - other: RDD[Tuple[K, U]], - numPartitions: Optional[int] = ..., - ) -> RDD[Tuple[K, Tuple[Optional[V], Optional[U]]]]: ... - def partitionBy( - self: RDD[Tuple[K, V]], - numPartitions: int, - partitionFunc: Callable[[K], int] = ..., - ) -> RDD[Tuple[K, V]]: ... - def combineByKey( - self: RDD[Tuple[K, V]], - createCombiner: Callable[[V], U], - mergeValue: Callable[[U, V], U], - mergeCombiners: Callable[[U, U], U], - numPartitions: Optional[int] = ..., - partitionFunc: Callable[[K], int] = ..., - ) -> RDD[Tuple[K, U]]: ... - def aggregateByKey( - self: RDD[Tuple[K, V]], - zeroValue: U, - seqFunc: Callable[[U, V], U], - combFunc: Callable[[U, U], U], - numPartitions: Optional[int] = ..., - partitionFunc: Callable[[K], int] = ..., - ) -> RDD[Tuple[K, U]]: ... - def foldByKey( - self: RDD[Tuple[K, V]], - zeroValue: V, - func: Callable[[V, V], V], - numPartitions: Optional[int] = ..., - partitionFunc: Callable[[K], int] = ..., - ) -> RDD[Tuple[K, V]]: ... - def groupByKey( - self: RDD[Tuple[K, V]], - numPartitions: Optional[int] = ..., - partitionFunc: Callable[[K], int] = ..., - ) -> RDD[Tuple[K, Iterable[V]]]: ... - def flatMapValues( - self: RDD[Tuple[K, V]], f: Callable[[V], Iterable[U]] - ) -> RDD[Tuple[K, U]]: ... - def mapValues(self: RDD[Tuple[K, V]], f: Callable[[V], U]) -> RDD[Tuple[K, U]]: ... - @overload - def groupWith( - self: RDD[Tuple[K, V]], __o: RDD[Tuple[K, V1]] - ) -> RDD[Tuple[K, Tuple[ResultIterable[V], ResultIterable[V1]]]]: ... - @overload - def groupWith( - self: RDD[Tuple[K, V]], __o1: RDD[Tuple[K, V1]], __o2: RDD[Tuple[K, V2]] - ) -> RDD[Tuple[K, Tuple[ResultIterable[V], ResultIterable[V1], ResultIterable[V2]]]]: ... - @overload - def groupWith( - self: RDD[Tuple[K, V]], - other1: RDD[Tuple[K, V1]], - other2: RDD[Tuple[K, V2]], - other3: RDD[Tuple[K, V3]], - ) -> RDD[ - Tuple[ - K, - Tuple[ - ResultIterable[V], - ResultIterable[V1], - ResultIterable[V2], - ResultIterable[V3], - ], - ] - ]: ... - def cogroup( - self: RDD[Tuple[K, V]], - other: RDD[Tuple[K, U]], - numPartitions: Optional[int] = ..., - ) -> RDD[Tuple[K, Tuple[ResultIterable[V], ResultIterable[U]]]]: ... - def sampleByKey( - self: RDD[Tuple[K, V]], - withReplacement: bool, - fractions: Dict[K, Union[float, int]], - seed: Optional[int] = ..., - ) -> RDD[Tuple[K, V]]: ... - def subtractByKey( - self: RDD[Tuple[K, V]], - other: RDD[Tuple[K, U]], - numPartitions: Optional[int] = ..., - ) -> RDD[Tuple[K, V]]: ... - def subtract(self, other: RDD[T_co], numPartitions: Optional[int] = ...) -> RDD[T_co]: ... - def keyBy(self, f: Callable[[T_co], K]) -> RDD[Tuple[K, T_co]]: ... - def repartition(self, numPartitions: int) -> RDD[T_co]: ... 
- def coalesce(self, numPartitions: int, shuffle: bool = ...) -> RDD[T_co]: ... - def zip(self, other: RDD[U]) -> RDD[Tuple[T_co, U]]: ... - def zipWithIndex(self) -> RDD[Tuple[T_co, int]]: ... - def zipWithUniqueId(self) -> RDD[Tuple[T_co, int]]: ... - def name(self) -> str: ... - def setName(self, name: str) -> RDD[T_co]: ... - def toDebugString(self) -> bytes: ... - def getStorageLevel(self) -> StorageLevel: ... - def lookup(self: RDD[Tuple[K, V]], key: K) -> List[V]: ... - def countApprox(self, timeout: int, confidence: float = ...) -> int: ... - def sumApprox( - self: RDD[Union[float, int]], timeout: int, confidence: float = ... - ) -> BoundedFloat: ... - def meanApprox( - self: RDD[Union[float, int]], timeout: int, confidence: float = ... - ) -> BoundedFloat: ... - def countApproxDistinct(self, relativeSD: float = ...) -> int: ... - def toLocalIterator(self, prefetchPartitions: bool = ...) -> Iterator[T_co]: ... - def barrier(self) -> RDDBarrier[T_co]: ... - def withResources(self, profile: ResourceProfile) -> RDD[T_co]: ... - def getResourceProfile(self) -> Optional[ResourceProfile]: ... - @overload - def toDF( - self: RDD[RowLike], - schema: Optional[Union[List[str], Tuple[str, ...]]] = ..., - sampleRatio: Optional[float] = ..., - ) -> DataFrame: ... - @overload - def toDF(self: RDD[RowLike], schema: Optional[Union[StructType, str]] = ...) -> DataFrame: ... - @overload - def toDF( - self: RDD[AtomicValue], - schema: Union[AtomicType, str], - ) -> DataFrame: ... - -class RDDBarrier(Generic[T]): - rdd: RDD[T] - def __init__(self, rdd: RDD[T]) -> None: ... - def mapPartitions( - self, f: Callable[[Iterable[T]], Iterable[U]], preservesPartitioning: bool = ... - ) -> RDD[U]: ... - def mapPartitionsWithIndex( - self, - f: Callable[[int, Iterable[T]], Iterable[U]], - preservesPartitioning: bool = ..., - ) -> RDD[U]: ... - -class PipelinedRDD(RDD[U], Generic[T, U]): - func: Callable[[T], U] - preservesPartitioning: bool - is_cached: bool - is_checkpointed: bool - ctx: pyspark.context.SparkContext - prev: RDD[T] - partitioner: Optional[Partitioner] - is_barrier: bool - def __init__( - self, - prev: RDD[T], - func: Callable[[Iterable[T]], Iterable[U]], - preservesPartitioning: bool = ..., - isFromBarrier: bool = ..., - ) -> None: ... - def getNumPartitions(self) -> int: ... - def id(self) -> int: ... diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index a0941afd36e4f..628ef18a54a12 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -100,6 +100,13 @@ def load_stream(self, stream): """ raise NotImplementedError + def dumps(self, obj): + """ + Serialize an object into a byte array. + When batching is used, this will be called with an array of objects. + """ + raise NotImplementedError + def _load_stream_without_unbatching(self, stream): """ Return an iterator of deserialized batches (iterable) of objects from the input stream. diff --git a/python/pyspark/sql/_typing.pyi b/python/pyspark/sql/_typing.pyi index 2adae6c237389..209bb70faddef 100644 --- a/python/pyspark/sql/_typing.pyi +++ b/python/pyspark/sql/_typing.pyi @@ -25,7 +25,7 @@ from typing import ( TypeVar, Union, ) -from typing_extensions import Protocol +from typing_extensions import Literal, Protocol import datetime import decimal @@ -56,6 +56,8 @@ AtomicValue = TypeVar( RowLike = TypeVar("RowLike", List[Any], Tuple[Any, ...], pyspark.sql.types.Row) +SQLBatchedUDFType = Literal[100] + class SupportsOpen(Protocol): def open(self, partition_id: int, epoch_id: int) -> bool: ... 
diff --git a/python/pyspark/sql/pandas/_typing/__init__.pyi b/python/pyspark/sql/pandas/_typing/__init__.pyi index d3796f48066de..906703d3c86ca 100644 --- a/python/pyspark/sql/pandas/_typing/__init__.pyi +++ b/python/pyspark/sql/pandas/_typing/__init__.pyi @@ -42,11 +42,12 @@ DataFrameOrSeriesLike_ = TypeVar("DataFrameOrSeriesLike_", bound=DataFrameOrSeri # UDF annotations PandasScalarUDFType = Literal[200] -PandasScalarIterUDFType = Literal[204] PandasGroupedMapUDFType = Literal[201] -PandasCogroupedMapUDFType = Literal[206] PandasGroupedAggUDFType = Literal[202] +PandasWindowAggUDFType = Literal[203] +PandasScalarIterUDFType = Literal[204] PandasMapIterUDFType = Literal[205] +PandasCogroupedMapUDFType = Literal[206] ArrowMapIterUDFType = Literal[207] class PandasVariadicScalarToScalarFunction(Protocol): diff --git a/python/pyspark/tests/typing/test_rdd.yml b/python/pyspark/tests/typing/test_rdd.yml index 749ad534d5ade..48965829cfdca 100644 --- a/python/pyspark/tests/typing/test_rdd.yml +++ b/python/pyspark/tests/typing/test_rdd.yml @@ -18,11 +18,11 @@ - case: toDF main: | from pyspark.sql.types import ( - IntegerType, - Row, - StructType, - StringType, - StructField, + IntegerType, + Row, + StructType, + StringType, + StructField, ) from collections import namedtuple from pyspark.sql import SparkSession @@ -60,3 +60,70 @@ rdd_named_tuple.toDF(sampleRatio=0.4) rdd_named_tuple.toDF(["a", "b"], sampleRatio=0.4) rdd_named_tuple.toDF(struct) + + +- case: rddMethods + main: | + from operator import add + from typing import Iterable, Set, Tuple + from pyspark.sql import SparkSession + + spark = SparkSession.builder.getOrCreate() + sc = spark.sparkContext + + def f1(x: int) -> str: + return str(x) + + reveal_type(sc.range(10).map(f1)) + + def f2(x: int) -> Iterable[int]: + return range(x) + + reveal_type(sc.range(10).flatMap(f2)) + + reveal_type(sc.parallelize([("a", 1), ("b", 0)]).filter(lambda x: x[1] != 0)) + + reveal_type(sc.parallelize([("a", 1), ("b", 0)]).max()) + + reveal_type(sc.range(10).reduce(add)) + + def seq_func(xs: Set[str], x: int) -> Set[str]: + xs.add(str(x % 11)) + return xs + + def comb_func(xs: Set[str], ys: Set[str]) -> Set[str]: + xs.update(ys) + return xs + + zero: Set[str] = set() + + reveal_type(sc.parallelize([("a", 1)]).aggregateByKey(zero, seq_func, comb_func)) + + out: | + main:11: note: Revealed type is "pyspark.rdd.RDD[builtins.str*]" + main:16: note: Revealed type is "pyspark.rdd.RDD[builtins.int*]" + main:18: note: Revealed type is "pyspark.rdd.RDD[Tuple[builtins.str, builtins.int]]" + main:20: note: Revealed type is "Tuple[builtins.str, builtins.int]" + main:22: note: Revealed type is "builtins.int" + main:34: note: Revealed type is "pyspark.rdd.RDD[Tuple[builtins.str, builtins.set[builtins.str]]]" + +- case: rddMethodsErrors + main: | + from pyspark.sql import SparkSession + + spark = SparkSession.builder.getOrCreate() + sc = spark.sparkContext + + def f1(x: str) -> str: + return x + + sc.range(10).map(f1) + + def f2(x: int) -> str: + return str(x) + + sc.range(10).reduce(f2) + + out: | + main:9: error: Argument 1 to "map" of "RDD" has incompatible type "Callable[[str], str]"; expected "Callable[[int], str]" [arg-type] + main:14: error: Argument 1 to "reduce" of "RDD" has incompatible type "Callable[[int], str]"; expected "Callable[[int, int], int]" [arg-type] From 157dc7faafec98fd426ddc28e33670b89116eab8 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 19 Feb 2022 13:49:26 +0100 Subject: [PATCH 278/513] [SPARK-37428][PYTHON][MLLIB] Inline type hints for 
pyspark.mllib.util ### What changes were proposed in this pull request? This PR migrates type `pyspark.mllib.util` annotations from stub file to inline type hints. ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Part of ongoing migration of type hints. Closes #35532 from zero323/SPARK-37428. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/mllib/common.py | 6 +- python/pyspark/mllib/util.py | 100 +++++++++++++++++++++++---------- python/pyspark/mllib/util.pyi | 90 ----------------------------- 3 files changed, 72 insertions(+), 124 deletions(-) delete mode 100644 python/pyspark/mllib/util.pyi diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py index 00653aa6c9dd9..cf68c5bb11e47 100644 --- a/python/pyspark/mllib/common.py +++ b/python/pyspark/mllib/common.py @@ -127,13 +127,13 @@ def _java2py(sc: SparkContext, r: "JavaObjectOrPickleDump", encoding: str = "byt def callJavaFunc( sc: pyspark.context.SparkContext, func: Callable[..., "JavaObjectOrPickleDump"], *args: Any -) -> "JavaObjectOrPickleDump": +) -> Any: """Call Java Function""" java_args = [_py2java(sc, a) for a in args] return _java2py(sc, func(*java_args)) -def callMLlibFunc(name: str, *args: Any) -> "JavaObjectOrPickleDump": +def callMLlibFunc(name: str, *args: Any) -> Any: """Call API in PythonMLLibAPI""" sc = SparkContext.getOrCreate() assert sc._jvm is not None @@ -154,7 +154,7 @@ def __del__(self) -> None: assert self._sc._gateway is not None self._sc._gateway.detach(self._java_model) - def call(self, name: str, *a: Any) -> "JavaObjectOrPickleDump": + def call(self, name: str, *a: Any) -> Any: """Call method of java_model""" return callJavaFunc(self._sc, getattr(self._java_model, name), *a) diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py index d3824e86c2618..8f28e2cfee0eb 100644 --- a/python/pyspark/mllib/util.py +++ b/python/pyspark/mllib/util.py @@ -16,12 +16,27 @@ # import sys +from functools import reduce import numpy as np from pyspark import SparkContext, since from pyspark.mllib.common import callMLlibFunc, inherit_doc from pyspark.mllib.linalg import Vectors, SparseVector, _convert_to_vector from pyspark.sql import DataFrame +from typing import Generic, Iterable, List, Optional, Tuple, Type, TypeVar, cast, TYPE_CHECKING +from pyspark.context import SparkContext +from pyspark.mllib.linalg import Vector +from pyspark.rdd import RDD +from pyspark.sql.dataframe import DataFrame + +T = TypeVar("T") +L = TypeVar("L", bound="Loader") +JL = TypeVar("JL", bound="JavaLoader") + +if TYPE_CHECKING: + from pyspark.mllib._typing import VectorLike + from py4j.java_gateway import JavaObject + from pyspark.mllib.regression import LabeledPoint class MLUtils: @@ -33,7 +48,7 @@ class MLUtils: """ @staticmethod - def _parse_libsvm_line(line): + def _parse_libsvm_line(line: str) -> Tuple[float, np.ndarray, np.ndarray]: """ Parses a line in LIBSVM format into (label, indices, values). 
""" @@ -49,7 +64,7 @@ def _parse_libsvm_line(line): return label, indices, values @staticmethod - def _convert_labeled_point_to_libsvm(p): + def _convert_labeled_point_to_libsvm(p: "LabeledPoint") -> str: """Converts a LabeledPoint to a string in LIBSVM format.""" from pyspark.mllib.regression import LabeledPoint @@ -62,11 +77,13 @@ def _convert_labeled_point_to_libsvm(p): items.append(str(v.indices[i] + 1) + ":" + str(v.values[i])) else: for i in range(len(v)): - items.append(str(i + 1) + ":" + str(v[i])) + items.append(str(i + 1) + ":" + str(v[i])) # type: ignore[index] return " ".join(items) @staticmethod - def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None): + def loadLibSVMFile( + sc: SparkContext, path: str, numFeatures: int = -1, minPartitions: Optional[int] = None + ) -> RDD["LabeledPoint"]: """ Loads labeled data in the LIBSVM format into an RDD of LabeledPoint. The LIBSVM format is a text-based format used by @@ -128,10 +145,14 @@ def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None): if numFeatures <= 0: parsed.cache() numFeatures = parsed.map(lambda x: -1 if x[1].size == 0 else x[1][-1]).reduce(max) + 1 - return parsed.map(lambda x: LabeledPoint(x[0], Vectors.sparse(numFeatures, x[1], x[2]))) + return parsed.map( + lambda x: LabeledPoint( + x[0], Vectors.sparse(numFeatures, x[1], x[2]) # type: ignore[arg-type] + ) + ) @staticmethod - def saveAsLibSVMFile(data, dir): + def saveAsLibSVMFile(data: RDD["LabeledPoint"], dir: str) -> None: """ Save labeled data in LIBSVM format. @@ -163,7 +184,9 @@ def saveAsLibSVMFile(data, dir): lines.saveAsTextFile(dir) @staticmethod - def loadLabeledPoints(sc, path, minPartitions=None): + def loadLabeledPoints( + sc: SparkContext, path: str, minPartitions: Optional[int] = None + ) -> RDD["LabeledPoint"]: """ Load labeled points saved using RDD.saveAsTextFile. @@ -201,7 +224,7 @@ def loadLabeledPoints(sc, path, minPartitions=None): @staticmethod @since("1.5.0") - def appendBias(data): + def appendBias(data: Vector) -> Vector: """ Returns a new vector with `1.0` (bias) appended to the end of the input vector. @@ -216,7 +239,7 @@ def appendBias(data): @staticmethod @since("1.5.0") - def loadVectors(sc, path): + def loadVectors(sc: SparkContext, path: str) -> RDD[Vector]: """ Loads vectors saved using `RDD[Vector].saveAsTextFile` with the default number of partitions. 
@@ -224,7 +247,7 @@ def loadVectors(sc, path): return callMLlibFunc("loadVectors", sc, path) @staticmethod - def convertVectorColumnsToML(dataset, *cols): + def convertVectorColumnsToML(dataset: DataFrame, *cols: str) -> DataFrame: """ Converts vector columns in an input DataFrame from the :py:class:`pyspark.mllib.linalg.Vector` type to the new @@ -273,7 +296,7 @@ def convertVectorColumnsToML(dataset, *cols): return callMLlibFunc("convertVectorColumnsToML", dataset, list(cols)) @staticmethod - def convertVectorColumnsFromML(dataset, *cols): + def convertVectorColumnsFromML(dataset: DataFrame, *cols: str) -> DataFrame: """ Converts vector columns in an input DataFrame to the :py:class:`pyspark.mllib.linalg.Vector` type from the new @@ -322,7 +345,7 @@ def convertVectorColumnsFromML(dataset, *cols): return callMLlibFunc("convertVectorColumnsFromML", dataset, list(cols)) @staticmethod - def convertMatrixColumnsToML(dataset, *cols): + def convertMatrixColumnsToML(dataset: DataFrame, *cols: str) -> DataFrame: """ Converts matrix columns in an input DataFrame from the :py:class:`pyspark.mllib.linalg.Matrix` type to the new @@ -371,7 +394,7 @@ def convertMatrixColumnsToML(dataset, *cols): return callMLlibFunc("convertMatrixColumnsToML", dataset, list(cols)) @staticmethod - def convertMatrixColumnsFromML(dataset, *cols): + def convertMatrixColumnsFromML(dataset: DataFrame, *cols: str) -> DataFrame: """ Converts matrix columns in an input DataFrame to the :py:class:`pyspark.mllib.linalg.Matrix` type from the new @@ -427,7 +450,7 @@ class Saveable: .. versionadded:: 1.3.0 """ - def save(self, sc, path): + def save(self, sc: SparkContext, path: str) -> None: """ Save this model to the given path. @@ -458,8 +481,10 @@ class JavaSaveable(Saveable): .. versionadded:: 1.3.0 """ + _java_model: "JavaObject" + @since("1.3.0") - def save(self, sc, path): + def save(self, sc: SparkContext, path: str) -> None: """Save this model to the given path.""" if not isinstance(sc, SparkContext): raise TypeError("sc should be a SparkContext, got type %s" % type(sc)) @@ -468,7 +493,7 @@ def save(self, sc, path): self._java_model.save(sc._jsc.sc(), path) -class Loader: +class Loader(Generic[T]): """ Mixin for classes which can load saved models from files. @@ -476,7 +501,7 @@ class Loader: """ @classmethod - def load(cls, sc, path): + def load(cls: Type[L], sc: SparkContext, path: str) -> L: """ Load a model from the given path. The model should have been saved using :py:meth:`Saveable.save`. @@ -497,7 +522,7 @@ def load(cls, sc, path): @inherit_doc -class JavaLoader(Loader): +class JavaLoader(Loader[T]): """ Mixin for classes which can load saved models using its Scala implementation. @@ -506,7 +531,7 @@ class JavaLoader(Loader): """ @classmethod - def _java_loader_class(cls): + def _java_loader_class(cls) -> str: """ Returns the full class name of the Java loader. The default implementation replaces "pyspark" by "org.apache.spark" in @@ -516,22 +541,20 @@ def _java_loader_class(cls): return ".".join([java_package, cls.__name__]) @classmethod - def _load_java(cls, sc, path): + def _load_java(cls, sc: SparkContext, path: str) -> "JavaObject": """ Load a Java model from the given path. 
""" java_class = cls._java_loader_class() - java_obj = sc._jvm - for name in java_class.split("."): - java_obj = getattr(java_obj, name) + java_obj: "JavaObject" = reduce(getattr, java_class.split("."), sc._jvm) return java_obj.load(sc._jsc.sc(), path) @classmethod @since("1.3.0") - def load(cls, sc, path): + def load(cls: Type[JL], sc: SparkContext, path: str) -> JL: """Load a model from the given path.""" java_model = cls._load_java(sc, path) - return cls(java_model) + return cls(java_model) # type: ignore[call-arg] class LinearDataGenerator: @@ -541,7 +564,15 @@ class LinearDataGenerator: """ @staticmethod - def generateLinearInput(intercept, weights, xMean, xVariance, nPoints, seed, eps): + def generateLinearInput( + intercept: float, + weights: "VectorLike", + xMean: "VectorLike", + xVariance: "VectorLike", + nPoints: int, + seed: int, + eps: float, + ) -> List["LabeledPoint"]: """ .. versionadded:: 1.5.0 @@ -568,9 +599,9 @@ def generateLinearInput(intercept, weights, xMean, xVariance, nPoints, seed, eps list of :py:class:`pyspark.mllib.regression.LabeledPoints` of length nPoints """ - weights = [float(weight) for weight in weights] - xMean = [float(mean) for mean in xMean] - xVariance = [float(var) for var in xVariance] + weights = [float(weight) for weight in cast(Iterable[float], weights)] + xMean = [float(mean) for mean in cast(Iterable[float], xMean)] + xVariance = [float(var) for var in cast(Iterable[float], xVariance)] return list( callMLlibFunc( "generateLinearInputWrapper", @@ -586,7 +617,14 @@ def generateLinearInput(intercept, weights, xMean, xVariance, nPoints, seed, eps @staticmethod @since("1.5.0") - def generateLinearRDD(sc, nexamples, nfeatures, eps, nParts=2, intercept=0.0): + def generateLinearRDD( + sc: SparkContext, + nexamples: int, + nfeatures: int, + eps: float, + nParts: int = 2, + intercept: float = 0.0, + ) -> RDD["LabeledPoint"]: """ Generate an RDD of LabeledPoints. """ @@ -601,7 +639,7 @@ def generateLinearRDD(sc, nexamples, nfeatures, eps, nParts=2, intercept=0.0): ) -def _test(): +def _test() -> None: import doctest from pyspark.sql import SparkSession diff --git a/python/pyspark/mllib/util.pyi b/python/pyspark/mllib/util.pyi deleted file mode 100644 index 265f765ee263a..0000000000000 --- a/python/pyspark/mllib/util.pyi +++ /dev/null @@ -1,90 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from typing import Generic, List, Optional, TypeVar - -from pyspark.mllib._typing import VectorLike -from pyspark.context import SparkContext -from pyspark.mllib.linalg import Vector -from pyspark.mllib.regression import LabeledPoint -from pyspark.rdd import RDD -from pyspark.sql.dataframe import DataFrame - -T = TypeVar("T") - -class MLUtils: - @staticmethod - def loadLibSVMFile( - sc: SparkContext, - path: str, - numFeatures: int = ..., - minPartitions: Optional[int] = ..., - ) -> RDD[LabeledPoint]: ... - @staticmethod - def saveAsLibSVMFile(data: RDD[LabeledPoint], dir: str) -> None: ... - @staticmethod - def loadLabeledPoints( - sc: SparkContext, path: str, minPartitions: Optional[int] = ... - ) -> RDD[LabeledPoint]: ... - @staticmethod - def appendBias(data: Vector) -> Vector: ... - @staticmethod - def loadVectors(sc: SparkContext, path: str) -> RDD[Vector]: ... - @staticmethod - def convertVectorColumnsToML(dataset: DataFrame, *cols: str) -> DataFrame: ... - @staticmethod - def convertVectorColumnsFromML(dataset: DataFrame, *cols: str) -> DataFrame: ... - @staticmethod - def convertMatrixColumnsToML(dataset: DataFrame, *cols: str) -> DataFrame: ... - @staticmethod - def convertMatrixColumnsFromML(dataset: DataFrame, *cols: str) -> DataFrame: ... - -class Saveable: - def save(self, sc: SparkContext, path: str) -> None: ... - -class JavaSaveable(Saveable): - def save(self, sc: SparkContext, path: str) -> None: ... - -class Loader(Generic[T]): - @classmethod - def load(cls, sc: SparkContext, path: str) -> T: ... - -class JavaLoader(Loader[T]): - @classmethod - def load(cls, sc: SparkContext, path: str) -> T: ... - -class LinearDataGenerator: - @staticmethod - def generateLinearInput( - intercept: float, - weights: VectorLike, - xMean: VectorLike, - xVariance: VectorLike, - nPoints: int, - seed: int, - eps: float, - ) -> List[LabeledPoint]: ... - @staticmethod - def generateLinearRDD( - sc: SparkContext, - nexamples: int, - nfeatures: int, - eps: float, - nParts: int = ..., - intercept: float = ..., - ) -> RDD[LabeledPoint]: ... From 06f4ce43e167cc88f1782076e88e9e4cd2d57fc6 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sat, 19 Feb 2022 08:54:35 -0600 Subject: [PATCH 279/513] [SPARK-38175][CORE][FOLLOWUP] Remove `urlPattern` from `HistoryAppStatusStore#replaceLogUrls` method signature ### What changes were proposed in this pull request? This pr is a followup of SPARK-38175 to remove `urlPattern` from `HistoryAppStatusStore#replaceLogUrls` method signature ### Why are the changes needed? Cleanup unused symbol. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35567 from LuciferYang/SPARK-38175-FOLLOWUP. 
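To make the shape of this cleanup concrete, here is a minimal standalone sketch of the pattern involved (see the actual diff below). The types in it (`ExecutorSummary`, `UrlHandler`) and the environment variable are simplified, illustrative stand-ins, not the real history server classes; the point is that once the call sites only check whether `logUrlPattern` is defined, the handler already owns the pattern and `replaceLogUrls` no longer needs a `urlPattern` parameter:

```
object ReplaceLogUrlsSketch {
  // Simplified stand-ins for the real v1.ExecutorSummary and the executor log URL handler.
  final case class ExecutorSummary(executorLogs: Map[String, String])

  final class UrlHandler(pattern: Option[String]) {
    def applyPattern(logs: Map[String, String]): Map[String, String] =
      pattern.fold(logs)(p => logs.map { case (name, _) => name -> s"$p/$name" })
  }

  // Illustrative pattern source; the real store gets it from its configuration.
  val logUrlPattern: Option[String] = sys.env.get("CUSTOM_LOG_URL_PATTERN")
  private val logUrlHandler = new UrlHandler(logUrlPattern)

  // After the cleanup: the handler already holds the pattern, so replaceLogUrls
  // only needs the summary being rewritten.
  private def replaceLogUrls(exec: ExecutorSummary): ExecutorSummary =
    exec.copy(executorLogs = logUrlHandler.applyPattern(exec.executorLogs))

  def executorList(execs: Seq[ExecutorSummary]): Seq[ExecutorSummary] =
    if (logUrlPattern.nonEmpty) execs.map(replaceLogUrls) else execs
}
```

The change in the diff that follows is exactly this parameter removal, plus switching the `match` on the `Option` to a `nonEmpty` check.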
Authored-by: yangjie01
Signed-off-by: Sean Owen
---
 .../deploy/history/HistoryAppStatusStore.scala | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryAppStatusStore.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryAppStatusStore.scala
index ac0f102d81a6a..d86243df7163f 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryAppStatusStore.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryAppStatusStore.scala
@@ -44,21 +44,23 @@ private[spark] class HistoryAppStatusStore(
 
   override def executorList(activeOnly: Boolean): Seq[v1.ExecutorSummary] = {
     val execList = super.executorList(activeOnly)
-    logUrlPattern match {
-      case Some(pattern) => execList.map(replaceLogUrls(_, pattern))
-      case None => execList
+    if (logUrlPattern.nonEmpty) {
+      execList.map(replaceLogUrls)
+    } else {
+      execList
     }
   }
 
   override def executorSummary(executorId: String): v1.ExecutorSummary = {
     val execSummary = super.executorSummary(executorId)
-    logUrlPattern match {
-      case Some(pattern) => replaceLogUrls(execSummary, pattern)
-      case None => execSummary
+    if (logUrlPattern.nonEmpty) {
+      replaceLogUrls(execSummary)
+    } else {
+      execSummary
     }
   }
 
-  private def replaceLogUrls(exec: v1.ExecutorSummary, urlPattern: String): v1.ExecutorSummary = {
+  private def replaceLogUrls(exec: v1.ExecutorSummary): v1.ExecutorSummary = {
     val newLogUrlMap = logUrlHandler.applyPattern(exec.executorLogs, exec.attributes)
     replaceExecutorLogs(exec, newLogUrlMap)
   }

From 789a510c78ca81db0137bba1687102e2d9acd149 Mon Sep 17 00:00:00 2001
From: yangjie01
Date: Sat, 19 Feb 2022 09:04:01 -0600
Subject: [PATCH 280/513] [SPARK-38249][CORE][GRAPHX] Cleanup unused private methods/fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What changes were proposed in this pull request?
This PR aims to clean up unused `private` methods and fields.

### Why are the changes needed?
Code cleanup.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Pass GA

Closes #35566 from LuciferYang/never-used.
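On how unused private members like these are usually spotted: the sketch below is illustrative only and assumes the Scala 2.13 warning flag `-Wunused:privates` (Scala 2.12 has the analogous `-Ywarn-unused:privates`); Spark's actual build settings and the exact members removed by this patch are not reproduced here.

```
// Compile with: scalac -Wunused:privates UnusedPrivates.scala
class UnusedPrivates {
  private val unusedField: Int = 42             // never read: flagged by the compiler
  private def unusedHelper(x: Int): Int = x + 1 // never called: flagged by the compiler

  private val used: String = "kept"
  def describe(): String = s"value=$used"       // referencing `used` keeps it out of the warnings
}
```

Warnings like these (or an IDE's dead-code inspection) are the usual signal for removals such as the ones in the diff below.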
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../scala/org/apache/spark/deploy/master/Master.scala | 9 +-------- .../spark/executor/CoarseGrainedExecutorBackend.scala | 5 ----- .../main/scala/org/apache/spark/rdd/NewHadoopRDD.scala | 1 - .../scala/org/apache/spark/resource/ResourceUtils.scala | 1 - .../scala/org/apache/spark/graphx/impl/GraphImpl.scala | 9 --------- 5 files changed, 1 insertion(+), 24 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 7dbf6b92b4088..775b27bcbf279 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -25,7 +25,7 @@ import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import scala.util.Random import org.apache.spark.{SecurityManager, SparkConf, SparkException} -import org.apache.spark.deploy.{ApplicationDescription, DriverDescription, ExecutorState, SparkHadoopUtil} +import org.apache.spark.deploy.{ApplicationDescription, DriverDescription, ExecutorState} import org.apache.spark.deploy.DeployMessages._ import org.apache.spark.deploy.master.DriverState.DriverState import org.apache.spark.deploy.master.MasterMessages._ @@ -53,8 +53,6 @@ private[deploy] class Master( private val forwardMessageThread = ThreadUtils.newDaemonSingleThreadScheduledExecutor("master-forward-message-thread") - private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) - // For application IDs private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US) @@ -95,11 +93,6 @@ private[deploy] class Master( // After onStart, webUi will be set private var webUi: MasterWebUI = null - private val masterPublicAddress = { - val envVar = conf.getenv("SPARK_PUBLIC_DNS") - if (envVar != null) envVar else address.host - } - private val masterUrl = address.toSparkURL private var masterWebUiUrl: String = _ diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index fb7b4e62150db..a94e63656e1a1 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -42,7 +42,6 @@ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.rpc._ import org.apache.spark.scheduler.{ExecutorLossMessage, ExecutorLossReason, TaskDescription} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ -import org.apache.spark.serializer.SerializerInstance import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, SignalUtils, ThreadUtils, Utils} private[spark] class CoarseGrainedExecutorBackend( @@ -65,10 +64,6 @@ private[spark] class CoarseGrainedExecutorBackend( var executor: Executor = null @volatile var driver: Option[RpcEndpointRef] = None - // If this CoarseGrainedExecutorBackend is changed to support multiple threads, then this may need - // to be changed so that we don't share the serializer instance across threads - private[this] val ser: SerializerInstance = env.closureSerializer.newInstance() - private var _resources = Map.empty[String, ResourceInformation] /** diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index c6959a5a4dafa..596298b222e05 100644 --- 
a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -244,7 +244,6 @@ class NewHadoopRDD[K, V]( } private var havePair = false - private var recordsSinceMetricsUpdate = 0 override def hasNext: Boolean = { if (!finished && !havePair) { diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala index 837b2d80aace6..3f0a0d36dff6e 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala @@ -386,7 +386,6 @@ private[spark] object ResourceUtils extends Logging { val resourcePlugins = Utils.loadExtensions(classOf[ResourceDiscoveryPlugin], pluginClasses, sparkConf) // apply each plugin until one of them returns the information for this resource - var riOption: Optional[ResourceInformation] = Optional.empty() resourcePlugins.foreach { plugin => val riOption = plugin.discoverResource(resourceRequest, sparkConf) if (riOption.isPresent()) { diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala index 8564597f4f135..4a790878cf9dc 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala @@ -21,7 +21,6 @@ import scala.reflect.{classTag, ClassTag} import org.apache.spark.HashPartitioner import org.apache.spark.graphx._ -import org.apache.spark.graphx.util.BytecodeUtils import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel @@ -265,14 +264,6 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected ( } } - /** Test whether the closure accesses the attribute with name `attrName`. */ - private def accessesVertexAttr(closure: AnyRef, attrName: String): Boolean = { - try { - BytecodeUtils.invokedMethod(closure, classOf[EdgeTriplet[VD, ED]], attrName) - } catch { - case _: ClassNotFoundException => true // if we don't know, be conservative - } - } } // end of class GraphImpl From ae67adde4d2dc0a75e03710fc3e66ea253feeda3 Mon Sep 17 00:00:00 2001 From: Itay Bittan Date: Sun, 20 Feb 2022 10:51:53 +0800 Subject: [PATCH 281/513] [MINOR][DOCS] fix default value of history server ### What changes were proposed in this pull request? Alignment between the documentation and the code. ### Why are the changes needed? The [actual default value ](https://github.com/apache/spark/blame/master/core/src/main/scala/org/apache/spark/internal/config/History.scala#L198) for `spark.history.custom.executor.log.url.applyIncompleteApplication` is `true` and not `false` as stated in the documentation. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Closes #35577 from itayB/doc. Authored-by: Itay Bittan Signed-off-by: Yuming Wang --- docs/monitoring.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/monitoring.md b/docs/monitoring.md index e54ac5414ba79..f2c6e37974926 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -357,7 +357,7 @@ Security options for the Spark History Server are covered more detail in the spark.history.custom.executor.log.url.applyIncompleteApplication - false + true Specifies whether to apply custom spark executor log URL to incomplete applications as well. If executor logs for running applications should be provided as origin log URLs, set this to `false`. 
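For operators who want to verify the effective value on their own deployment, here is a small sketch. It relies only on the public `SparkConf` API; the object wrapper and the printed label are illustrative, and the built-in default it falls back to is the `createWithDefault(true)` entry in `History.scala` that this doc change points at.

```
import org.apache.spark.SparkConf

object HistoryLogUrlDefaultCheck {
  def main(args: Array[String]): Unit = {
    // loadDefaults = true picks up spark.* JVM system properties (e.g. those passed
    // via SPARK_HISTORY_OPTS); the History Server itself also reads spark-defaults.conf.
    val conf = new SparkConf(loadDefaults = true)
    // If the key is absent, fall back to the built-in default, which is true.
    val applyToIncomplete = conf.getBoolean(
      "spark.history.custom.executor.log.url.applyIncompleteApplication",
      defaultValue = true)
    println(s"applyIncompleteApplication = $applyToIncomplete")
  }
}
```

Setting the key to `false` in `spark-defaults.conf` restores the behavior the documentation previously described, i.e. origin log URLs for still-running applications.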
From 4789e1f234a92b3c17d1f962c8f374ef9478612a Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 20 Feb 2022 14:35:19 -0600 Subject: [PATCH 282/513] [SPARK-37090][BUILD] Upgrade `libthrift` to 0.16.0 to avoid security vulnerabilities ### What changes were proposed in this pull request? This PR ported HIVE-21498, HIVE-25098 and upgraded libthrift to 0.16.0. The CHANGES list for libthrift 0.16.0 is available at: https://github.com/apache/thrift/blob/v0.16.0/CHANGES.md ### Why are the changes needed? To address [CVE-2020-13949](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13949). ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test. Closes #34362 from wangyum/SPARK-37090. Lead-authored-by: Yuming Wang Co-authored-by: Yuming Wang Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 6 +- .../hive/service/auth/KerberosSaslHelper.java | 5 +- .../hive/service/auth/PlainSaslHelper.java | 3 +- .../service/auth/TSetIpAddressProcessor.java | 5 +- .../cli/thrift/ThriftBinaryCLIService.java | 6 - .../service/cli/thrift/ThriftCLIService.java | 10 + .../thrift/transport/TFramedTransport.java | 200 ++++++++++++++++++ 9 files changed, 225 insertions(+), 14 deletions(-) create mode 100644 sql/hive/src/main/java/org/apache/thrift/transport/TFramedTransport.java diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index b4fd14b30a4dd..9ab65b48ea2c0 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -186,7 +186,7 @@ kubernetes-model-storageclass/5.12.0//kubernetes-model-storageclass-5.12.0.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar -libthrift/0.12.0//libthrift-0.12.0.jar +libthrift/0.16.0//libthrift-0.16.0.jar log4j-1.2-api/2.17.1//log4j-1.2-api-2.17.1.jar log4j-api/2.17.1//log4j-api-2.17.1.jar log4j-core/2.17.1//log4j-core-2.17.1.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 96bd2663df60a..7d8729193cb28 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -172,7 +172,7 @@ kubernetes-model-storageclass/5.12.0//kubernetes-model-storageclass-5.12.0.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar -libthrift/0.12.0//libthrift-0.12.0.jar +libthrift/0.16.0//libthrift-0.16.0.jar log4j-1.2-api/2.17.1//log4j-1.2-api-2.17.1.jar log4j-api/2.17.1//log4j-api-2.17.1.jar log4j-core/2.17.1//log4j-core-2.17.1.jar diff --git a/pom.xml b/pom.xml index 7165cb5229821..18b75f4d1717e 100644 --- a/pom.xml +++ b/pom.xml @@ -187,7 +187,7 @@ 2.10.13 3.5.2 3.0.0 - 0.12.0 + 0.16.0 4.8 1.1 3.141.59 @@ -2607,6 +2607,10 @@ org.slf4j slf4j-api + + javax.annotation + javax.annotation-api + diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java index 175412ed98c6c..ef91f94eeec2b 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java @@ -30,6 +30,7 @@ import org.apache.thrift.TProcessorFactory; import org.apache.thrift.transport.TSaslClientTransport; import org.apache.thrift.transport.TTransport; +import 
org.apache.thrift.transport.TTransportException; public final class KerberosSaslHelper { @@ -68,8 +69,8 @@ public static TTransport createSubjectAssumedTransport(String principal, new TSaslClientTransport("GSSAPI", null, names[0], names[1], saslProps, null, underlyingTransport); return new TSubjectAssumingTransport(saslTransport); - } catch (SaslException se) { - throw new IOException("Could not instantiate SASL transport", se); + } catch (SaslException | TTransportException se) { + throw new IOException("Could not instantiate transport", se); } } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java index c06f6ec34653f..5ac29950f4f85 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java @@ -38,6 +38,7 @@ import org.apache.thrift.transport.TSaslClientTransport; import org.apache.thrift.transport.TSaslServerTransport; import org.apache.thrift.transport.TTransport; +import org.apache.thrift.transport.TTransportException; import org.apache.thrift.transport.TTransportFactory; public final class PlainSaslHelper { @@ -64,7 +65,7 @@ public static TTransportFactory getPlainTransportFactory(String authTypeStr) } public static TTransport getPlainTransport(String username, String password, - TTransport underlyingTransport) throws SaslException { + TTransport underlyingTransport) throws SaslException, TTransportException { return new TSaslClientTransport("PLAIN", null, null, null, new HashMap(), new PlainCallbackHandler(username, password), underlyingTransport); } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java index 1205d21be6be6..b727b4e27de8d 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java @@ -45,11 +45,12 @@ public TSetIpAddressProcessor(Iface iface) { } @Override - public boolean process(final TProtocol in, final TProtocol out) throws TException { + public void process(final TProtocol in, final TProtocol out) throws TException { setIpAddress(in); setUserName(in); try { - return super.process(in, out); + super.process(in, out); + return; } finally { THREAD_LOCAL_USER_NAME.remove(); THREAD_LOCAL_IP_ADDRESS.remove(); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java index a980b5118be2a..025c85eb65801 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java @@ -90,16 +90,10 @@ protected void initializeServer() { // Server args int maxMessageSize = hiveConf.getIntVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_MAX_MESSAGE_SIZE); - int requestTimeout = (int) hiveConf.getTimeVar( - HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_TIMEOUT, TimeUnit.SECONDS); - int beBackoffSlotLength = (int) hiveConf.getTimeVar( - HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_BEBACKOFF_SLOT_LENGTH, TimeUnit.MILLISECONDS); TThreadPoolServer.Args 
sargs = new TThreadPoolServer.Args(serverSocket) .processorFactory(processorFactory).transportFactory(transportFactory) .protocolFactory(new TBinaryProtocol.Factory()) .inputProtocolFactory(new TBinaryProtocol.Factory(true, true, maxMessageSize, maxMessageSize)) - .requestTimeout(requestTimeout).requestTimeoutUnit(TimeUnit.SECONDS) - .beBackoffSlotLength(beBackoffSlotLength).beBackoffSlotLengthUnit(TimeUnit.MILLISECONDS) .executorService(executorService); // TCP Server diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java index 4a223c8666a17..ddbe89b0b721b 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java @@ -83,6 +83,16 @@ public void setSessionHandle(SessionHandle sessionHandle) { public SessionHandle getSessionHandle() { return sessionHandle; } + + @Override + public T unwrap(Class aClass) { + return null; + } + + @Override + public boolean isWrapperFor(Class aClass) { + return false; + } } public ThriftCLIService(CLIService service, String serviceName) { diff --git a/sql/hive/src/main/java/org/apache/thrift/transport/TFramedTransport.java b/sql/hive/src/main/java/org/apache/thrift/transport/TFramedTransport.java new file mode 100644 index 0000000000000..4b32108c7d208 --- /dev/null +++ b/sql/hive/src/main/java/org/apache/thrift/transport/TFramedTransport.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.thrift.transport; + + +import org.apache.thrift.TByteArrayOutputStream; +import org.apache.thrift.TConfiguration; + +/** + * This is based on libthrift-0.12.0 {@link org.apache.thrift.transport.TFramedTransport}. + * To fix class of org.apache.thrift.transport.TFramedTransport not found after upgrading libthrift. + * + * TFramedTransport is a buffered TTransport that ensures a fully read message + * every time by preceding messages with a 4-byte frame size. 
+ */ +public class TFramedTransport extends TTransport { + + protected static final int DEFAULT_MAX_LENGTH = 16384000; + + private int maxLength_; + + /** + * Underlying transport + */ + private TTransport transport_ = null; + + /** + * Buffer for output + */ + private final TByteArrayOutputStream writeBuffer_ = + new TByteArrayOutputStream(1024); + + /** + * Buffer for input + */ + private final TMemoryInputTransport readBuffer_ = + new TMemoryInputTransport(new byte[0]); + + public static class Factory extends TTransportFactory { + private int maxLength_; + + public Factory() { + maxLength_ = TFramedTransport.DEFAULT_MAX_LENGTH; + } + + public Factory(int maxLength) { + maxLength_ = maxLength; + } + + @Override + public TTransport getTransport(TTransport base) throws TTransportException { + return new TFramedTransport(base, maxLength_); + } + } + + /** + * Constructor wraps around another transport + */ + public TFramedTransport(TTransport transport, int maxLength) throws TTransportException { + transport_ = transport; + maxLength_ = maxLength; + } + + public TFramedTransport(TTransport transport) throws TTransportException { + transport_ = transport; + maxLength_ = TFramedTransport.DEFAULT_MAX_LENGTH; + } + + public void open() throws TTransportException { + transport_.open(); + } + + public boolean isOpen() { + return transport_.isOpen(); + } + + public void close() { + transport_.close(); + } + + public int read(byte[] buf, int off, int len) throws TTransportException { + int got = readBuffer_.read(buf, off, len); + if (got > 0) { + return got; + } + + // Read another frame of data + readFrame(); + + return readBuffer_.read(buf, off, len); + } + + @Override + public byte[] getBuffer() { + return readBuffer_.getBuffer(); + } + + @Override + public int getBufferPosition() { + return readBuffer_.getBufferPosition(); + } + + @Override + public int getBytesRemainingInBuffer() { + return readBuffer_.getBytesRemainingInBuffer(); + } + + @Override + public void consumeBuffer(int len) { + readBuffer_.consumeBuffer(len); + } + + @Override + public TConfiguration getConfiguration() { + return null; + } + + @Override + public void updateKnownMessageSize(long l) throws TTransportException { + + } + + @Override + public void checkReadBytesAvailable(long l) throws TTransportException { + + } + + public void clear() { + readBuffer_.clear(); + } + + private final byte[] i32buf = new byte[4]; + + private void readFrame() throws TTransportException { + transport_.readAll(i32buf, 0, 4); + int size = decodeFrameSize(i32buf); + + if (size < 0) { + close(); + throw new TTransportException(TTransportException.CORRUPTED_DATA, + "Read a negative frame size (" + size + ")!"); + } + + if (size > maxLength_) { + close(); + throw new TTransportException(TTransportException.CORRUPTED_DATA, + "Frame size (" + size + ") larger than max length (" + maxLength_ + ")!"); + } + + byte[] buff = new byte[size]; + transport_.readAll(buff, 0, size); + readBuffer_.reset(buff); + } + + public void write(byte[] buf, int off, int len) throws TTransportException { + writeBuffer_.write(buf, off, len); + } + + @Override + public void flush() throws TTransportException { + byte[] buf = writeBuffer_.get(); + int len = writeBuffer_.len(); + writeBuffer_.reset(); + + encodeFrameSize(len, i32buf); + transport_.write(i32buf, 0, 4); + transport_.write(buf, 0, len); + transport_.flush(); + } + + public static final void encodeFrameSize(final int frameSize, final byte[] buf) { + buf[0] = (byte)(0xff & (frameSize >> 24)); + buf[1] = 
(byte)(0xff & (frameSize >> 16)); + buf[2] = (byte)(0xff & (frameSize >> 8)); + buf[3] = (byte)(0xff & (frameSize)); + } + + public static final int decodeFrameSize(final byte[] buf) { + return + ((buf[0] & 0xff) << 24) | + ((buf[1] & 0xff) << 16) | + ((buf[2] & 0xff) << 8) | + ((buf[3] & 0xff)); + } +} From 898542746b2c56b2571562ed8e9818bcb565aff2 Mon Sep 17 00:00:00 2001 From: khalidmammadov Date: Mon, 21 Feb 2022 11:04:48 +0900 Subject: [PATCH 283/513] [SPARK-38261][INFRA] Add missing R packages from base image Current GitHub workflow job **Linters, licenses, dependencies and documentation generation** is missing R packages to complete Documentation and API build. **Build and test** - is not failing as these packages are installed on the base image. We need to keep them in-sync IMO with the base image for easy switch back to ubuntu runner when ready. Reference: [**The base image**](https://hub.docker.com/layers/dongjoon/apache-spark-github-action-image/20220207/images/sha256-af09d172ff8e2cbd71df9a1bc5384a47578c4a4cc293786c539333cafaf4a7ce?context=explore) ### What changes were proposed in this pull request? Adding missing packages to the workflow file ### Why are the changes needed? To make them inline with the base image config and make the job task **complete** for standalone execution (i.e. without this image) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? GitHub builds and in the local Docker containers Closes #35583 from khalidmammadov/sync_doc_build_with_base. Authored-by: khalidmammadov Signed-off-by: Hyukjin Kwon --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 060adc487606e..266d4ab9e575c 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -533,7 +533,7 @@ jobs: python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' apt-get update -y apt-get install -y ruby ruby-dev - Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')" + Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'e1071', 'roxygen2'), repos='https://cloud.r-project.org/')" Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" gem install bundler From b71b917ddfe60f93f096e54aedf5ad6474cffe7a Mon Sep 17 00:00:00 2001 From: Bo Zhang Date: Mon, 21 Feb 2022 11:30:28 +0800 Subject: [PATCH 284/513] [SPARK-38236][SQL] Treat table location as absolute when the first letter of its path is slash in create/alter table ### What changes were proposed in this pull request? After https://github.com/apache/spark/pull/28527, we change to create table under the database location when the table location is relative. However the criteria to determine if a table location is relative/absolute is `URI.isAbsolute`, which basically checks if the table location URI has a scheme defined. So table URIs like `/table/path` are treated as relative and the scheme and authority of the database location URI are used to create the table. 
For example, when the database location URI is `s3a://bucket/db`, the table will be created at `s3a://bucket/table/path`, while it should be created under the file system defined in `SessionCatalog.hadoopConf` instead. This change fixes that by treating table location as absolute when the first letter of its path is slash. This also applies to alter table. ### Why are the changes needed? This is to fix the behavior described above. ### Does this PR introduce _any_ user-facing change? Yes. When users try to create/alter a table with a location that starts with a slash but without a scheme defined, the table will be created under/altered to the file system defined in `SessionCatalog.hadoopConf`, instead of the one defined in the database location URI. ### How was this patch tested? Updated unit tests. Closes #35462 from bozhang2820/spark-31709. Authored-by: Bo Zhang Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/avro/AvroSuite.scala | 2 ++ .../mllib/util/MLlibTestSparkContext.scala | 2 ++ .../sql/catalyst/catalog/SessionCatalog.scala | 2 ++ .../results/show-create-table.sql.out | 4 +-- .../sql/connector/DataSourceV2SQLSuite.scala | 2 +- .../command/v1/ShowCreateTableSuite.scala | 2 +- .../v2/V2SessionCatalogSuite.scala | 25 ++++++++++++++----- 7 files changed, 29 insertions(+), 10 deletions(-) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index d9d8c3c8b64f8..05d57ecf2408e 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -68,6 +68,8 @@ abstract class AvroSuite override protected def beforeAll(): Unit = { super.beforeAll() + // initialize SessionCatalog here so it has a clean hadoopConf + spark.sessionState.catalog spark.conf.set(SQLConf.FILES_MAX_PARTITION_BYTES.key, 1024) } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala index 5eb128abacdb9..3a7040d470c05 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala @@ -40,6 +40,8 @@ trait MLlibTestSparkContext extends TempDirectory { self: Suite => .appName("MLlibUnitTest") .getOrCreate() sc = spark.sparkContext + // initialize SessionCatalog here so it has a clean hadoopConf + spark.sessionState.catalog checkpointDir = Utils.createDirectory(tempDir.getCanonicalPath, "checkpoints").toString sc.setCheckpointDir(checkpointDir) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 464768ac7ce2b..1a3054216972a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -388,6 +388,8 @@ class SessionCatalog( private def makeQualifiedTablePath(locationUri: URI, database: String): URI = { if (locationUri.isAbsolute) { locationUri + } else if (new Path(locationUri).isAbsolute) { + makeQualifiedPath(locationUri) } else { val dbName = formatDatabaseName(database) val dbLocation = makeQualifiedDBPath(getDatabaseMetadata(dbName).locationUri) diff --git 
a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out index ca1652b337a28..ded27abc4c14d 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out @@ -80,7 +80,7 @@ CREATE TABLE default.tbl ( b STRING, c INT) USING parquet -LOCATION 'file:/path/to/table' +LOCATION 'file:///path/to/table' -- !query @@ -110,7 +110,7 @@ CREATE TABLE default.tbl ( b STRING, c INT) USING parquet -LOCATION 'file:/path/to/table' +LOCATION 'file:///path/to/table' -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index d9e3342240bcf..b64ed080d8bf1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2773,7 +2773,7 @@ class DataSourceV2SQLSuite val properties = table.properties assert(properties.get(TableCatalog.PROP_PROVIDER) == "parquet") assert(properties.get(TableCatalog.PROP_COMMENT) == "This is a comment") - assert(properties.get(TableCatalog.PROP_LOCATION) == "file:/tmp") + assert(properties.get(TableCatalog.PROP_LOCATION) == "file:///tmp") assert(properties.containsKey(TableCatalog.PROP_OWNER)) assert(properties.get(TableCatalog.PROP_EXTERNAL) == "true") assert(properties.get(s"${TableCatalog.OPTION_PREFIX}from") == "0") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala index 1dd5e4a5aaa79..ee8aa424d5c26 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala @@ -53,7 +53,7 @@ trait ShowCreateTableSuiteBase extends command.ShowCreateTableSuiteBase |COMMENT 'This is a comment' |TBLPROPERTIES ('prop1' = '1', 'prop2' = '2', 'prop3' = 3, 'prop4' = 4) |PARTITIONED BY (a) - |LOCATION '/tmp' + |LOCATION 'file:/tmp' """.stripMargin) val showDDL = getShowCreateDDL(t) assert(showDDL === Array( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala index 1aa8e3736cfe2..bae793bb01214 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala @@ -29,7 +29,7 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, NamespaceChange, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, NamespaceChange, SupportsNamespaces, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{DoubleType, 
IntegerType, LongType, StringType, StructField, StructType, TimestampType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -60,7 +60,8 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { super.beforeAll() val catalog = newCatalog() catalog.createNamespace(Array("db"), emptyProps) - catalog.createNamespace(Array("db2"), emptyProps) + catalog.createNamespace(Array("db2"), + Map(SupportsNamespaces.PROP_LOCATION -> "file:///db2.db").asJava) catalog.createNamespace(Array("ns"), emptyProps) catalog.createNamespace(Array("ns2"), emptyProps) } @@ -186,10 +187,17 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { assert(t2.catalogTable.location === makeQualifiedPathWithWarehouse("db.db/relative/path")) catalog.dropTable(testIdent) - // absolute path + // absolute path without scheme properties.put(TableCatalog.PROP_LOCATION, "/absolute/path") val t3 = catalog.createTable(testIdent, schema, Array.empty, properties).asInstanceOf[V1Table] - assert(t3.catalogTable.location.toString === "file:/absolute/path") + assert(t3.catalogTable.location.toString === "file:///absolute/path") + catalog.dropTable(testIdent) + + // absolute path with scheme + properties.put(TableCatalog.PROP_LOCATION, "file:/absolute/path") + val t4 = catalog.createTable(testIdent, schema, Array.empty, properties).asInstanceOf[V1Table] + assert(t4.catalogTable.location.toString === "file:/absolute/path") + catalog.dropTable(testIdent) + } test("tableExists") { @@ -685,10 +693,15 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { TableChange.setProperty(TableCatalog.PROP_LOCATION, "relative/path")).asInstanceOf[V1Table] assert(t2.catalogTable.location === makeQualifiedPathWithWarehouse("db.db/relative/path")) - // absolute path + // absolute path without scheme val t3 = catalog.alterTable(testIdent, TableChange.setProperty(TableCatalog.PROP_LOCATION, "/absolute/path")).asInstanceOf[V1Table] - assert(t3.catalogTable.location.toString === "file:/absolute/path") + assert(t3.catalogTable.location.toString === "file:///absolute/path") + + // absolute path with scheme + val t4 = catalog.alterTable(testIdent, TableChange.setProperty( + TableCatalog.PROP_LOCATION, "file:/absolute/path")).asInstanceOf[V1Table] + assert(t4.catalogTable.location.toString === "file:/absolute/path") } test("dropTable") { From e2796d2d2f9069119b273fa9d7a777eca87fa015 Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Sun, 20 Feb 2022 21:44:58 -0800 Subject: [PATCH 285/513] [SPARK-38227][SQL][SS] Apply strict nullability of nested column in time window / session window ### What changes were proposed in this pull request? This PR proposes to apply strict nullability to the nested columns in the window struct for both time window and session window, so that it respects the dataType of TimeWindow and SessionWindow. ### Why are the changes needed? The implementations of the TimeWindowing and SessionWindowing rules have exposed a possible risk of inconsistency between the dataType of TimeWindow/SessionWindow and the replacement expression: for the replacement, the analyzer/optimizer may decide that the value expressions are non-nullable. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New tests added. Closes #35543 from HeartSaVioR/SPARK-38227. 
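For illustration only (not part of this patch), a minimal sketch of how the nullability of the window struct fields can be checked; it assumes a local `SparkSession` available as `spark` and loosely mirrors the new tests:

```
import org.apache.spark.sql.functions.window
import org.apache.spark.sql.types.StructType

import spark.implicits._

// Tiny timestamped dataset with a 10-second tumbling window.
val df = Seq(("2016-03-27 09:00:05", 1), ("2016-03-27 09:00:32", 2)).toDF("time", "value")
val windowed = df.select(window($"time".cast("timestamp"), "10 seconds").as("window"), $"value")

// With this change, both 'start' and 'end' should be reported as nullable,
// matching the dataType declared by TimeWindow.
val windowType = windowed.queryExecution.optimizedPlan.schema("window")
  .dataType.asInstanceOf[StructType]
windowType.fields.foreach(f => println(s"${f.name}: nullable=${f.nullable}"))
```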
Authored-by: Jungtaek Lim Signed-off-by: Liang-Chi Hsieh --- .../sql/catalyst/analysis/Analyzer.scala | 14 +++-- .../spark/sql/catalyst/dsl/package.scala | 8 +++ .../expressions/constraintExpressions.scala | 11 ++++ .../sql/DataFrameSessionWindowingSuite.scala | 61 +++++++++++++++++++ .../sql/DataFrameTimeWindowingSuite.scala | 48 +++++++++++++++ 5 files changed, 138 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index c560062d5b09a..5cb5f21e9f710 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3893,11 +3893,13 @@ object TimeWindowing extends Rule[LogicalPlan] { val windowStart = lastStart - i * window.slideDuration val windowEnd = windowStart + window.windowDuration + // We make sure value fields are nullable since the dataType of TimeWindow defines them + // as nullable. CreateNamedStruct( Literal(WINDOW_START) :: - PreciseTimestampConversion(windowStart, LongType, dataType) :: + PreciseTimestampConversion(windowStart, LongType, dataType).castNullable() :: Literal(WINDOW_END) :: - PreciseTimestampConversion(windowEnd, LongType, dataType) :: + PreciseTimestampConversion(windowEnd, LongType, dataType).castNullable() :: Nil) } @@ -4012,11 +4014,15 @@ object SessionWindowing extends Rule[LogicalPlan] { val sessionEnd = PreciseTimestampConversion(session.timeColumn + gapDuration, session.timeColumn.dataType, LongType) + // We make sure value fields are nullable since the dataType of SessionWindow defines them + // as nullable. val literalSessionStruct = CreateNamedStruct( Literal(SESSION_START) :: - PreciseTimestampConversion(sessionStart, LongType, session.timeColumn.dataType) :: + PreciseTimestampConversion(sessionStart, LongType, session.timeColumn.dataType) + .castNullable() :: Literal(SESSION_END) :: - PreciseTimestampConversion(sessionEnd, LongType, session.timeColumn.dataType) :: + PreciseTimestampConversion(sessionEnd, LongType, session.timeColumn.dataType) + .castNullable() :: Nil) val sessionStruct = Alias(literalSessionStruct, SESSION_COL_NAME)( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index dda0d193e7483..0988bef30290c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -138,6 +138,14 @@ package object dsl { } } + def castNullable(): Expression = { + if (expr.resolved && expr.nullable) { + expr + } else { + KnownNullable(expr) + } + } + def asc: SortOrder = SortOrder(expr, Ascending) def asc_nullsLast: SortOrder = SortOrder(expr, Ascending, NullsLast, Seq.empty) def desc: SortOrder = SortOrder(expr, Descending) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/constraintExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/constraintExpressions.scala index 8feaf52ecb134..75d912633a0fc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/constraintExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/constraintExpressions.scala @@ -30,6 +30,17 @@ trait TaggingExpression extends UnaryExpression { override 
def eval(input: InternalRow): Any = child.eval(input) } +case class KnownNullable(child: Expression) extends TaggingExpression { + override def nullable: Boolean = true + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + child.genCode(ctx) + } + + override protected def withNewChildInternal(newChild: Expression): KnownNullable = + copy(child = newChild) +} + case class KnownNotNull(child: Expression) extends TaggingExpression { override def nullable: Boolean = false diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala index b3d212716dd9a..076b64cde8c66 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala @@ -21,6 +21,7 @@ import java.time.LocalDateTime import org.scalatest.BeforeAndAfterEach +import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand} import org.apache.spark.sql.functions._ @@ -406,4 +407,64 @@ class DataFrameSessionWindowingSuite extends QueryTest with SharedSparkSession checkAnswer(aggDF, Seq(Row("2016-03-27 19:39:25", "2016-03-27 19:39:40", 2))) } + + test("SPARK-38227: 'start' and 'end' fields should be nullable") { + // We expect the fields in window struct as nullable since the dataType of SessionWindow + // defines them as nullable. The rule 'SessionWindowing' should respect the dataType. + val df1 = Seq( + ("hello", "2016-03-27 09:00:05", 1), + ("structured", "2016-03-27 09:00:32", 2)).toDF("id", "time", "value") + val df2 = Seq( + ("world", LocalDateTime.parse("2016-03-27T09:00:05"), 1), + ("spark", LocalDateTime.parse("2016-03-27T09:00:32"), 2)).toDF("id", "time", "value") + + val udf = spark.udf.register("gapDuration", (s: String) => { + if (s == "hello") { + "1 second" + } else if (s == "structured") { + // zero gap duration will be filtered out from aggregation + "0 second" + } else if (s == "world") { + // negative gap duration will be filtered out from aggregation + "-10 seconds" + } else { + "10 seconds" + } + }) + + def validateWindowColumnInSchema(schema: StructType, colName: String): Unit = { + schema.find(_.name == colName) match { + case Some(StructField(_, st: StructType, _, _)) => + assertFieldInWindowStruct(st, "start") + assertFieldInWindowStruct(st, "end") + + case _ => fail("Failed to find suitable window column from DataFrame!") + } + } + + def assertFieldInWindowStruct(windowType: StructType, fieldName: String): Unit = { + val field = windowType.fields.find(_.name == fieldName) + assert(field.isDefined, s"'$fieldName' field should exist in window struct") + assert(field.get.nullable, s"'$fieldName' field should be nullable") + } + + for { + df <- Seq(df1, df2) + nullable <- Seq(true, false) + } { + val dfWithDesiredNullability = new DataFrame(df.queryExecution, RowEncoder( + StructType(df.schema.fields.map(_.copy(nullable = nullable))))) + // session window without dynamic gap + val windowedProject = dfWithDesiredNullability + .select(session_window($"time", "10 seconds").as("session"), $"value") + val schema = windowedProject.queryExecution.optimizedPlan.schema + validateWindowColumnInSchema(schema, "session") + + // session window with dynamic gap + val windowedProject2 = dfWithDesiredNullability + 
.select(session_window($"time", udf($"id")).as("session"), $"value") + val schema2 = windowedProject2.queryExecution.optimizedPlan.schema + validateWindowColumnInSchema(schema2, "session") + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala index e9a145cec01c2..bd39453f5120e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import java.time.LocalDateTime +import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, Filter} import org.apache.spark.sql.functions._ @@ -527,4 +528,51 @@ class DataFrameTimeWindowingSuite extends QueryTest with SharedSparkSession { "when windowDuration is multiple of slideDuration") } } + + test("SPARK-38227: 'start' and 'end' fields should be nullable") { + // We expect the fields in window struct as nullable since the dataType of TimeWindow defines + // them as nullable. The rule 'TimeWindowing' should respect the dataType. + val df1 = Seq( + ("2016-03-27 09:00:05", 1), + ("2016-03-27 09:00:32", 2)).toDF("time", "value") + val df2 = Seq( + (LocalDateTime.parse("2016-03-27T09:00:05"), 1), + (LocalDateTime.parse("2016-03-27T09:00:32"), 2)).toDF("time", "value") + + def validateWindowColumnInSchema(schema: StructType, colName: String): Unit = { + schema.find(_.name == colName) match { + case Some(StructField(_, st: StructType, _, _)) => + assertFieldInWindowStruct(st, "start") + assertFieldInWindowStruct(st, "end") + + case _ => fail("Failed to find suitable window column from DataFrame!") + } + } + + def assertFieldInWindowStruct(windowType: StructType, fieldName: String): Unit = { + val field = windowType.fields.find(_.name == fieldName) + assert(field.isDefined, s"'$fieldName' field should exist in window struct") + assert(field.get.nullable, s"'$fieldName' field should be nullable") + } + + for { + df <- Seq(df1, df2) + nullable <- Seq(true, false) + } { + val dfWithDesiredNullability = new DataFrame(df.queryExecution, RowEncoder( + StructType(df.schema.fields.map(_.copy(nullable = nullable))))) + // tumbling windows + val windowedProject = dfWithDesiredNullability + .select(window($"time", "10 seconds").as("window"), $"value") + val schema = windowedProject.queryExecution.optimizedPlan.schema + validateWindowColumnInSchema(schema, "window") + + // sliding windows + val windowedProject2 = dfWithDesiredNullability + .select(window($"time", "10 seconds", "3 seconds").as("window"), + $"value") + val schema2 = windowedProject2.queryExecution.optimizedPlan.schema + validateWindowColumnInSchema(schema2, "window") + } + } } From 62421455e552b892d6e4f908d55a5886b37a37a9 Mon Sep 17 00:00:00 2001 From: Sathiya KUMAR Date: Mon, 21 Feb 2022 14:28:29 +0800 Subject: [PATCH 286/513] [SPARK-37475][SQL] Add scale parameter to floor and ceil functions ### What changes were proposed in this pull request? Adds `scale` parameter to `floor`/`ceil` functions in order to allow users to control the rounding position. This feature is proposed in the PR: https://github.com/apache/spark/pull/34593 ### Why are the changes needed? Currently we support Decimal RoundingModes : HALF_UP (round) and HALF_EVEN (bround). 
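For reference (not part of this PR), a small sketch of the existing behaviour, assuming a running `SparkSession` named `spark`; `round` applies HALF_UP and `bround` applies HALF_EVEN, so neither gives an always-round-up or always-round-down behaviour:

```
// round() uses HALF_UP, bround() uses HALF_EVEN (banker's rounding).
spark.sql("SELECT round(2.5), bround(2.5)").show()   // 3 and 2
spark.sql("SELECT round(-2.5), bround(-2.5)").show() // -3 and -2
```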
But we have use cases that need RoundingMode.UP and RoundingMode.DOWN. The floor and ceil functions help with this, but they do not support specifying the rounding position. Adding a scale parameter to these functions lets us control the rounding position. Snowflake supports a `scale` parameter for `floor`/`ceil`: `FLOOR( <input_expr> [, <scale_expr> ] )` REF: https://docs.snowflake.com/en/sql-reference/functions/floor.html ### Does this PR introduce _any_ user-facing change? Yes, users can now pass a `scale` parameter to the `floor` and `ceil` functions. ``` > SELECT floor(-0.1); -1 > SELECT floor(5); 5 > SELECT floor(3.1411, 3); 3.141 > SELECT floor(3.1411, -3); 0 > SELECT ceil(-0.1); 0 > SELECT ceil(5); 5 > SELECT ceil(3.1411, 3); 3.142 > SELECT ceil(3.1411, -3); 1000 ``` ### How was this patch tested? This patch was tested locally with unit tests and the Git workflow. Closes #34729 from sathiyapk/SPARK-37475-floor-ceil-scale. Authored-by: Sathiya KUMAR Signed-off-by: Wenchen Fan --- .../catalyst/analysis/FunctionRegistry.scala | 13 +- .../expressions/mathExpressions.scala | 181 +++++++++++++--- .../sql/errors/QueryCompilationErrors.scala | 8 + .../expressions/MathExpressionsSuite.scala | 131 +++++++++++- .../org/apache/spark/sql/functions.scala | 32 ++- .../inputs/ceil-floor-with-scale-param.sql | 27 +++ .../ceil-floor-with-scale-param.sql.out | 200 ++++++++++++++++++ .../apache/spark/sql/MathFunctionsSuite.scala | 69 +++++- 8 files changed, 611 insertions(+), 50 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/ceil-floor-with-scale-param.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/ceil-floor-with-scale-param.sql.out diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 0bc349ce37901..12fa7231b0a91 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -363,8 +363,8 @@ object FunctionRegistry { expression[Bin]("bin"), expression[BRound]("bround"), expression[Cbrt]("cbrt"), - expression[Ceil]("ceil"), - expression[Ceil]("ceiling", true), + expressionBuilder("ceil", CeilExpressionBuilder), + expressionBuilder("ceiling", CeilExpressionBuilder, true), expression[Cos]("cos"), expression[Sec]("sec"), expression[Cosh]("cosh"), @@ -373,7 +373,7 @@ expression[EulerNumber]("e"), expression[Exp]("exp"), expression[Expm1]("expm1"), - expression[Floor]("floor"), + expressionBuilder("floor", FloorExpressionBuilder), expression[Factorial]("factorial"), expression[Hex]("hex"), expression[Hypot]("hypot"), @@ -806,11 +806,14 @@ object FunctionRegistry { } private def expressionBuilder[T <: ExpressionBuilder : ClassTag]( - name: String, builder: T): (String, (ExpressionInfo, FunctionBuilder)) = { + name: String, builder: T, setAlias: Boolean = false) + : (String, (ExpressionInfo, FunctionBuilder)) = { val info = FunctionRegistryBase.expressionInfo[T](name, None) val funcBuilder = (expressions: Seq[Expression]) => { assert(expressions.forall(_.resolved), "function arguments must be resolved.") - builder.build(expressions) + val expr = builder.build(expressions) + if (setAlias) expr.setTagValue(FUNC_ALIAS, name) + expr } (name, (info, funcBuilder)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala index 03f9da66cab48..d34b8379bb601 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala @@ -21,11 +21,12 @@ import java.{lang => jl} import java.util.Locale import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult} +import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry, TypeCheckResult} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.{NumberConverter, TypeUtils} +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -238,17 +239,6 @@ case class Cbrt(child: Expression) extends UnaryMathExpression(math.cbrt, "CBRT" override protected def withNewChildInternal(newChild: Expression): Cbrt = copy(child = newChild) } -@ExpressionDescription( - usage = "_FUNC_(expr) - Returns the smallest integer not smaller than `expr`.", - examples = """ - Examples: - > SELECT _FUNC_(-0.1); - 0 - > SELECT _FUNC_(5); - 5 - """, - since = "1.4.0", - group = "math_funcs") case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL") { override def dataType: DataType = child.dataType match { case dt @ DecimalType.Fixed(_, 0) => dt @@ -279,6 +269,77 @@ case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL" override protected def withNewChildInternal(newChild: Expression): Ceil = copy(child = newChild) } +trait CeilFloorExpressionBuilder extends ExpressionBuilder { + val functionName: String + def build(expressions: Seq[Expression]): Expression + + def extractChildAndScaleParam(expressions: Seq[Expression]): (Expression, Expression) = { + val child = expressions(0) + val scale = expressions(1) + if (! (scale.foldable && scale.dataType == DataTypes.IntegerType)) { + throw QueryCompilationErrors.invalidScaleParameterRoundBase(functionName) + } + val scaleV = scale.eval(EmptyRow) + if (scaleV == null) { + throw QueryCompilationErrors.invalidScaleParameterRoundBase(functionName) + } + (child, scale) + } +} + +@ExpressionDescription( + usage = """ + _FUNC_(expr[, scale]) - Returns the smallest number after rounding up that is not smaller + than `expr`. 
A optional `scale` parameter can be specified to control the rounding behavior.""", + examples = """ + Examples: + > SELECT _FUNC_(-0.1); + 0 + > SELECT _FUNC_(5); + 5 + > SELECT _FUNC_(3.1411, 3); + 3.142 + > SELECT _FUNC_(3.1411, -3); + 1000 + """, + since = "3.3.0", + group = "math_funcs") +object CeilExpressionBuilder extends CeilFloorExpressionBuilder { + val functionName: String = "ceil" + + def build(expressions: Seq[Expression]): Expression = { + if (expressions.length == 1) { + Ceil(expressions.head) + } else if (expressions.length == 2) { + val (child, scale) = extractChildAndScaleParam(expressions) + RoundCeil(child, scale) + } else { + throw QueryCompilationErrors.invalidNumberOfFunctionParameters(functionName) + } + } +} + +case class RoundCeil(child: Expression, scale: Expression) + extends RoundBase(child, scale, BigDecimal.RoundingMode.CEILING, "ROUND_CEILING") + with Serializable with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType, IntegerType) + + override lazy val dataType: DataType = child.dataType match { + case DecimalType.Fixed(p, s) => + if (_scale < 0) { + DecimalType(math.max(p, 1 - _scale), 0) + } else { + DecimalType(p, math.min(s, _scale)) + } + case t => t + } + + override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression) + : RoundCeil = copy(child = newLeft, scale = newRight) + override def nodeName: String = "ceil" +} + @ExpressionDescription( usage = """ _FUNC_(expr) - Returns the cosine of `expr`, as if computed by @@ -448,17 +509,6 @@ case class Expm1(child: Expression) extends UnaryMathExpression(StrictMath.expm1 override protected def withNewChildInternal(newChild: Expression): Expm1 = copy(child = newChild) } -@ExpressionDescription( - usage = "_FUNC_(expr) - Returns the largest integer not greater than `expr`.", - examples = """ - Examples: - > SELECT _FUNC_(-0.1); - -1 - > SELECT _FUNC_(5); - 5 - """, - since = "1.4.0", - group = "math_funcs") case class Floor(child: Expression) extends UnaryMathExpression(math.floor, "FLOOR") { override def dataType: DataType = child.dataType match { case dt @ DecimalType.Fixed(_, 0) => dt @@ -484,9 +534,62 @@ case class Floor(child: Expression) extends UnaryMathExpression(math.floor, "FLO case LongType => defineCodeGen(ctx, ev, c => s"$c") case _ => defineCodeGen(ctx, ev, c => s"(long)(java.lang.Math.${funcName}($c))") } + } + override protected def withNewChildInternal(newChild: Expression): Floor = + copy(child = newChild) +} + +@ExpressionDescription( + usage = """ + _FUNC_(expr[, scale]) - Returns the largest number after rounding down that is not greater + than `expr`. 
An optional `scale` parameter can be specified to control the rounding behavior.""", + examples = """ + Examples: + > SELECT _FUNC_(-0.1); + -1 + > SELECT _FUNC_(5); + 5 + > SELECT _FUNC_(3.1411, 3); + 3.141 + > SELECT _FUNC_(3.1411, -3); + 0 + """, + since = "3.3.0", + group = "math_funcs") +object FloorExpressionBuilder extends CeilFloorExpressionBuilder { + val functionName: String = "floor" + + def build(expressions: Seq[Expression]): Expression = { + if (expressions.length == 1) { + Floor(expressions.head) + } else if (expressions.length == 2) { + val(child, scale) = extractChildAndScaleParam(expressions) + RoundFloor(child, scale) + } else { + throw QueryCompilationErrors.invalidNumberOfFunctionParameters(functionName) + } } +} - override protected def withNewChildInternal(newChild: Expression): Floor = copy(child = newChild) +case class RoundFloor(child: Expression, scale: Expression) + extends RoundBase(child, scale, BigDecimal.RoundingMode.FLOOR, "ROUND_FLOOR") + with Serializable with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType, IntegerType) + + override lazy val dataType: DataType = child.dataType match { + case DecimalType.Fixed(p, s) => + if (_scale < 0) { + DecimalType(math.max(p, 1 - _scale), 0) + } else { + DecimalType(p, math.min(s, _scale)) + } + case t => t + } + + override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression) + : RoundFloor = copy(child = newLeft, scale = newRight) + override def nodeName: String = "floor" } object Factorial { @@ -1375,7 +1478,7 @@ abstract class RoundBase(child: Expression, scale: Expression, // avoid unnecessary `child` evaluation in both codegen and non-codegen eval // by checking if scaleV == null as well. private lazy val scaleV: Any = scale.eval(EmptyRow) - private lazy val _scale: Int = scaleV.asInstanceOf[Int] + protected lazy val _scale: Int = scaleV.asInstanceOf[Int] override def eval(input: InternalRow): Any = { if (scaleV == null) { // if scale is null, no need to eval its child at all @@ -1393,10 +1496,14 @@ abstract class RoundBase(child: Expression, scale: Expression, // not overriding since _scale is a constant int at runtime def nullSafeEval(input1: Any): Any = { dataType match { - case DecimalType.Fixed(_, s) => + case DecimalType.Fixed(p, s) => val decimal = input1.asInstanceOf[Decimal] - // Overflow cannot happen, so no need to control nullOnOverflow - decimal.toPrecision(decimal.precision, s, mode) + if (_scale >= 0) { + // Overflow cannot happen, so no need to control nullOnOverflow + decimal.toPrecision(decimal.precision, s, mode) + } else { + Decimal(decimal.toBigDecimal.setScale(_scale, mode), p, s) + } case ByteType => BigDecimal(input1.asInstanceOf[Byte]).setScale(_scale, mode).toByte case ShortType => @@ -1426,12 +1533,18 @@ abstract class RoundBase(child: Expression, scale: Expression, val ce = child.genCode(ctx) val evaluationCode = dataType match { - case DecimalType.Fixed(_, s) => - s""" - |${ev.value} = ${ce.value}.toPrecision(${ce.value}.precision(), $s, - | Decimal.$modeStr(), true); - |${ev.isNull} = ${ev.value} == null; - """.stripMargin + case DecimalType.Fixed(p, s) => + if (_scale >= 0) { + s""" + ${ev.value} = ${ce.value}.toPrecision(${ce.value}.precision(), $s, + Decimal.$modeStr(), true); + ${ev.isNull} = ${ev.value} == null;""" + } else { + s""" + ${ev.value} = new Decimal().set(${ce.value}.toBigDecimal() + .setScale(${_scale}, Decimal.$modeStr()), $p, $s); + ${ev.isNull} = ${ev.value} == null;""" + } case ByteType => 
if (_scale < 0) { s""" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 71caafee2da4f..28be81d6ae439 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -2375,4 +2375,12 @@ object QueryCompilationErrors { new AnalysisException( "Sinks cannot request distribution and ordering in continuous execution mode") } + + def invalidScaleParameterRoundBase(function: String): Throwable = { + new AnalysisException(s"The 'scale' parameter of function '$function' must be an int constant.") + } + + def invalidNumberOfFunctionParameters(function: String): Throwable = { + new AnalysisException(s"Invalid number of parameters to the function '$function'.") + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala index ea0d619ad4c15..5281643b7b107 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala @@ -321,11 +321,21 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkConsistencyBetweenInterpretedAndCodegen(Cbrt, DoubleType) } - def checkDataTypeAndCast(expression: UnaryMathExpression): Expression = { + def checkDataTypeAndCast(expression: Expression): Expression = expression match { + case e: UnaryMathExpression => checkDataTypeAndCastUnaryMathExpression(e) + case e: RoundBase => checkDataTypeAndCastRoundBase(e) + } + + def checkDataTypeAndCastUnaryMathExpression(expression: UnaryMathExpression): Expression = { val expNew = implicitCast(expression.child, expression.inputTypes(0)).getOrElse(expression) expression.withNewChildren(Seq(expNew)) } + def checkDataTypeAndCastRoundBase(expression: RoundBase): Expression = { + val expNewLeft = implicitCast(expression.left, expression.inputTypes(0)).getOrElse(expression) + expression.withNewChildren(Seq(expNewLeft, expression.right)) + } + test("ceil") { testUnary(Ceil, (d: Double) => math.ceil(d).toLong) checkConsistencyBetweenInterpretedAndCodegen(Ceil, DoubleType) @@ -630,7 +640,7 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkConsistencyBetweenInterpretedAndCodegen(Logarithm, DoubleType, DoubleType) } - test("round/bround") { + test("round/bround/floor/ceil") { val scales = -6 to 6 val doublePi: Double = math.Pi val shortPi: Short = 31415 @@ -658,6 +668,66 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val intResultsB: Seq[Int] = Seq(314000000, 314200000, 314160000, 314159000, 314159300, 314159260) ++ Seq.fill(7)(314159265) + def doubleResultsFloor(i: Int): Decimal = { + val results = Seq(0, 0, 0, 0, 0, 0, 3, + 3.1, 3.14, 3.141, 3.1415, 3.14159, 3.141592) + Decimal(results(i)) + } + + def doubleResultsCeil(i: Int): Any = { + val results = Seq(1000000, 100000, 10000, 1000, 100, 10, + 4, 3.2, 3.15, 3.142, 3.1416, 3.1416, 3.141593) + Decimal(results(i)) + } + + def floatResultsFloor(i: Int): Any = { + val results = Seq(0, 0, 0, 0, 0, 0, 3, + 3.1, 3.14, 3.141, 3.1415, 3.1415, 3.1415) + Decimal(results(i)) + } + + def floatResultsCeil(i: Int): Any = { + val results = 
Seq(1000000, 100000, 10000, 1000, 100, 10, 4, + 3.2, 3.15, 3.142, 3.1415, 3.1415, 3.1415) + Decimal(results(i)) + } + + def shortResultsFloor(i: Int): Decimal = { + val results = Seq(0, 0, 30000, 31000, 31400, 31410) ++ Seq.fill(7)(31415) + Decimal(results(i)) + } + + def shortResultsCeil(i: Int): Decimal = { + val results = Seq(1000000, 100000, 40000, 32000, 31500, 31420) ++ Seq.fill(7)(31415) + Decimal(results(i)) + } + + def longResultsFloor(i: Int): Decimal = { + val results = Seq(31415926535000000L, 31415926535800000L, 31415926535890000L, + 31415926535897000L, 31415926535897900L, 31415926535897930L, 31415926535897932L) ++ + Seq.fill(6)(31415926535897932L) + Decimal(results(i)) + } + + def longResultsCeil(i: Int): Decimal = { + val results = Seq(31415926536000000L, 31415926535900000L, 31415926535900000L, + 31415926535898000L, 31415926535898000L, 31415926535897940L) ++ + Seq.fill(7)(31415926535897932L) + Decimal(results(i)) + } + + def intResultsFloor(i: Int): Decimal = { + val results = Seq(314000000, 314100000, 314150000, 314159000, + 314159200, 314159260) ++ Seq.fill(7)(314159265) + Decimal(results(i)) + } + + def intResultsCeil(i: Int): Decimal = { + val results = Seq(315000000, 314200000, 314160000, 314160000, + 314159300, 314159270) ++ Seq.fill(7)(314159265) + Decimal(results(i)) + } + scales.zipWithIndex.foreach { case (scale, i) => checkEvaluation(Round(doublePi, scale), doubleResults(i), EmptyRow) checkEvaluation(Round(shortPi, scale), shortResults(i), EmptyRow) @@ -669,19 +739,52 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(BRound(intPi, scale), intResultsB(i), EmptyRow) checkEvaluation(BRound(longPi, scale), longResults(i), EmptyRow) checkEvaluation(BRound(floatPi, scale), floatResults(i), EmptyRow) + checkEvaluation(checkDataTypeAndCast( + RoundFloor(Literal(doublePi), Literal(scale))), doubleResultsFloor(i), EmptyRow) + checkEvaluation(checkDataTypeAndCast( + RoundFloor(Literal(shortPi), Literal(scale))), shortResultsFloor(i), EmptyRow) + checkEvaluation(checkDataTypeAndCast( + RoundFloor(Literal(intPi), Literal(scale))), intResultsFloor(i), EmptyRow) + checkEvaluation(checkDataTypeAndCast( + RoundFloor(Literal(longPi), Literal(scale))), longResultsFloor(i), EmptyRow) + checkEvaluation(checkDataTypeAndCast( + RoundFloor(Literal(floatPi), Literal(scale))), floatResultsFloor(i), EmptyRow) + checkEvaluation(checkDataTypeAndCast( + RoundCeil(Literal(doublePi), Literal(scale))), doubleResultsCeil(i), EmptyRow) + checkEvaluation(checkDataTypeAndCast( + RoundCeil(Literal(shortPi), Literal(scale))), shortResultsCeil(i), EmptyRow) + checkEvaluation(checkDataTypeAndCast( + RoundCeil(Literal(intPi), Literal(scale))), intResultsCeil(i), EmptyRow) + checkEvaluation(checkDataTypeAndCast( + RoundCeil(Literal(longPi), Literal(scale))), longResultsCeil(i), EmptyRow) + checkEvaluation(checkDataTypeAndCast( + RoundCeil(Literal(floatPi), Literal(scale))), floatResultsCeil(i), EmptyRow) } val bdResults: Seq[BigDecimal] = Seq(BigDecimal(3), BigDecimal("3.1"), BigDecimal("3.14"), BigDecimal("3.142"), BigDecimal("3.1416"), BigDecimal("3.14159"), BigDecimal("3.141593"), BigDecimal("3.1415927")) + val bdResultsFloor: Seq[BigDecimal] = + Seq(BigDecimal(3), BigDecimal("3.1"), BigDecimal("3.14"), + BigDecimal("3.141"), BigDecimal("3.1415"), BigDecimal("3.14159"), + BigDecimal("3.141592"), BigDecimal("3.1415927")) + + val bdResultsCeil: Seq[BigDecimal] = Seq(BigDecimal(4), BigDecimal("3.2"), BigDecimal("3.15"), + BigDecimal("3.142"), 
BigDecimal("3.1416"), BigDecimal("3.14160"), + BigDecimal("3.141593"), BigDecimal("3.1415927")) + (0 to 7).foreach { i => checkEvaluation(Round(bdPi, i), bdResults(i), EmptyRow) checkEvaluation(BRound(bdPi, i), bdResults(i), EmptyRow) + checkEvaluation(RoundFloor(bdPi, i), bdResultsFloor(i), EmptyRow) + checkEvaluation(RoundCeil(bdPi, i), bdResultsCeil(i), EmptyRow) } (8 to 10).foreach { scale => checkEvaluation(Round(bdPi, scale), bdPi, EmptyRow) checkEvaluation(BRound(bdPi, scale), bdPi, EmptyRow) + checkEvaluation(RoundFloor(bdPi, scale), bdPi, EmptyRow) + checkEvaluation(RoundCeil(bdPi, scale), bdPi, EmptyRow) } DataTypeTestUtils.numericTypes.foreach { dataType => @@ -691,6 +794,10 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(BRound(Literal.create(null, dataType), Literal(2)), null) checkEvaluation(BRound(Literal.create(null, dataType), Literal.create(null, IntegerType)), null) + checkEvaluation(checkDataTypeAndCast( + RoundFloor(Literal.create(null, dataType), Literal(2))), null) + checkEvaluation(checkDataTypeAndCast( + RoundCeil(Literal.create(null, dataType), Literal(2))), null) } checkEvaluation(Round(2.5, 0), 3.0) @@ -705,6 +812,26 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(BRound(-3.5, 0), -4.0) checkEvaluation(BRound(-0.35, 1), -0.4) checkEvaluation(BRound(-35, -1), -40) + checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(2.5), Literal(0))), Decimal(2)) + checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(3.5), Literal(0))), Decimal(3)) + checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(-2.5), Literal(0))), Decimal(-3L)) + checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(-3.5), Literal(0))), Decimal(-4L)) + checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(-0.35), Literal(1))), Decimal(-0.4)) + checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(-35), Literal(-1))), Decimal(-40)) + checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(-0.1), Literal(0))), Decimal(-1)) + checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(5), Literal(0))), Decimal(5)) + checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(3.1411), Literal(-3))), Decimal(0)) + checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(135.135), Literal(-2))), Decimal(100)) + checkEvaluation(checkDataTypeAndCast(RoundCeil(Literal(2.5), Literal(0))), Decimal(3)) + checkEvaluation(checkDataTypeAndCast(RoundCeil(Literal(3.5), Literal(0))), Decimal(4L)) + checkEvaluation(checkDataTypeAndCast(RoundCeil(Literal(-2.5), Literal(0))), Decimal(-2L)) + checkEvaluation(checkDataTypeAndCast(RoundCeil(Literal(-3.5), Literal(0))), Decimal(-3L)) + checkEvaluation(checkDataTypeAndCast(RoundCeil(Literal(-0.35), Literal(1))), Decimal(-0.3)) + checkEvaluation(checkDataTypeAndCast(RoundCeil(Literal(-35), Literal(-1))), Decimal(-30)) + checkEvaluation(checkDataTypeAndCast(RoundCeil(Literal(-0.1), Literal(0))), Decimal(0)) + checkEvaluation(checkDataTypeAndCast(RoundCeil(Literal(5), Literal(0))), Decimal(5)) + checkEvaluation(checkDataTypeAndCast(RoundCeil(Literal(3.1411), Literal(-3))), Decimal(1000)) + checkEvaluation(checkDataTypeAndCast(RoundCeil(Literal(135.135), Literal(-2))), Decimal(200)) } test("SPARK-36922: Support ANSI intervals for SIGN/SIGNUM") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 0db12a24e6ef9..ea410a67ed279 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1768,15 +1768,25 @@ object functions { def cbrt(columnName: String): Column = cbrt(Column(columnName)) /** - * Computes the ceiling of the given value. + * Computes the ceiling of the given value of `e` to `scale` decimal places. + * + * @group math_funcs + * @since 3.3.0 + */ + def ceil(e: Column, scale: Column): Column = withExpr { + UnresolvedFunction(Seq("ceil"), Seq(e.expr, scale.expr), isDistinct = false) + } + + /** + * Computes the ceiling of the given value of `e` to 0 decimal places. * * @group math_funcs * @since 1.4.0 */ - def ceil(e: Column): Column = withExpr { Ceil(e.expr) } + def ceil(e: Column): Column = ceil(e, lit(0)) /** - * Computes the ceiling of the given column. + * Computes the ceiling of the given value of `e` to 0 decimal places. * * @group math_funcs * @since 1.4.0 @@ -1888,15 +1898,25 @@ object functions { def factorial(e: Column): Column = withExpr { Factorial(e.expr) } /** - * Computes the floor of the given value. + * Computes the floor of the given value of `e` to `scale` decimal places. + * + * @group math_funcs + * @since 3.3.0 + */ + def floor(e: Column, scale: Column): Column = withExpr { + UnresolvedFunction(Seq("floor"), Seq(e.expr, scale.expr), isDistinct = false) + } + + /** + * Computes the floor of the given value of `e` to 0 decimal places. * * @group math_funcs * @since 1.4.0 */ - def floor(e: Column): Column = withExpr { Floor(e.expr) } + def floor(e: Column): Column = floor(e, lit(0)) /** - * Computes the floor of the given column. + * Computes the floor of the given column value to 0 decimal places. * * @group math_funcs * @since 1.4.0 diff --git a/sql/core/src/test/resources/sql-tests/inputs/ceil-floor-with-scale-param.sql b/sql/core/src/test/resources/sql-tests/inputs/ceil-floor-with-scale-param.sql new file mode 100644 index 0000000000000..1baee30a8cf9a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/ceil-floor-with-scale-param.sql @@ -0,0 +1,27 @@ +-- Tests different scenarios of ceil and floor functions with scale parameters +SELECT CEIL(2.5, 0); +SELECT CEIL(3.5, 0); +SELECT CEIL(-2.5, 0); +SELECT CEIL(-3.5, 0); +SELECT CEIL(-0.35, 1); +SELECT CEIL(-35, -1); +SELECT CEIL(-0.1, 0); +SELECT CEIL(5, 0); +SELECT CEIL(3.14115, -3); +SELECT CEIL(2.5, null); +SELECT CEIL(2.5, 'a'); +SELECT CEIL(2.5, 0, 0); + +-- Same inputs with floor function +SELECT FLOOR(2.5, 0); +SELECT FLOOR(3.5, 0); +SELECT FLOOR(-2.5, 0); +SELECT FLOOR(-3.5, 0); +SELECT FLOOR(-0.35, 1); +SELECT FLOOR(-35, -1); +SELECT FLOOR(-0.1, 0); +SELECT FLOOR(5, 0); +SELECT FLOOR(3.14115, -3); +SELECT FLOOR(2.5, null); +SELECT FLOOR(2.5, 'a'); +SELECT FLOOR(2.5, 0, 0); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/ceil-floor-with-scale-param.sql.out b/sql/core/src/test/resources/sql-tests/results/ceil-floor-with-scale-param.sql.out new file mode 100644 index 0000000000000..1ec00af1237cf --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/ceil-floor-with-scale-param.sql.out @@ -0,0 +1,200 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 18 + + +-- !query +SELECT CEIL(2.5, 0) +-- !query schema +struct +-- !query output +3 + + +-- !query +SELECT CEIL(3.5, 0) +-- !query schema +struct +-- !query output +4 + + +-- !query +SELECT CEIL(-2.5, 0) +-- !query schema +struct +-- !query output +-2 + + +-- !query +SELECT CEIL(-3.5, 0) +-- !query schema +struct +-- 
!query output +-3 + + +-- !query +SELECT CEIL(-0.35, 1) +-- !query schema +struct +-- !query output +-0.3 + + +-- !query +SELECT CEIL(-35, -1) +-- !query schema +struct +-- !query output +-30 + + +-- !query +SELECT CEIL(-0.1, 0) +-- !query schema +struct +-- !query output +0 + + +-- !query +SELECT CEIL(5, 0) +-- !query schema +struct +-- !query output +5 + + +-- !query +SELECT CEIL(3.14115, -3) +-- !query schema +struct +-- !query output +1000 + + +-- !query +SELECT CEIL(2.5, null) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +The 'scale' parameter of function 'ceil' must be an int constant.; line 1 pos 7 + + +-- !query +SELECT CEIL(2.5, 'a') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +The 'scale' parameter of function 'ceil' must be an int constant.; line 1 pos 7 + + +-- !query +SELECT CEIL(2.5, 0, 0) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Invalid number of parameters to the function 'ceil'.; line 1 pos 7 + + +-- !query +SELECT FLOOR(2.5, 0) +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT FLOOR(3.5, 0) +-- !query schema +struct +-- !query output +3 + + +-- !query +SELECT FLOOR(-2.5, 0) +-- !query schema +struct +-- !query output +-3 + + +-- !query +SELECT FLOOR(-3.5, 0) +-- !query schema +struct +-- !query output +-4 + + +-- !query +SELECT FLOOR(-0.35, 1) +-- !query schema +struct +-- !query output +-0.4 + + +-- !query +SELECT FLOOR(-35, -1) +-- !query schema +struct +-- !query output +-40 + + +-- !query +SELECT FLOOR(-0.1, 0) +-- !query schema +struct +-- !query output +-1 + + +-- !query +SELECT FLOOR(5, 0) +-- !query schema +struct +-- !query output +5 + + +-- !query +SELECT FLOOR(3.14115, -3) +-- !query schema +struct +-- !query output +0 + + +-- !query +SELECT FLOOR(2.5, null) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +The 'scale' parameter of function 'floor' must be an int constant.; line 1 pos 7 + + +-- !query +SELECT FLOOR(2.5, 'a') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +The 'scale' parameter of function 'floor' must be an int constant.; line 1 pos 7 + + +-- !query +SELECT FLOOR(2.5, 0, 0) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Invalid number of parameters to the function 'floor'.; line 1 pos 7 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index ce25a8869c8b8..f3bff7389ee74 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -268,7 +268,7 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { testOneToOneMathFunction(rint, math.rint) } - test("round/bround") { + test("round/bround/ceil/floor") { val df = Seq(5, 55, 555).map(Tuple1(_)).toDF("a") checkAnswer( df.select(round('a), round('a, -1), round('a, -2)), @@ -278,6 +278,14 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { df.select(bround('a), bround('a, -1), bround('a, -2)), Seq(Row(5, 0, 0), Row(55, 60, 100), Row(555, 560, 600)) ) + checkAnswer( + df.select(ceil('a), ceil('a, lit(-1)), ceil('a, lit(-2))), + Seq(Row(5, 10, 100), Row(55, 60, 100), Row(555, 560, 600)) + ) + checkAnswer( + df.select(floor('a), floor('a, lit(-1)), floor('a, lit(-2))), + Seq(Row(5, 0, 0), Row(55, 50, 0), 
Row(555, 550, 500)) + ) withSQLConf(SQLConf.LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED.key -> "true") { val pi = "3.1415" @@ -293,6 +301,18 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row(BigDecimal("0E3"), BigDecimal("0E2"), BigDecimal("0E1"), BigDecimal(3), BigDecimal("3.1"), BigDecimal("3.14"), BigDecimal("3.142"))) ) + checkAnswer( + sql(s"SELECT ceil($pi), ceil($pi, -3), ceil($pi, -2), ceil($pi, -1), " + + s"ceil($pi, 0), ceil($pi, 1), ceil($pi, 2), ceil($pi, 3)"), + Seq(Row(BigDecimal(4), BigDecimal("1E3"), BigDecimal("1E2"), BigDecimal("1E1"), + BigDecimal(4), BigDecimal("3.2"), BigDecimal("3.15"), BigDecimal("3.142"))) + ) + checkAnswer( + sql(s"SELECT floor($pi), floor($pi, -3), floor($pi, -2), floor($pi, -1), " + + s"floor($pi, 0), floor($pi, 1), floor($pi, 2), floor($pi, 3)"), + Seq(Row(BigDecimal(3), BigDecimal("0E3"), BigDecimal("0E2"), BigDecimal("0E1"), + BigDecimal(3), BigDecimal("3.1"), BigDecimal("3.14"), BigDecimal("3.141"))) + ) } val bdPi: BigDecimal = BigDecimal(31415925L, 7) @@ -307,9 +327,20 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { s"bround($bdPi, 100), bround($bdPi, 6), bround(null, 8)"), Seq(Row(bdPi, bdPi, bdPi, bdPi, bdPi, BigDecimal("3.141592"), null)) ) + checkAnswer( + sql(s"SELECT ceil($bdPi, 7), ceil($bdPi, 8), ceil($bdPi, 9), ceil($bdPi, 10), " + + s"ceil($bdPi, 100), ceil($bdPi, 6), ceil(null, 8)"), + Seq(Row(bdPi, bdPi, bdPi, bdPi, bdPi, BigDecimal("3.141593"), null)) + ) + + checkAnswer( + sql(s"SELECT floor($bdPi, 7), floor($bdPi, 8), floor($bdPi, 9), floor($bdPi, 10), " + + s"floor($bdPi, 100), floor($bdPi, 6), floor(null, 8)"), + Seq(Row(bdPi, bdPi, bdPi, bdPi, bdPi, BigDecimal("3.141592"), null)) + ) } - test("round/bround with data frame from a local Seq of Product") { + test("round/bround/ceil/floor with data frame from a local Seq of Product") { val df = spark.createDataFrame(Seq(Tuple1(BigDecimal("5.9")))).toDF("value") checkAnswer( df.withColumn("value_rounded", round('value)), @@ -319,9 +350,23 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { df.withColumn("value_brounded", bround('value)), Seq(Row(BigDecimal("5.9"), BigDecimal("6"))) ) + checkAnswer( + df + .withColumn("value_ceil", ceil('value)) + .withColumn("value_ceil1", ceil('value, lit(0))) + .withColumn("value_ceil2", ceil('value, lit(1))), + Seq(Row(BigDecimal("5.9"), BigDecimal("6"), BigDecimal("6"), BigDecimal("5.9"))) + ) + checkAnswer( + df + .withColumn("value_floor", floor('value)) + .withColumn("value_floor1", floor('value, lit(0))) + .withColumn("value_floor2", floor('value, lit(1))), + Seq(Row(BigDecimal("5.9"), BigDecimal("5"), BigDecimal("5"), BigDecimal("5.9"))) + ) } - test("round/bround with table columns") { + test("round/bround/ceil/floor with table columns") { withTable("t") { Seq(BigDecimal("5.9")).toDF("i").write.saveAsTable("t") checkAnswer( @@ -330,6 +375,24 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer( sql("select i, bround(i) from t"), Seq(Row(BigDecimal("5.9"), BigDecimal("6")))) + checkAnswer( + sql("select i, ceil(i) from t"), + Seq(Row(BigDecimal("5.9"), BigDecimal("6")))) + checkAnswer( + sql("select i, ceil(i, 0) from t"), + Seq(Row(BigDecimal("5.9"), BigDecimal("6")))) + checkAnswer( + sql("select i, ceil(i, 1) from t"), + Seq(Row(BigDecimal("5.9"), BigDecimal("5.9")))) + checkAnswer( + sql("select i, floor(i) from t"), + Seq(Row(BigDecimal("5.9"), BigDecimal("5")))) + checkAnswer( + sql("select i, floor(i, 0) from t"), + 
Seq(Row(BigDecimal("5.9"), BigDecimal("5")))) + checkAnswer( + sql("select i, floor(i, 1) from t"), + Seq(Row(BigDecimal("5.9"), BigDecimal("5.9")))) } } From 17567f8ce14a35ae0af9ea49c255bfe2bc4c705f Mon Sep 17 00:00:00 2001 From: Zhenhua Wang Date: Mon, 21 Feb 2022 15:40:21 +0800 Subject: [PATCH 287/513] [SPARK-38140][SQL] Desc column stats (min, max) for timestamp type is not consistent with the values due to time zone difference ### What changes were proposed in this pull request? Currently timestamp column's stats (min/max) are stored using UTC time zone in metastore, and when desc its min/max column stats, they are also shown in UTC. As a result, for users not in UTC, the column stats (shown to users) are not consistent with the actual value, which causes confusion. Note that it does not affect correctness. But we'd better to remove confusion for users. ### Why are the changes needed? To make column stats and column value consistent when shown to users. ### Does this PR introduce _any_ user-facing change? As an example: ``` spark-sql> create table tab_ts_master (ts timestamp) using parquet; spark-sql> insert into tab_ts_master values make_timestamp(2022, 1, 1, 0, 0, 1.123456), make_timestamp(2022, 1, 3, 0, 0, 2.987654); spark-sql> select * from tab_ts_master; 2022-01-01 00:00:01.123456 2022-01-03 00:00:02.987654 spark-sql> set spark.sql.session.timeZone; spark.sql.session.timeZone Asia/Shanghai spark-sql> analyze table tab_ts_master compute statistics for all columns; ``` Before this change: ``` spark-sql> desc formatted tab_ts_master ts; col_name ts data_type timestamp comment NULL min 2021-12-31 16:00:01.123456 max 2022-01-02 16:00:02.987654 num_nulls 0 distinct_count 2 avg_col_len 8 max_col_len 8 histogram NULL ``` The min/max column stats are inconsistent with what the user sees in the column values. After this change: ``` spark-sql> desc formatted tab_ts ts; col_name ts data_type timestamp comment NULL min 2022-01-01 00:00:01.123456 max 2022-01-03 00:00:02.987654 num_nulls 0 distinct_count 2 avg_col_len 8 max_col_len 8 histogram NULL ``` ### How was this patch tested? Added new unit tests. Closes #35440 from wzhfy/desc_ts_timeZones. 
Authored-by: Zhenhua Wang Signed-off-by: Wenchen Fan --- .../sql/catalyst/catalog/interface.scala | 11 ++- .../spark/sql/execution/command/tables.scala | 29 +++++- .../spark/sql/StatisticsCollectionSuite.scala | 89 ++++++++++++++++++- 3 files changed, 119 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 70ccb06c109fc..4ab14c3156294 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.catalog import java.net.URI -import java.time.ZoneOffset +import java.time.{ZoneId, ZoneOffset} import java.util.Date import scala.collection.mutable @@ -656,10 +656,13 @@ object CatalogColumnStat extends Logging { val VERSION = 2 - private def getTimestampFormatter(isParsing: Boolean): TimestampFormatter = { + def getTimestampFormatter( + isParsing: Boolean, + format: String = "yyyy-MM-dd HH:mm:ss.SSSSSS", + zoneId: ZoneId = ZoneOffset.UTC): TimestampFormatter = { TimestampFormatter( - format = "yyyy-MM-dd HH:mm:ss.SSSSSS", - zoneId = ZoneOffset.UTC, + format = format, + zoneId = zoneId, isParsing = isParsing) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index eceb9e6536d5d..5c33080c97337 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIfNeeded, CaseInsensitiveMap, CharVarcharUtils} +import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIfNeeded, CaseInsensitiveMap, CharVarcharUtils, DateTimeUtils} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TableIdentifierHelper import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.datasources.DataSource @@ -774,8 +774,10 @@ case class DescribeColumnCommand( ) if (isExtended) { // Show column stats when EXTENDED or FORMATTED is specified. - buffer += Row("min", cs.flatMap(_.min.map(_.toString)).getOrElse("NULL")) - buffer += Row("max", cs.flatMap(_.max.map(_.toString)).getOrElse("NULL")) + buffer += Row("min", cs.flatMap(_.min.map( + toZoneAwareExternalString(_, field.name, field.dataType))).getOrElse("NULL")) + buffer += Row("max", cs.flatMap(_.max.map( + toZoneAwareExternalString(_, field.name, field.dataType))).getOrElse("NULL")) buffer += Row("num_nulls", cs.flatMap(_.nullCount.map(_.toString)).getOrElse("NULL")) buffer += Row("distinct_count", cs.flatMap(_.distinctCount.map(_.toString)).getOrElse("NULL")) @@ -790,6 +792,27 @@ case class DescribeColumnCommand( buffer.toSeq } + private def toZoneAwareExternalString( + valueStr: String, + name: String, + dataType: DataType): String = { + dataType match { + case TimestampType => + // When writing to metastore, we always format timestamp value in the default UTC time zone. 
+ // So here we need to first convert to internal value, then format it using the current + // time zone. + val internalValue = + CatalogColumnStat.fromExternalString(valueStr, name, dataType, CatalogColumnStat.VERSION) + val curZoneId = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone) + CatalogColumnStat + .getTimestampFormatter( + isParsing = false, format = "yyyy-MM-dd HH:mm:ss.SSSSSS Z", zoneId = curZoneId) + .format(internalValue.asInstanceOf[Long]) + case _ => + valueStr + } + } + private def histogramDescription(histogram: Histogram): Seq[Row] = { val header = Row("histogram", s"height: ${histogram.height}, num_of_bins: ${histogram.bins.length}") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 9f8000a08f7af..0987825c88117 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -27,8 +27,9 @@ import scala.collection.mutable import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogColumnStat import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils -import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneUTC +import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils} +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, PST, UTC} +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, TimeZoneUTC} import org.apache.spark.sql.functions.timestamp_seconds import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -470,7 +471,89 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared } } - def getStatAttrNames(tableName: String): Set[String] = { + private def checkDescTimestampColStats( + tableName: String, + timestampColumn: String, + expectedMinTimestamp: String, + expectedMaxTimestamp: String): Unit = { + + def extractColumnStatsFromDesc(statsName: String, rows: Array[Row]): String = { + rows.collect { + case r: Row if r.getString(0) == statsName => + r.getString(1) + }.head + } + + val descTsCol = sql(s"DESC FORMATTED $tableName $timestampColumn").collect() + assert(extractColumnStatsFromDesc("min", descTsCol) == expectedMinTimestamp) + assert(extractColumnStatsFromDesc("max", descTsCol) == expectedMaxTimestamp) + } + + test("SPARK-38140: describe column stats (min, max) for timestamp column: desc results should " + + "be consistent with the written value if writing and desc happen in the same time zone") { + + val zoneIdAndOffsets = + Seq((UTC, "+0000"), (PST, "-0800"), (getZoneId("Asia/Hong_Kong"), "+0800")) + + zoneIdAndOffsets.foreach { case (zoneId, offset) => + withDefaultTimeZone(zoneId) { + val table = "insert_desc_same_time_zone" + val tsCol = "timestamp_typed_col" + withTable(table) { + val minTimestamp = "make_timestamp(2022, 1, 1, 0, 0, 1.123456)" + val maxTimestamp = "make_timestamp(2022, 1, 3, 0, 0, 2.987654)" + sql(s"CREATE TABLE $table ($tsCol Timestamp) USING parquet") + sql(s"INSERT INTO $table VALUES $minTimestamp, $maxTimestamp") + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR ALL COLUMNS") + + checkDescTimestampColStats( + tableName = table, + timestampColumn = tsCol, + expectedMinTimestamp = "2022-01-01 00:00:01.123456 " + offset, + expectedMaxTimestamp = "2022-01-03 
00:00:02.987654 " + offset) + } + } + } + + test("SPARK-38140: describe column stats (min, max) for timestamp column: desc should show " + + "different results if writing in UTC and desc in other time zones") { + + val table = "insert_desc_diff_time_zones" + val tsCol = "timestamp_typed_col" + + withDefaultTimeZone(UTC) { + withTable(table) { + val minTimestamp = "make_timestamp(2022, 1, 1, 0, 0, 1.123456)" + val maxTimestamp = "make_timestamp(2022, 1, 3, 0, 0, 2.987654)" + sql(s"CREATE TABLE $table ($tsCol Timestamp) USING parquet") + sql(s"INSERT INTO $table VALUES $minTimestamp, $maxTimestamp") + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR ALL COLUMNS") + + checkDescTimestampColStats( + tableName = table, + timestampColumn = tsCol, + expectedMinTimestamp = "2022-01-01 00:00:01.123456 +0000", + expectedMaxTimestamp = "2022-01-03 00:00:02.987654 +0000") + + TimeZone.setDefault(DateTimeUtils.getTimeZone("PST")) + checkDescTimestampColStats( + tableName = table, + timestampColumn = tsCol, + expectedMinTimestamp = "2021-12-31 16:00:01.123456 -0800", + expectedMaxTimestamp = "2022-01-02 16:00:02.987654 -0800") + + TimeZone.setDefault(DateTimeUtils.getTimeZone("Asia/Hong_Kong")) + checkDescTimestampColStats( + tableName = table, + timestampColumn = tsCol, + expectedMinTimestamp = "2022-01-01 08:00:01.123456 +0800", + expectedMaxTimestamp = "2022-01-03 08:00:02.987654 +0800") + } + } + } + + private def getStatAttrNames(tableName: String): Set[String] = { val queryStats = spark.table(tableName).queryExecution.optimizedPlan.stats.attributeStats queryStats.map(_._1.name).toSet } From 23119b030a6eb2887e864ef9b9f6e37026e43417 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 21 Feb 2022 17:00:13 +0800 Subject: [PATCH 288/513] [SPARK-38268][SQL] Hide the "failOnError" field in the toString method of Abs/CheckOverflow ### What changes were proposed in this pull request? Hide the "failOnError" field in the toString method of Abs/CheckOverflow. Here are two examples: * Abs.toString: `abs(-1, true)` => `abs(-1)` * CheckOverflow.toString: `CheckOverflow(0.12, DecimalType(5, 3), true)` => `CheckOverflow(0.12, DecimalType(5, 3))` ### Why are the changes needed? After these changes, over 200 test failures of *PlanStabilitySuite are fixed with ANSI mode on. This is important for setting up a testing job for ANSI mode. Also, having the "failOnError" field in the string output of Abs, e.g. `abs(-1, true)`, is quite odd. ### Does this PR introduce _any_ user-facing change? Yes, but quite minor: the "failOnError" field is no longer shown in the toString output of Abs/CheckOverflow. ### How was this patch tested? Manually turned on ANSI mode and ran all the *PlanStabilitySuite tests. Closes #35590 from gengliangwang/fixStabilitySuite. 
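As background, a standalone Scala sketch of the rendering pattern this relies on (simplified classes for illustration only, not the actual Catalyst ones): the generic expression `toString` prints whatever `flatArguments` yields, so narrowing that iterator (as `Abs` does in this patch) or overriding `toString` directly (as `CheckOverflow` does) is enough to drop the `failOnError` flag from plan strings.

```
// Standalone illustration of the pattern; these are not the Spark classes.
abstract class Expr extends Product {
  def prettyName: String = productPrefix.toLowerCase
  // By default every constructor argument is rendered in toString.
  def flatArguments: Iterator[Any] = productIterator
  override def toString: String = s"$prettyName(${flatArguments.mkString(", ")})"
}

// Hiding the flag: only the child is rendered,
// so this prints "abs(-1)" rather than "abs(-1, true)".
case class Abs(child: String, failOnError: Boolean = true) extends Expr {
  override def flatArguments: Iterator[Any] = Iterator(child)
}

object ToStringDemo {
  def main(args: Array[String]): Unit = {
    println(Abs("-1")) // abs(-1)
  }
}
```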
Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../sql/catalyst/expressions/arithmetic.scala | 2 + .../expressions/decimalExpressions.scala | 2 +- .../q53.sf100/explain.txt | 2 +- .../approved-plans-modified/q53/explain.txt | 2 +- .../q59.sf100/explain.txt | 2 +- .../approved-plans-modified/q59/explain.txt | 2 +- .../q63.sf100/explain.txt | 2 +- .../approved-plans-modified/q63/explain.txt | 2 +- .../q65.sf100/explain.txt | 2 +- .../approved-plans-modified/q65/explain.txt | 2 +- .../q89.sf100/explain.txt | 4 +- .../approved-plans-modified/q89/explain.txt | 4 +- .../q98.sf100/explain.txt | 2 +- .../approved-plans-modified/q98/explain.txt | 2 +- .../approved-plans-v1_4/q1.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q1/explain.txt | 2 +- .../approved-plans-v1_4/q11.sf100/explain.txt | 34 +++++----- .../q11.sf100/simplified.txt | 8 +-- .../approved-plans-v1_4/q11/explain.txt | 34 +++++----- .../approved-plans-v1_4/q11/simplified.txt | 8 +-- .../approved-plans-v1_4/q12.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q12/explain.txt | 2 +- .../q14a.sf100/explain.txt | 32 +++++----- .../q14a.sf100/simplified.txt | 8 +-- .../approved-plans-v1_4/q14a/explain.txt | 32 +++++----- .../approved-plans-v1_4/q14a/simplified.txt | 8 +-- .../q14b.sf100/explain.txt | 24 +++---- .../q14b.sf100/simplified.txt | 6 +- .../approved-plans-v1_4/q14b/explain.txt | 24 +++---- .../approved-plans-v1_4/q14b/simplified.txt | 6 +- .../approved-plans-v1_4/q2.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q2/explain.txt | 2 +- .../approved-plans-v1_4/q20.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q20/explain.txt | 2 +- .../q23a.sf100/explain.txt | 32 +++++----- .../q23a.sf100/simplified.txt | 6 +- .../approved-plans-v1_4/q23a/explain.txt | 30 ++++----- .../approved-plans-v1_4/q23a/simplified.txt | 6 +- .../q23b.sf100/explain.txt | 64 +++++++++---------- .../q23b.sf100/simplified.txt | 14 ++-- .../approved-plans-v1_4/q23b/explain.txt | 50 +++++++-------- .../approved-plans-v1_4/q23b/simplified.txt | 12 ++-- .../q24a.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q24a/explain.txt | 2 +- .../q24b.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q24b/explain.txt | 2 +- .../approved-plans-v1_4/q30.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q30/explain.txt | 2 +- .../approved-plans-v1_4/q31.sf100/explain.txt | 4 +- .../approved-plans-v1_4/q31/explain.txt | 6 +- .../approved-plans-v1_4/q32.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q32/explain.txt | 2 +- .../approved-plans-v1_4/q36.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q36/explain.txt | 2 +- .../approved-plans-v1_4/q4.sf100/explain.txt | 52 +++++++-------- .../q4.sf100/simplified.txt | 12 ++-- .../approved-plans-v1_4/q4/explain.txt | 52 +++++++-------- .../approved-plans-v1_4/q4/simplified.txt | 12 ++-- .../approved-plans-v1_4/q40.sf100/explain.txt | 8 +-- .../q40.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q40/explain.txt | 8 +-- .../approved-plans-v1_4/q40/simplified.txt | 2 +- .../approved-plans-v1_4/q44.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q44/explain.txt | 2 +- .../approved-plans-v1_4/q47.sf100/explain.txt | 4 +- .../approved-plans-v1_4/q47/explain.txt | 4 +- .../approved-plans-v1_4/q49.sf100/explain.txt | 6 +- .../approved-plans-v1_4/q49/explain.txt | 6 +- .../approved-plans-v1_4/q5.sf100/explain.txt | 6 +- .../approved-plans-v1_4/q5/explain.txt | 6 +- .../approved-plans-v1_4/q53.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q53/explain.txt | 2 +- .../approved-plans-v1_4/q54.sf100/explain.txt | 2 +- 
.../approved-plans-v1_4/q54/explain.txt | 2 +- .../approved-plans-v1_4/q57.sf100/explain.txt | 4 +- .../approved-plans-v1_4/q57/explain.txt | 4 +- .../approved-plans-v1_4/q58.sf100/explain.txt | 6 +- .../approved-plans-v1_4/q58/explain.txt | 6 +- .../approved-plans-v1_4/q59.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q59/explain.txt | 2 +- .../approved-plans-v1_4/q61.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q61/explain.txt | 2 +- .../approved-plans-v1_4/q63.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q63/explain.txt | 2 +- .../approved-plans-v1_4/q65.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q65/explain.txt | 2 +- .../approved-plans-v1_4/q66.sf100/explain.txt | 24 +++---- .../q66.sf100/simplified.txt | 6 +- .../approved-plans-v1_4/q66/explain.txt | 24 +++---- .../approved-plans-v1_4/q66/simplified.txt | 6 +- .../approved-plans-v1_4/q67.sf100/explain.txt | 8 +-- .../q67.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q67/explain.txt | 8 +-- .../approved-plans-v1_4/q67/simplified.txt | 2 +- .../approved-plans-v1_4/q77.sf100/explain.txt | 6 +- .../approved-plans-v1_4/q77/explain.txt | 6 +- .../approved-plans-v1_4/q80.sf100/explain.txt | 24 +++---- .../q80.sf100/simplified.txt | 6 +- .../approved-plans-v1_4/q80/explain.txt | 24 +++---- .../approved-plans-v1_4/q80/simplified.txt | 6 +- .../approved-plans-v1_4/q81.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q81/explain.txt | 2 +- .../approved-plans-v1_4/q83.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q83/explain.txt | 2 +- .../approved-plans-v1_4/q89.sf100/explain.txt | 4 +- .../approved-plans-v1_4/q89/explain.txt | 4 +- .../approved-plans-v1_4/q90.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q90/explain.txt | 2 +- .../approved-plans-v1_4/q92.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q92/explain.txt | 2 +- .../approved-plans-v1_4/q93.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q93/explain.txt | 2 +- .../approved-plans-v1_4/q98.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q98/explain.txt | 2 +- .../approved-plans-v2_7/q11.sf100/explain.txt | 34 +++++----- .../q11.sf100/simplified.txt | 8 +-- .../approved-plans-v2_7/q11/explain.txt | 34 +++++----- .../approved-plans-v2_7/q11/simplified.txt | 8 +-- .../approved-plans-v2_7/q12.sf100/explain.txt | 2 +- .../approved-plans-v2_7/q12/explain.txt | 2 +- .../approved-plans-v2_7/q14.sf100/explain.txt | 24 +++---- .../q14.sf100/simplified.txt | 6 +- .../approved-plans-v2_7/q14/explain.txt | 24 +++---- .../approved-plans-v2_7/q14/simplified.txt | 6 +- .../q14a.sf100/explain.txt | 32 +++++----- .../q14a.sf100/simplified.txt | 8 +-- .../approved-plans-v2_7/q14a/explain.txt | 32 +++++----- .../approved-plans-v2_7/q14a/simplified.txt | 8 +-- .../approved-plans-v2_7/q20.sf100/explain.txt | 2 +- .../approved-plans-v2_7/q20/explain.txt | 2 +- .../approved-plans-v2_7/q24.sf100/explain.txt | 2 +- .../approved-plans-v2_7/q24/explain.txt | 2 +- .../q36a.sf100/explain.txt | 6 +- .../approved-plans-v2_7/q36a/explain.txt | 6 +- .../approved-plans-v2_7/q47.sf100/explain.txt | 4 +- .../approved-plans-v2_7/q47/explain.txt | 4 +- .../approved-plans-v2_7/q49.sf100/explain.txt | 6 +- .../approved-plans-v2_7/q49/explain.txt | 6 +- .../approved-plans-v2_7/q57.sf100/explain.txt | 4 +- .../approved-plans-v2_7/q57/explain.txt | 4 +- .../approved-plans-v2_7/q5a.sf100/explain.txt | 6 +- .../approved-plans-v2_7/q5a/explain.txt | 6 +- .../approved-plans-v2_7/q6.sf100/explain.txt | 2 +- .../approved-plans-v2_7/q6/explain.txt | 2 +- .../approved-plans-v2_7/q64.sf100/explain.txt | 
10 +-- .../q64.sf100/simplified.txt | 2 +- .../approved-plans-v2_7/q64/explain.txt | 18 +++--- .../approved-plans-v2_7/q64/simplified.txt | 4 +- .../q67a.sf100/explain.txt | 56 ++++++++-------- .../q67a.sf100/simplified.txt | 18 +++--- .../approved-plans-v2_7/q67a/explain.txt | 56 ++++++++-------- .../approved-plans-v2_7/q67a/simplified.txt | 18 +++--- .../approved-plans-v2_7/q74.sf100/explain.txt | 2 +- .../approved-plans-v2_7/q74/explain.txt | 2 +- .../approved-plans-v2_7/q75.sf100/explain.txt | 16 ++--- .../approved-plans-v2_7/q75/explain.txt | 16 ++--- .../q77a.sf100/explain.txt | 6 +- .../approved-plans-v2_7/q77a/explain.txt | 6 +- .../approved-plans-v2_7/q78.sf100/explain.txt | 2 +- .../approved-plans-v2_7/q78/explain.txt | 2 +- .../q80a.sf100/explain.txt | 24 +++---- .../q80a.sf100/simplified.txt | 6 +- .../approved-plans-v2_7/q80a/explain.txt | 24 +++---- .../approved-plans-v2_7/q80a/simplified.txt | 6 +- .../approved-plans-v2_7/q98.sf100/explain.txt | 2 +- .../approved-plans-v2_7/q98/explain.txt | 2 +- .../tpch-plan-stability/q1/explain.txt | 8 +-- .../tpch-plan-stability/q1/simplified.txt | 2 +- .../tpch-plan-stability/q10/explain.txt | 8 +-- .../tpch-plan-stability/q10/simplified.txt | 2 +- .../tpch-plan-stability/q11/explain.txt | 16 ++--- .../tpch-plan-stability/q11/simplified.txt | 4 +- .../tpch-plan-stability/q14/explain.txt | 8 +-- .../tpch-plan-stability/q14/simplified.txt | 2 +- .../tpch-plan-stability/q15/explain.txt | 16 ++--- .../tpch-plan-stability/q15/simplified.txt | 4 +- .../tpch-plan-stability/q17/explain.txt | 4 +- .../tpch-plan-stability/q19/explain.txt | 8 +-- .../tpch-plan-stability/q19/simplified.txt | 2 +- .../tpch-plan-stability/q20/explain.txt | 2 +- .../tpch-plan-stability/q3/explain.txt | 8 +-- .../tpch-plan-stability/q3/simplified.txt | 2 +- .../tpch-plan-stability/q5/explain.txt | 8 +-- .../tpch-plan-stability/q5/simplified.txt | 2 +- .../tpch-plan-stability/q6/explain.txt | 8 +-- .../tpch-plan-stability/q6/simplified.txt | 2 +- .../tpch-plan-stability/q7/explain.txt | 2 +- .../tpch-plan-stability/q8/explain.txt | 4 +- .../tpch-plan-stability/q9/explain.txt | 2 +- 189 files changed, 822 insertions(+), 820 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 2a906a69606cc..88a38612fc4f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -204,6 +204,8 @@ case class Abs(child: Expression, failOnError: Boolean = SQLConf.get.ansiEnabled protected override def nullSafeEval(input: Any): Any = numeric.abs(input) + override def flatArguments: Iterator[Any] = Iterator(child) + override protected def withNewChildInternal(newChild: Expression): Abs = copy(child = newChild) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala index 48ccc2e82b0ad..8116537d7b06d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala @@ -149,7 +149,7 @@ case class CheckOverflow( }) } - override def toString: String = s"CheckOverflow($child, $dataType, $nullOnOverflow)" + override def 
toString: String = s"CheckOverflow($child, $dataType)" override def sql: String = child.sql diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt index d100e73a4de24..42b83c9c7d830 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#25) windowspecdefinition(i_manufact_id#5, specifiedwindowfra (26) Filter [codegen id : 7] Input [4]: [i_manufact_id#5, sum_sales#24, _w0#25, avg_quarterly_sales#27] -Condition : (isnotnull(avg_quarterly_sales#27) AND ((avg_quarterly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000))) +Condition : (isnotnull(avg_quarterly_sales#27) AND ((avg_quarterly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] Output [3]: [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt index 2b7ace43773b6..e7ae5ce6dcfb7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#25) windowspecdefinition(i_manufact_id#5, specifiedwindowfra (26) Filter [codegen id : 7] Input [4]: [i_manufact_id#5, sum_sales#24, _w0#25, avg_quarterly_sales#27] -Condition : (isnotnull(avg_quarterly_sales#27) AND ((avg_quarterly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000))) +Condition : (isnotnull(avg_quarterly_sales#27) AND ((avg_quarterly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] Output [3]: [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt index 8f71448cb76b2..f260becf18e26 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt @@ -283,7 +283,7 @@ Right keys [2]: [s_store_id2#74, (d_week_seq2#73 - 52)] Join condition: None (50) Project [codegen id : 10] -Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#75)), DecimalType(37,20), true) AS (sun_sales1 / sun_sales2)#82, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#76)), DecimalType(37,20), true) AS (mon_sales1 / mon_sales2)#83, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales1#49)), DecimalType(37,20), true) AS (tue_sales1 / tue_sales1)#84, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#77)), DecimalType(37,20), true) AS (wed_sales1 / wed_sales2)#85, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#78)), DecimalType(37,20), true) AS (thu_sales1 / thu_sales2)#86, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#79)), DecimalType(37,20), true) AS (fri_sales1 / fri_sales2)#87, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#80)), DecimalType(37,20), true) AS (sat_sales1 / sat_sales2)#88] +Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#75)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#82, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#76)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#83, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales1#49)), DecimalType(37,20)) AS (tue_sales1 / tue_sales1)#84, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#77)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#85, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#78)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#86, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#79)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#87, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#80)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#88] Input [18]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#73, s_store_id2#74, sun_sales2#75, mon_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] (51) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/explain.txt index 8f71448cb76b2..f260becf18e26 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/explain.txt @@ -283,7 +283,7 @@ Right keys [2]: [s_store_id2#74, (d_week_seq2#73 - 52)] Join condition: None (50) Project [codegen id : 10] -Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#75)), DecimalType(37,20), true) AS (sun_sales1 / sun_sales2)#82, 
CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#76)), DecimalType(37,20), true) AS (mon_sales1 / mon_sales2)#83, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales1#49)), DecimalType(37,20), true) AS (tue_sales1 / tue_sales1)#84, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#77)), DecimalType(37,20), true) AS (wed_sales1 / wed_sales2)#85, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#78)), DecimalType(37,20), true) AS (thu_sales1 / thu_sales2)#86, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#79)), DecimalType(37,20), true) AS (fri_sales1 / fri_sales2)#87, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#80)), DecimalType(37,20), true) AS (sat_sales1 / sat_sales2)#88] +Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#75)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#82, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#76)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#83, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales1#49)), DecimalType(37,20)) AS (tue_sales1 / tue_sales1)#84, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#77)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#85, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#78)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#86, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#79)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#87, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#80)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#88] Input [18]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#73, s_store_id2#74, sun_sales2#75, mon_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] (51) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt index 1e722cf779dab..698d6f41f8871 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#25) windowspecdefinition(i_manager_id#5, specifiedwindowfram (26) Filter [codegen id : 7] Input [4]: [i_manager_id#5, sum_sales#24, _w0#25, avg_monthly_sales#27] -Condition : (isnotnull(avg_monthly_sales#27) AND ((avg_monthly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000))) +Condition : (isnotnull(avg_monthly_sales#27) AND ((avg_monthly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), 
DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] Output [3]: [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt index 35eaebb171a51..99146cf1d2829 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#25) windowspecdefinition(i_manager_id#5, specifiedwindowfram (26) Filter [codegen id : 7] Input [4]: [i_manager_id#5, sum_sales#24, _w0#25, avg_monthly_sales#27] -Condition : (isnotnull(avg_monthly_sales#27) AND ((avg_monthly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000))) +Condition : (isnotnull(avg_monthly_sales#27) AND ((avg_monthly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] Output [3]: [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/explain.txt index 7066bd1ed142e..aabb4fe67f387 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/explain.txt @@ -158,7 +158,7 @@ Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)) (24) BroadcastHashJoin [codegen id : 7] Left keys [1]: [ss_store_sk#2] Right keys [1]: [ss_store_sk#13] -Join condition: (cast(revenue#11 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#28)), DecimalType(23,7), true)) +Join condition: (cast(revenue#11 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#28)), DecimalType(23,7))) (25) Project [codegen id : 7] Output [3]: [ss_store_sk#2, ss_item_sk#1, revenue#11] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/explain.txt index 02c9fdd520c10..019f4fa4c7076 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/explain.txt @@ -212,7 +212,7 @@ Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)) (36) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#4] Right keys [1]: [ss_store_sk#22] -Join condition: (cast(revenue#13 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#37)), DecimalType(23,7), true)) +Join condition: 
(cast(revenue#13 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#37)), DecimalType(23,7))) (37) Project [codegen id : 9] Output [6]: [s_store_name#2, i_item_desc#16, revenue#13, i_current_price#17, i_wholesale_cost#18, i_brand#19] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt index e1b716bd2186e..8b19320021538 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt @@ -141,7 +141,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_category#15, i_brand#13, s_store_ (25) Filter [codegen id : 7] Input [9]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#21, _w0#22, avg_monthly_sales#24] -Condition : (isnotnull(avg_monthly_sales#24) AND (NOT (avg_monthly_sales#24 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000))) +Condition : (isnotnull(avg_monthly_sales#24) AND (NOT (avg_monthly_sales#24 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (26) Project [codegen id : 7] Output [8]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#21, avg_monthly_sales#24] @@ -149,7 +149,7 @@ Input [9]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_nam (27) TakeOrderedAndProject Input [8]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#21, avg_monthly_sales#24] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#9 ASC NULLS FIRST], [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#21, avg_monthly_sales#24] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#9 ASC NULLS FIRST], [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#21, avg_monthly_sales#24] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt index fe910f9157d15..5d3ea6d0cb7be 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt @@ -141,7 +141,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_category#4, i_brand#2, s_store_na (25) Filter [codegen id : 7] Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, 
s_company_name#15, d_moy#12, sum_sales#21, _w0#22, avg_monthly_sales#24] -Condition : (isnotnull(avg_monthly_sales#24) AND (NOT (avg_monthly_sales#24 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000))) +Condition : (isnotnull(avg_monthly_sales#24) AND (NOT (avg_monthly_sales#24 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (26) Project [codegen id : 7] Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] @@ -149,7 +149,7 @@ Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name# (27) TakeOrderedAndProject Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/explain.txt index 554005d706d3d..e630982cc606b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/explain.txt @@ -123,7 +123,7 @@ Input [8]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, itemre Arguments: [sum(_w1#20) windowspecdefinition(i_class#11, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#11] (22) Project [codegen id : 9] -Output [7]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#8] +Output [7]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23, i_item_id#8] Input [9]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, itemrevenue#18, _w0#19, _w1#20, i_item_id#8, _we0#22] (23) Exchange diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/explain.txt index 66206ac265399..fc2390f392247 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/explain.txt @@ -108,7 +108,7 @@ Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemreve Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17), true) AS revenueratio#22, i_item_id#6] +Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22, i_item_id#6] Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6, _we0#21] (20) Exchange diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/explain.txt index f071af103792d..0ac812675e8f5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/explain.txt @@ -154,7 +154,7 @@ Input [3]: [ctr_store_sk#12, sum#19, count#20] Keys [1]: [ctr_store_sk#12] Functions [1]: [avg(ctr_total_return#13)] Aggregate Attributes [1]: [avg(ctr_total_return#13)#22] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#13)#22) * 1.200000), DecimalType(24,7), true) AS (avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12 AS ctr_store_sk#12#24] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#13)#22) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12 AS ctr_store_sk#12#24] (23) Filter [codegen id : 6] Input [2]: [(avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12#24] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt index 33d072fb94143..bfdc1e926597b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt @@ -151,7 +151,7 @@ Input [3]: [ctr_store_sk#12, sum#19, count#20] Keys [1]: [ctr_store_sk#12] Functions [1]: [avg(ctr_total_return#13)] Aggregate Attributes [1]: [avg(ctr_total_return#13)#22] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#13)#22) * 1.200000), DecimalType(24,7), true) AS (avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12 AS ctr_store_sk#12#24] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#13)#22) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12 AS ctr_store_sk#12#24] (23) Filter [codegen id : 
6] Input [2]: [(avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12#24] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/explain.txt index 025e881f1bf9e..4d8179a75c6ea 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/explain.txt @@ -150,7 +150,7 @@ Input [12]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_list_price#3, d_yea (16) HashAggregate [codegen id : 6] Input [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#18] Results [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] @@ -161,9 +161,9 @@ Arguments: hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d (18) HashAggregate [codegen id : 7] Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2), true)))#21] -Results [2]: [c_customer_id#10 AS customer_id#22, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2), true)))#21,18,2) AS year_total#23] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))#21] +Results [2]: [c_customer_id#10 AS customer_id#22, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))#21,18,2) AS year_total#23] (19) Filter [codegen id : 7] Input [2]: [customer_id#22, year_total#23] @@ -231,7 +231,7 @@ Input [12]: [ss_customer_sk#25, 
ss_ext_discount_amt#26, ss_ext_list_price#27, d_ (34) HashAggregate [codegen id : 14] Input [10]: [c_customer_id#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31] Keys [8]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#41] Results [9]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, sum#42] @@ -242,9 +242,9 @@ Arguments: hashpartitioning(c_customer_id#34, c_first_name#35, c_last_name#36, d (36) HashAggregate [codegen id : 15] Input [9]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, sum#42] Keys [8]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2), true)))#21] -Results [3]: [c_customer_id#34 AS customer_id#44, c_preferred_cust_flag#37 AS customer_preferred_cust_flag#45, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2), true)))#21,18,2) AS year_total#46] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))#21] +Results [3]: [c_customer_id#34 AS customer_id#44, c_preferred_cust_flag#37 AS customer_preferred_cust_flag#45, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))#21,18,2) AS year_total#46] (37) Exchange Input [3]: [customer_id#44, customer_preferred_cust_flag#45, year_total#46] @@ -317,7 +317,7 @@ Input [12]: [ws_bill_customer_sk#48, ws_ext_discount_amt#49, ws_ext_list_price#5 (53) HashAggregate [codegen id : 23] Input [10]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#49, ws_ext_list_price#50, d_year#53] Keys [8]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, 
c_birth_country#60, c_login#61, c_email_address#62, d_year#53] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#50 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#49 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#50 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#49 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#63] Results [9]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#53, sum#64] @@ -328,9 +328,9 @@ Arguments: hashpartitioning(c_customer_id#56, c_first_name#57, c_last_name#58, c (55) HashAggregate [codegen id : 24] Input [9]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#53, sum#64] Keys [8]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#53] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#50 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#49 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#50 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#49 as decimal(8,2)))), DecimalType(8,2), true)))#66] -Results [2]: [c_customer_id#56 AS customer_id#67, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#50 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#49 as decimal(8,2)))), DecimalType(8,2), true)))#66,18,2) AS year_total#68] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#50 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#49 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#50 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#49 as decimal(8,2)))), DecimalType(8,2))))#66] +Results [2]: [c_customer_id#56 AS customer_id#67, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#50 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#49 as decimal(8,2)))), DecimalType(8,2))))#66,18,2) AS year_total#68] (56) Filter [codegen id : 24] Input [2]: [customer_id#67, year_total#68] @@ -407,7 +407,7 @@ Input [12]: [ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#7 (73) HashAggregate [codegen id : 32] Input [10]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, ws_ext_discount_amt#71, ws_ext_list_price#72, d_year#75] Keys [8]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#75] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#85] 
Results [9]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#75, sum#86] @@ -418,9 +418,9 @@ Arguments: hashpartitioning(c_customer_id#78, c_first_name#79, c_last_name#80, c (75) HashAggregate [codegen id : 33] Input [9]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#75, sum#86] Keys [8]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#75] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2), true)))#66] -Results [2]: [c_customer_id#78 AS customer_id#88, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2), true)))#66,18,2) AS year_total#89] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2))))#66] +Results [2]: [c_customer_id#78 AS customer_id#88, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2))))#66,18,2) AS year_total#89] (76) Exchange Input [2]: [customer_id#88, year_total#89] @@ -433,7 +433,7 @@ Arguments: [customer_id#88 ASC NULLS FIRST], false, 0 (78) SortMergeJoin [codegen id : 35] Left keys [1]: [customer_id#22] Right keys [1]: [customer_id#88] -Join condition: (CASE WHEN (year_total#68 > 0.00) THEN CheckOverflow((promote_precision(year_total#89) / promote_precision(year_total#68)), DecimalType(38,20), true) END > CASE WHEN (year_total#23 > 0.00) THEN CheckOverflow((promote_precision(year_total#46) / promote_precision(year_total#23)), DecimalType(38,20), true) END) +Join condition: (CASE WHEN (year_total#68 > 0.00) THEN CheckOverflow((promote_precision(year_total#89) / promote_precision(year_total#68)), DecimalType(38,20)) END > CASE WHEN (year_total#23 > 0.00) THEN CheckOverflow((promote_precision(year_total#46) / promote_precision(year_total#23)), DecimalType(38,20)) END) (79) Project [codegen id : 35] Output [1]: [customer_preferred_cust_flag#45] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/simplified.txt index eed9d7158c108..ff149df17d8f4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/simplified.txt @@ -17,7 +17,7 @@ TakeOrderedAndProject [customer_preferred_cust_flag] Exchange [customer_id] #1 WholeStageCodegen (7) Filter [year_total] - 
HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #2 WholeStageCodegen (6) @@ -61,7 +61,7 @@ TakeOrderedAndProject [customer_preferred_cust_flag] InputAdapter Exchange [customer_id] #6 WholeStageCodegen (15) - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,customer_preferred_cust_flag,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,customer_preferred_cust_flag,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #7 WholeStageCodegen (14) @@ -101,7 +101,7 @@ TakeOrderedAndProject [customer_preferred_cust_flag] Exchange [customer_id] #10 WholeStageCodegen (24) Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 WholeStageCodegen (23) @@ -134,7 +134,7 @@ TakeOrderedAndProject [customer_preferred_cust_flag] InputAdapter Exchange [customer_id] #13 WholeStageCodegen (33) - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - 
promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #14 WholeStageCodegen (32) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt index 87c6b6f7123fe..8cb7c021fb3ea 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt @@ -130,7 +130,7 @@ Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_fl (13) HashAggregate [codegen id : 3] Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#16] Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#17] Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] @@ -141,9 +141,9 @@ Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_ye (15) HashAggregate [codegen id : 16] Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2), true)))#20] -Results [2]: [c_customer_id#2 AS customer_id#21, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2), true)))#20,18,2) AS year_total#22] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))#20] +Results [2]: [c_customer_id#2 AS customer_id#21, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), 
DecimalType(8,2))))#20,18,2) AS year_total#22] (16) Filter [codegen id : 16] Input [2]: [customer_id#21, year_total#22] @@ -206,7 +206,7 @@ Input [12]: [c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust (29) HashAggregate [codegen id : 6] Input [10]: [c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, ss_ext_discount_amt#32, ss_ext_list_price#33, d_year#38] Keys [8]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#39] Results [9]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, sum#40] @@ -217,9 +217,9 @@ Arguments: hashpartitioning(c_customer_id#24, c_first_name#25, c_last_name#26, d (31) HashAggregate [codegen id : 7] Input [9]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, sum#40] Keys [8]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2), true)))#20] -Results [3]: [c_customer_id#24 AS customer_id#42, c_preferred_cust_flag#27 AS customer_preferred_cust_flag#43, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2), true)))#20,18,2) AS year_total#44] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))#20] +Results [3]: [c_customer_id#24 AS customer_id#42, c_preferred_cust_flag#27 AS customer_preferred_cust_flag#43, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))#20,18,2) AS year_total#44] (32) BroadcastExchange Input [3]: [customer_id#42, customer_preferred_cust_flag#43, year_total#44] @@ -291,7 +291,7 @@ Input [12]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust (47) HashAggregate [codegen id : 10] Input [10]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, 
c_email_address#53, ws_ext_discount_amt#55, ws_ext_list_price#56, d_year#60] Keys [8]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, d_year#60] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#56 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#55 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#56 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#55 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#61] Results [9]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, d_year#60, sum#62] @@ -302,9 +302,9 @@ Arguments: hashpartitioning(c_customer_id#47, c_first_name#48, c_last_name#49, c (49) HashAggregate [codegen id : 11] Input [9]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, d_year#60, sum#62] Keys [8]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, d_year#60] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#56 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#55 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#56 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#55 as decimal(8,2)))), DecimalType(8,2), true)))#64] -Results [2]: [c_customer_id#47 AS customer_id#65, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#56 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#55 as decimal(8,2)))), DecimalType(8,2), true)))#64,18,2) AS year_total#66] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#56 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#55 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#56 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#55 as decimal(8,2)))), DecimalType(8,2))))#64] +Results [2]: [c_customer_id#47 AS customer_id#65, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#56 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#55 as decimal(8,2)))), DecimalType(8,2))))#64,18,2) AS year_total#66] (50) Filter [codegen id : 11] Input [2]: [customer_id#65, year_total#66] @@ -380,7 +380,7 @@ Input [12]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust (66) HashAggregate [codegen id : 14] Input [10]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, ws_ext_discount_amt#77, ws_ext_list_price#78, d_year#82] Keys [8]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#82] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#78 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#77 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: 
[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#78 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#77 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#83] Results [9]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#82, sum#84] @@ -391,9 +391,9 @@ Arguments: hashpartitioning(c_customer_id#69, c_first_name#70, c_last_name#71, c (68) HashAggregate [codegen id : 15] Input [9]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#82, sum#84] Keys [8]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#82] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#78 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#77 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#78 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#77 as decimal(8,2)))), DecimalType(8,2), true)))#64] -Results [2]: [c_customer_id#69 AS customer_id#86, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#78 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#77 as decimal(8,2)))), DecimalType(8,2), true)))#64,18,2) AS year_total#87] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#78 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#77 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#78 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#77 as decimal(8,2)))), DecimalType(8,2))))#64] +Results [2]: [c_customer_id#69 AS customer_id#86, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#78 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#77 as decimal(8,2)))), DecimalType(8,2))))#64,18,2) AS year_total#87] (69) BroadcastExchange Input [2]: [customer_id#86, year_total#87] @@ -402,7 +402,7 @@ Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id= (70) BroadcastHashJoin [codegen id : 16] Left keys [1]: [customer_id#21] Right keys [1]: [customer_id#86] -Join condition: (CASE WHEN (year_total#66 > 0.00) THEN CheckOverflow((promote_precision(year_total#87) / promote_precision(year_total#66)), DecimalType(38,20), true) END > CASE WHEN (year_total#22 > 0.00) THEN CheckOverflow((promote_precision(year_total#44) / promote_precision(year_total#22)), DecimalType(38,20), true) END) +Join condition: (CASE WHEN (year_total#66 > 0.00) THEN CheckOverflow((promote_precision(year_total#87) / promote_precision(year_total#66)), DecimalType(38,20)) END > CASE WHEN (year_total#22 > 0.00) THEN CheckOverflow((promote_precision(year_total#44) / promote_precision(year_total#22)), DecimalType(38,20)) END) (71) Project [codegen id : 16] Output [1]: [customer_preferred_cust_flag#43] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt index e9c0faa7491a0..6e80ebc5a038d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt 
+++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt @@ -7,7 +7,7 @@ TakeOrderedAndProject [customer_preferred_cust_flag] Project [customer_id,year_total,customer_preferred_cust_flag,year_total] BroadcastHashJoin [customer_id,customer_id] Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #1 WholeStageCodegen (3) @@ -39,7 +39,7 @@ TakeOrderedAndProject [customer_preferred_cust_flag] InputAdapter BroadcastExchange #4 WholeStageCodegen (7) - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,customer_preferred_cust_flag,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,customer_preferred_cust_flag,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #5 WholeStageCodegen (6) @@ -72,7 +72,7 @@ TakeOrderedAndProject [customer_preferred_cust_flag] BroadcastExchange #8 WholeStageCodegen (11) Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #9 WholeStageCodegen (10) @@ -98,7 +98,7 @@ TakeOrderedAndProject [customer_preferred_cust_flag] InputAdapter BroadcastExchange #11 WholeStageCodegen (15) - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), 
DecimalType(8,2), true))),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #12 WholeStageCodegen (14) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/explain.txt index 64ee24cf9435c..0f0b678bb7074 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/explain.txt @@ -121,7 +121,7 @@ Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrev Arguments: [sum(_w1#20) windowspecdefinition(i_class#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#10] (22) Project [codegen id : 9] -Output [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#7] +Output [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23, i_item_id#7] Input [9]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7, _we0#22] (23) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt index 306ecd52c1a3b..0b4dfea762918 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt @@ -106,7 +106,7 @@ Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemreve Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17), true) AS revenueratio#22, i_item_id#6] +Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22, i_item_id#6] Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6, _we0#21] (20) TakeOrderedAndProject diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt index 14858257813e5..e3eac82fee26b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt @@ -477,7 +477,7 @@ Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#48, i_brand_ (78) HashAggregate [codegen id : 45] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] Results [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] @@ -488,9 +488,9 @@ Arguments: hashpartitioning(i_brand_id#49, i_class_id#50, i_category_id#51, 5), (80) HashAggregate [codegen id : 46] Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] -Results [5]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#63, count(1)#62 AS number_sales#64] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61, count(1)#62] +Results [5]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61 AS sales#63, count(1)#62 AS number_sales#64] (81) Filter [codegen id : 46] Input [5]: [i_brand_id#49, i_class_id#50, i_category_id#51, sales#63, number_sales#64] @@ -562,7 +562,7 @@ Input [7]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, i_item_sk#74, i_bra (97) HashAggregate [codegen id : 91] Input [5]: [cs_quantity#69, cs_list_price#70, i_brand_id#75, i_class_id#76, i_category_id#77] Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: 
[partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#78, isEmpty#79, count#80] Results [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] @@ -573,9 +573,9 @@ Arguments: hashpartitioning(i_brand_id#75, i_class_id#76, i_category_id#77, 5), (99) HashAggregate [codegen id : 92] Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#85, count(1)#86] -Results [5]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#85 AS sales#87, count(1)#86 AS number_sales#88] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#85, count(1)#86] +Results [5]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#85 AS sales#87, count(1)#86 AS number_sales#88] (100) Filter [codegen id : 92] Input [5]: [i_brand_id#75, i_class_id#76, i_category_id#77, sales#87, number_sales#88] @@ -647,7 +647,7 @@ Input [7]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, i_item_sk#96, i_bra (116) HashAggregate [codegen id : 137] Input [5]: [ws_quantity#91, ws_list_price#92, i_brand_id#97, i_class_id#98, i_category_id#99] Keys [3]: [i_brand_id#97, i_class_id#98, i_category_id#99] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#100, isEmpty#101, count#102] Results [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] @@ -658,9 +658,9 @@ Arguments: hashpartitioning(i_brand_id#97, i_class_id#98, i_category_id#99, 5), (118) HashAggregate [codegen id : 138] Input [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] Keys [3]: [i_brand_id#97, i_class_id#98, i_category_id#99] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * 
promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true))#107, count(1)#108] -Results [5]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true))#107 AS sales#109, count(1)#108 AS number_sales#110] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2)))#107, count(1)#108] +Results [5]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2)))#107 AS sales#109, count(1)#108 AS number_sales#110] (119) Filter [codegen id : 138] Input [5]: [i_brand_id#97, i_class_id#98, i_category_id#99, sales#109, number_sales#110] @@ -793,7 +793,7 @@ Input [4]: [ws_quantity#140, ws_list_price#141, ws_sold_date_sk#142, d_date_sk#1 (143) HashAggregate [codegen id : 7] Input [2]: [quantity#132, list_price#133] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#146, count#147] Results [2]: [sum#148, count#149] @@ -804,9 +804,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#150] (145) HashAggregate [codegen id : 8] Input [2]: [sum#148, count#149] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2), true))#151] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2), true))#151 AS average_sales#152] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2)))#151] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2)))#151 AS average_sales#152] Subquery:2 Hosting operator id = 127 Hosting Expression = ss_sold_date_sk#130 IN dynamicpruning#13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt index 1666d02ce276c..5984e5165f78d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt @@ -13,7 +13,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Filter [sales] Subquery #3 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] InputAdapter Exchange #18 WholeStageCodegen (7) @@ -47,7 +47,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su ReusedSubquery [d_date_sk] #2 InputAdapter ReusedExchange [d_date_sk] #10 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #2 WholeStageCodegen (45) @@ -207,7 +207,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #19 WholeStageCodegen (91) @@ -241,7 +241,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #21 WholeStageCodegen (137) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt index fa036252e71bc..b263d0f642e45 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt @@ -406,7 +406,7 @@ Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_ (65) HashAggregate [codegen id : 25] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] @@ -417,9 +417,9 @@ Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), (67) HashAggregate [codegen id : 26] Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56, count(1)#57] -Results [5]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56 AS sales#58, count(1)#57 AS number_sales#59] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56, count(1)#57] +Results [5]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56 AS sales#58, count(1)#57 AS number_sales#59] (68) Filter [codegen id : 26] Input [5]: [i_brand_id#44, i_class_id#45, i_category_id#46, sales#58, number_sales#59] @@ -479,7 +479,7 @@ Input [7]: [cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_brand_id#68, (81) HashAggregate [codegen id : 51] Input [5]: [cs_quantity#64, cs_list_price#65, i_brand_id#68, i_class_id#69, i_category_id#70] Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#72, isEmpty#73, count#74] Results [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] @@ -490,9 +490,9 @@ Arguments: 
hashpartitioning(i_brand_id#68, i_class_id#69, i_category_id#70, 5), (83) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79, count(1)#80] -Results [5]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79 AS sales#81, count(1)#80 AS number_sales#82] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#79, count(1)#80] +Results [5]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#79 AS sales#81, count(1)#80 AS number_sales#82] (84) Filter [codegen id : 52] Input [5]: [i_brand_id#68, i_class_id#69, i_category_id#70, sales#81, number_sales#82] @@ -552,7 +552,7 @@ Input [7]: [ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_brand_id#89, (97) HashAggregate [codegen id : 77] Input [5]: [ws_quantity#85, ws_list_price#86, i_brand_id#89, i_class_id#90, i_category_id#91] Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#93, isEmpty#94, count#95] Results [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] @@ -563,9 +563,9 @@ Arguments: hashpartitioning(i_brand_id#89, i_class_id#90, i_category_id#91, 5), (99) HashAggregate [codegen id : 78] Input [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true))#100, count(1)#101] -Results [5]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sales#102, count(1)#101 AS number_sales#103] +Functions [2]: 
[sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2)))#100, count(1)#101] +Results [5]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2)))#100 AS sales#102, count(1)#101 AS number_sales#103] (100) Filter [codegen id : 78] Input [5]: [i_brand_id#89, i_class_id#90, i_category_id#91, sales#102, number_sales#103] @@ -698,7 +698,7 @@ Input [4]: [ws_quantity#133, ws_list_price#134, ws_sold_date_sk#135, d_date_sk#1 (124) HashAggregate [codegen id : 7] Input [2]: [quantity#125, list_price#126] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#139, count#140] Results [2]: [sum#141, count#142] @@ -709,9 +709,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#143] (126) HashAggregate [codegen id : 8] Input [2]: [sum#141, count#142] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2), true))#144] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2), true))#144 AS average_sales#145] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2)))#144] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2)))#144 AS average_sales#145] Subquery:2 Hosting operator id = 108 Hosting Expression = ss_sold_date_sk#123 IN dynamicpruning#12 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt index 521d5b34ea0e8..653e3e6564e41 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt @@ -13,7 +13,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Filter [sales] Subquery #3 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), 
true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] InputAdapter Exchange #13 WholeStageCodegen (7) @@ -47,7 +47,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su ReusedSubquery [d_date_sk] #2 InputAdapter ReusedExchange [d_date_sk] #7 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #2 WholeStageCodegen (25) @@ -168,7 +168,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #14 WholeStageCodegen (51) @@ -193,7 +193,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #15 WholeStageCodegen (77) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt index ff2e3984e6dd2..78133a44c9a69 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt @@ -453,7 +453,7 @@ Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#48, i_brand_ (78) HashAggregate [codegen id : 45] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: 
[partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] Results [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] @@ -464,9 +464,9 @@ Arguments: hashpartitioning(i_brand_id#49, i_class_id#50, i_category_id#51, 5), (80) HashAggregate [codegen id : 92] Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] -Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61, count(1)#62] +Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61 AS sales#64, count(1)#62 AS number_sales#65] (81) Filter [codegen id : 92] Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65] @@ -534,7 +534,7 @@ Input [7]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, i_item_sk#75, i_bra (96) HashAggregate [codegen id : 90] Input [5]: [ss_quantity#69, ss_list_price#70, i_brand_id#76, i_class_id#77, i_category_id#78] Keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#79, isEmpty#80, count#81] Results [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] @@ -545,9 +545,9 @@ Arguments: hashpartitioning(i_brand_id#76, i_class_id#77, i_category_id#78, 5), (98) HashAggregate [codegen id : 91] Input [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] Keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as 
decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#86, count(1)#87] -Results [6]: [store AS channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#86 AS sales#89, count(1)#87 AS number_sales#90] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#86, count(1)#87] +Results [6]: [store AS channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#86 AS sales#89, count(1)#87 AS number_sales#90] (99) Filter [codegen id : 91] Input [6]: [channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sales#89, number_sales#90] @@ -661,7 +661,7 @@ Input [4]: [ws_quantity#104, ws_list_price#105, ws_sold_date_sk#106, d_date_sk#1 (119) HashAggregate [codegen id : 7] Input [2]: [quantity#96, list_price#97] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#110, count#111] Results [2]: [sum#112, count#113] @@ -672,9 +672,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#114] (121) HashAggregate [codegen id : 8] Input [2]: [sum#112, count#113] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))#115] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))#115 AS average_sales#116] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))#115] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))#115 AS average_sales#116] Subquery:2 Hosting operator id = 103 Hosting Expression = ss_sold_date_sk#94 IN dynamicpruning#13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt index 7c193e479a013..e7d3f84db0c72 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt @@ -4,7 +4,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Filter [sales] Subquery #4 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] InputAdapter Exchange #17 WholeStageCodegen (7) @@ -38,7 +38,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ ReusedSubquery [d_date_sk] #3 InputAdapter ReusedExchange [d_date_sk] #9 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (45) @@ -206,7 +206,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ WholeStageCodegen (91) Filter [sales] ReusedSubquery [average_sales] #4 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #19 WholeStageCodegen (90) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt index 254c73a9e8884..b0fe619430132 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt @@ -385,7 +385,7 @@ Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_ (65) HashAggregate [codegen id : 25] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: 
[partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] @@ -396,9 +396,9 @@ Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), (67) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56, count(1)#57] -Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56 AS sales#59, count(1)#57 AS number_sales#60] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56, count(1)#57] +Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56 AS sales#59, count(1)#57 AS number_sales#60] (68) Filter [codegen id : 52] Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60] @@ -454,7 +454,7 @@ Input [7]: [ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_brand_id#69, (80) HashAggregate [codegen id : 50] Input [5]: [ss_quantity#64, ss_list_price#65, i_brand_id#69, i_class_id#70, i_category_id#71] Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#73, isEmpty#74, count#75] Results [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] @@ -465,9 +465,9 @@ Arguments: hashpartitioning(i_brand_id#69, i_class_id#70, i_category_id#71, 5), (82) HashAggregate [codegen id : 51] Input [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) 
* promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#80, count(1)#81] -Results [6]: [store AS channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#80 AS sales#83, count(1)#81 AS number_sales#84] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#80, count(1)#81] +Results [6]: [store AS channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#80 AS sales#83, count(1)#81 AS number_sales#84] (83) Filter [codegen id : 51] Input [6]: [channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] @@ -581,7 +581,7 @@ Input [4]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100, d_date_sk#101 (103) HashAggregate [codegen id : 7] Input [2]: [quantity#90, list_price#91] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#104, count#105] Results [2]: [sum#106, count#107] @@ -592,9 +592,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#108] (105) HashAggregate [codegen id : 8] Input [2]: [sum#106, count#107] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))#109] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))#109 AS average_sales#110] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))#109] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))#109 AS average_sales#110] Subquery:2 Hosting operator id = 87 Hosting Expression = ss_sold_date_sk#88 IN dynamicpruning#12 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt index 15fdf6b0eab16..8f722e735172f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt @@ -4,7 +4,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Filter [sales] Subquery #4 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] InputAdapter Exchange #12 WholeStageCodegen (7) @@ -38,7 +38,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ ReusedSubquery [d_date_sk] #3 InputAdapter ReusedExchange [d_date_sk] #6 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (25) @@ -167,7 +167,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ WholeStageCodegen (51) Filter [sales] ReusedSubquery [average_sales] #4 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #14 WholeStageCodegen (50) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt index 33f6c01b4b69b..8f188db553004 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt @@ -195,7 +195,7 @@ Right keys [1]: [(d_week_seq2#63 - 53)] Join condition: None (35) Project [codegen id : 12] -Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#64)), DecimalType(37,20), true), 2) AS round((sun_sales1 / sun_sales2), 2)#72, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#65)), DecimalType(37,20), true), 2) AS round((mon_sales1 / mon_sales2), 2)#73, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#66)), DecimalType(37,20), true), 2) AS round((tue_sales1 / tue_sales2), 2)#74, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#67)), DecimalType(37,20), true), 2) AS round((wed_sales1 / 
wed_sales2), 2)#75, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#68)), DecimalType(37,20), true), 2) AS round((thu_sales1 / thu_sales2), 2)#76, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#69)), DecimalType(37,20), true), 2) AS round((fri_sales1 / fri_sales2), 2)#77, round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#70)), DecimalType(37,20), true), 2) AS round((sat_sales1 / sat_sales2), 2)#78] +Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#64)), DecimalType(37,20)), 2) AS round((sun_sales1 / sun_sales2), 2)#72, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#65)), DecimalType(37,20)), 2) AS round((mon_sales1 / mon_sales2), 2)#73, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#66)), DecimalType(37,20)), 2) AS round((tue_sales1 / tue_sales2), 2)#74, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#67)), DecimalType(37,20)), 2) AS round((wed_sales1 / wed_sales2), 2)#75, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#68)), DecimalType(37,20)), 2) AS round((thu_sales1 / thu_sales2), 2)#76, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#69)), DecimalType(37,20)), 2) AS round((fri_sales1 / fri_sales2), 2)#77, round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#70)), DecimalType(37,20)), 2) AS round((sat_sales1 / sat_sales2), 2)#78] Input [16]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52, d_week_seq2#63, sun_sales2#64, mon_sales2#65, tue_sales2#66, wed_sales2#67, thu_sales2#68, fri_sales2#69, sat_sales2#70] (36) Exchange diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt index 33f6c01b4b69b..8f188db553004 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt @@ -195,7 +195,7 @@ Right keys [1]: [(d_week_seq2#63 - 53)] Join condition: None (35) Project [codegen id : 12] -Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#64)), DecimalType(37,20), true), 2) AS round((sun_sales1 / sun_sales2), 2)#72, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#65)), DecimalType(37,20), true), 2) AS round((mon_sales1 / mon_sales2), 2)#73, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#66)), DecimalType(37,20), true), 2) AS round((tue_sales1 / tue_sales2), 2)#74, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#67)), DecimalType(37,20), true), 2) AS round((wed_sales1 / wed_sales2), 2)#75, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#68)), DecimalType(37,20), true), 2) AS round((thu_sales1 / thu_sales2), 2)#76, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#69)), DecimalType(37,20), true), 2) AS round((fri_sales1 / fri_sales2), 2)#77, round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#70)), 
DecimalType(37,20), true), 2) AS round((sat_sales1 / sat_sales2), 2)#78] +Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#64)), DecimalType(37,20)), 2) AS round((sun_sales1 / sun_sales2), 2)#72, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#65)), DecimalType(37,20)), 2) AS round((mon_sales1 / mon_sales2), 2)#73, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#66)), DecimalType(37,20)), 2) AS round((tue_sales1 / tue_sales2), 2)#74, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#67)), DecimalType(37,20)), 2) AS round((wed_sales1 / wed_sales2), 2)#75, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#68)), DecimalType(37,20)), 2) AS round((thu_sales1 / thu_sales2), 2)#76, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#69)), DecimalType(37,20)), 2) AS round((fri_sales1 / fri_sales2), 2)#77, round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#70)), DecimalType(37,20)), 2) AS round((sat_sales1 / sat_sales2), 2)#78] Input [16]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52, d_week_seq2#63, sun_sales2#64, mon_sales2#65, tue_sales2#66, wed_sales2#67, thu_sales2#68, fri_sales2#69, sat_sales2#70] (36) Exchange diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/explain.txt index d50622c2464ea..09e4cd2a57054 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/explain.txt @@ -121,7 +121,7 @@ Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrev Arguments: [sum(_w1#20) windowspecdefinition(i_class#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#10] (22) Project [codegen id : 9] -Output [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#7] +Output [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23, i_item_id#7] Input [9]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7, _we0#22] (23) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt index b54c704b66c3f..8b9d47316f293 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt @@ -106,7 +106,7 @@ Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemreve Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, 
specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17), true) AS revenueratio#22, i_item_id#6] +Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22, i_item_id#6] Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6, _we0#21] (20) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt index 05c6a35ee7ced..5bf5193487b07 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt @@ -278,20 +278,20 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (42) HashAggregate [codegen id : 15] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#31, isEmpty#32] Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] (43) HashAggregate [codegen id : 15] Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] (44) Filter [codegen id : 15] Input [2]: [c_customer_sk#29, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery 
scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) +Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) (45) Project [codegen id : 15] Output [1]: [c_customer_sk#29] @@ -319,7 +319,7 @@ Right keys [1]: [d_date_sk#39] Join condition: None (51) Project [codegen id : 17] -Output [1]: [CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true) AS sales#40] +Output [1]: [CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)) AS sales#40] Input [4]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#39] (52) Scan parquet default.web_sales @@ -432,20 +432,20 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (77) HashAggregate [codegen id : 32] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#49, isEmpty#50] Results [3]: [c_customer_sk#29, sum#51, isEmpty#52] (78) HashAggregate [codegen id : 32] Input [3]: [c_customer_sk#29, sum#51, isEmpty#52] Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] (79) Filter [codegen id : 32] Input [2]: [c_customer_sk#29, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) +Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) (80) Project [codegen id : 32] Output [1]: [c_customer_sk#29] @@ -473,7 +473,7 @@ Right keys [1]: 
[d_date_sk#53] Join condition: None (86) Project [codegen id : 34] -Output [1]: [CheckOverflow((promote_precision(cast(ws_quantity#43 as decimal(12,2))) * promote_precision(cast(ws_list_price#44 as decimal(12,2)))), DecimalType(18,2), true) AS sales#54] +Output [1]: [CheckOverflow((promote_precision(cast(ws_quantity#43 as decimal(12,2))) * promote_precision(cast(ws_list_price#44 as decimal(12,2)))), DecimalType(18,2)) AS sales#54] Input [4]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45, d_date_sk#53] (87) Union @@ -632,16 +632,16 @@ Input [4]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69, c_customer_sk# (113) HashAggregate [codegen id : 6] Input [3]: [ss_quantity#68, ss_sales_price#69, c_customer_sk#74] Keys [1]: [c_customer_sk#74] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#75, isEmpty#76] Results [3]: [c_customer_sk#74, sum#77, isEmpty#78] (114) HashAggregate [codegen id : 6] Input [3]: [c_customer_sk#74, sum#77, isEmpty#78] Keys [1]: [c_customer_sk#74] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2), true))#79] -Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2), true))#79 AS csales#80] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2)))#79] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2)))#79 AS csales#80] (115) HashAggregate [codegen id : 6] Input [1]: [csales#80] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt index 7fcf4ef29d66a..0683b263ea290 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt @@ -89,7 +89,7 @@ WholeStageCodegen (36) Exchange #10 WholeStageCodegen (6) HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), 
DecimalType(18,2))),csales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] @@ -120,7 +120,7 @@ WholeStageCodegen (36) Sort [c_customer_sk] InputAdapter ReusedExchange [c_customer_sk] #9 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),ssales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] @@ -195,7 +195,7 @@ WholeStageCodegen (36) Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),ssales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt index 8b5ac41195ab8..58d6c22f3fd05 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt @@ -226,7 +226,7 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (35) HashAggregate [codegen id : 8] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] Keys [1]: [c_customer_sk#28] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#30, isEmpty#31] Results [3]: [c_customer_sk#28, sum#32, isEmpty#33] @@ -237,13 +237,13 @@ Arguments: hashpartitioning(c_customer_sk#28, 5), ENSURE_REQUIREMENTS, [id=#34] (37) HashAggregate [codegen id : 9] Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * 
promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] (38) Filter [codegen id : 9] Input [2]: [c_customer_sk#28, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) +Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) (39) Project [codegen id : 9] Output [1]: [c_customer_sk#28] @@ -271,7 +271,7 @@ Right keys [1]: [d_date_sk#39] Join condition: None (45) Project [codegen id : 11] -Output [1]: [CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true) AS sales#40] +Output [1]: [CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)) AS sales#40] Input [4]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#39] (46) Scan parquet default.web_sales @@ -310,13 +310,13 @@ Output [3]: [c_customer_sk#28, sum#47, isEmpty#48] (54) HashAggregate [codegen id : 20] Input [3]: [c_customer_sk#28, sum#47, isEmpty#48] Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] (55) Filter [codegen id : 20] Input [2]: [c_customer_sk#28, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) +Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > 
CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) (56) Project [codegen id : 20] Output [1]: [c_customer_sk#28] @@ -344,7 +344,7 @@ Right keys [1]: [d_date_sk#49] Join condition: None (62) Project [codegen id : 22] -Output [1]: [CheckOverflow((promote_precision(cast(ws_quantity#43 as decimal(12,2))) * promote_precision(cast(ws_list_price#44 as decimal(12,2)))), DecimalType(18,2), true) AS sales#50] +Output [1]: [CheckOverflow((promote_precision(cast(ws_quantity#43 as decimal(12,2))) * promote_precision(cast(ws_list_price#44 as decimal(12,2)))), DecimalType(18,2)) AS sales#50] Input [4]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45, d_date_sk#49] (63) Union @@ -489,7 +489,7 @@ Input [5]: [ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66, c_customer_sk (86) HashAggregate [codegen id : 3] Input [3]: [ss_quantity#64, ss_sales_price#65, c_customer_sk#68] Keys [1]: [c_customer_sk#68] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#70, isEmpty#71] Results [3]: [c_customer_sk#68, sum#72, isEmpty#73] @@ -500,9 +500,9 @@ Arguments: hashpartitioning(c_customer_sk#68, 5), ENSURE_REQUIREMENTS, [id=#74] (88) HashAggregate [codegen id : 4] Input [3]: [c_customer_sk#68, sum#72, isEmpty#73] Keys [1]: [c_customer_sk#68] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2), true))#75] -Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2), true))#75 AS csales#76] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2)))#75] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2)))#75 AS csales#76] (89) HashAggregate [codegen id : 4] Input [1]: [csales#76] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt index dfa1ee1f4fe66..d38e147d305c7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt @@ -77,7 +77,7 @@ WholeStageCodegen (24) Exchange #10 WholeStageCodegen (4) HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * 
promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),csales,sum,isEmpty] InputAdapter Exchange [c_customer_sk] #11 WholeStageCodegen (3) @@ -102,7 +102,7 @@ WholeStageCodegen (24) ReusedExchange [c_customer_sk] #9 InputAdapter ReusedExchange [d_date_sk] #12 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),ssales,sum,isEmpty] InputAdapter Exchange [c_customer_sk] #8 WholeStageCodegen (8) @@ -148,7 +148,7 @@ WholeStageCodegen (24) Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),ssales,sum,isEmpty] InputAdapter ReusedExchange [c_customer_sk,sum,isEmpty] #8 InputAdapter diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt index b99458d82af0c..3de1f24613451 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt @@ -322,20 +322,20 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (43) HashAggregate [codegen id : 15] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#31, isEmpty#32] Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] (44) HashAggregate [codegen id : 15] Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: 
[sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] (45) Filter [codegen id : 15] Input [2]: [c_customer_sk#29, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) +Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) (46) Project [codegen id : 15] Output [1]: [c_customer_sk#29] @@ -410,20 +410,20 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (63) HashAggregate [codegen id : 24] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#31, isEmpty#32] Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] (64) HashAggregate [codegen id : 24] Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] (65) Filter [codegen id : 24] Input [2]: [c_customer_sk#29, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) +Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery 
Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) (66) Project [codegen id : 24] Output [1]: [c_customer_sk#29] @@ -450,7 +450,7 @@ Input [6]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, c_customer_sk (71) HashAggregate [codegen id : 26] Input [4]: [cs_quantity#3, cs_list_price#4, c_first_name#41, c_last_name#42] Keys [2]: [c_last_name#42, c_first_name#41] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#44, isEmpty#45] Results [4]: [c_last_name#42, c_first_name#41, sum#46, isEmpty#47] @@ -461,9 +461,9 @@ Arguments: hashpartitioning(c_last_name#42, c_first_name#41, 5), ENSURE_REQUIREM (73) HashAggregate [codegen id : 27] Input [4]: [c_last_name#42, c_first_name#41, sum#46, isEmpty#47] Keys [2]: [c_last_name#42, c_first_name#41] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#49] -Results [3]: [c_last_name#42, c_first_name#41, sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#49 AS sales#50] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))#49] +Results [3]: [c_last_name#42, c_first_name#41, sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))#49 AS sales#50] (74) Scan parquet default.web_sales Output [5]: [ws_item_sk#51, ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55] @@ -580,20 +580,20 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (100) HashAggregate [codegen id : 42] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#59, isEmpty#60] Results [3]: [c_customer_sk#29, sum#61, isEmpty#62] (101) HashAggregate [codegen id : 42] Input [3]: [c_customer_sk#29, sum#61, isEmpty#62] Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: 
[sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] (102) Filter [codegen id : 42] Input [2]: [c_customer_sk#29, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) +Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) (103) Project [codegen id : 42] Output [1]: [c_customer_sk#29] @@ -653,20 +653,20 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (117) HashAggregate [codegen id : 51] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#59, isEmpty#60] Results [3]: [c_customer_sk#29, sum#61, isEmpty#62] (118) HashAggregate [codegen id : 51] Input [3]: [c_customer_sk#29, sum#61, isEmpty#62] Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] +Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] 
(119) Filter [codegen id : 51] Input [2]: [c_customer_sk#29, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) +Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) (120) Project [codegen id : 51] Output [1]: [c_customer_sk#29] @@ -693,7 +693,7 @@ Input [6]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, c_customer (125) HashAggregate [codegen id : 53] Input [4]: [ws_quantity#53, ws_list_price#54, c_first_name#65, c_last_name#66] Keys [2]: [c_last_name#66, c_first_name#65] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#67, isEmpty#68] Results [4]: [c_last_name#66, c_first_name#65, sum#69, isEmpty#70] @@ -704,9 +704,9 @@ Arguments: hashpartitioning(c_last_name#66, c_first_name#65, 5), ENSURE_REQUIREM (127) HashAggregate [codegen id : 54] Input [4]: [c_last_name#66, c_first_name#65, sum#69, isEmpty#70] Keys [2]: [c_last_name#66, c_first_name#65] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2), true))#72] -Results [3]: [c_last_name#66, c_first_name#65, sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2), true))#72 AS sales#73] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))#72] +Results [3]: [c_last_name#66, c_first_name#65, sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))#72 AS sales#73] (128) Union @@ -850,16 +850,16 @@ Input [4]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81, c_customer_sk# (152) HashAggregate [codegen id : 6] Input [3]: [ss_quantity#80, ss_sales_price#81, c_customer_sk#86] Keys [1]: [c_customer_sk#86] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#87, isEmpty#88] Results [3]: [c_customer_sk#86, sum#89, isEmpty#90] (153) HashAggregate [codegen id : 6] Input [3]: 
[c_customer_sk#86, sum#89, isEmpty#90] Keys [1]: [c_customer_sk#86] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2), true))#91] -Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2), true))#91 AS csales#92] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2)))#91] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2)))#91 AS csales#92] (154) HashAggregate [codegen id : 6] Input [1]: [csales#92] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt index c3779ff0d6e2d..6561fbeddef1d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt @@ -1,7 +1,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Union WholeStageCodegen (27) - HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sales,sum,isEmpty] InputAdapter Exchange [c_last_name,c_first_name] #1 WholeStageCodegen (26) @@ -92,7 +92,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Exchange #10 WholeStageCodegen (6) HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),csales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] @@ -123,7 +123,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Sort [c_customer_sk] InputAdapter ReusedExchange [c_customer_sk] #9 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] 
[sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),ssales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] @@ -169,7 +169,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),ssales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] @@ -184,7 +184,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] InputAdapter ReusedExchange [c_customer_sk] #9 WholeStageCodegen (54) - HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sales,sum,isEmpty] InputAdapter Exchange [c_last_name,c_first_name] #14 WholeStageCodegen (53) @@ -240,7 +240,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),ssales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] @@ -270,7 +270,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),ssales,sum,isEmpty] HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt index 
0527d277461e7..bea457e24dca9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt @@ -252,7 +252,7 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# (36) HashAggregate [codegen id : 8] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] Keys [1]: [c_customer_sk#28] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#30, isEmpty#31] Results [3]: [c_customer_sk#28, sum#32, isEmpty#33] @@ -263,13 +263,13 @@ Arguments: hashpartitioning(c_customer_sk#28, 5), ENSURE_REQUIREMENTS, [id=#34] (38) HashAggregate [codegen id : 9] Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] (39) Filter [codegen id : 9] Input [2]: [c_customer_sk#28, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) +Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) (40) Project [codegen id : 9] Output [1]: [c_customer_sk#28] @@ -312,13 +312,13 @@ Output [3]: [c_customer_sk#28, sum#32, isEmpty#33] (49) HashAggregate [codegen id : 14] Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * 
promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] (50) Filter [codegen id : 14] Input [2]: [c_customer_sk#28, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) +Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) (51) Project [codegen id : 14] Output [1]: [c_customer_sk#28] @@ -361,7 +361,7 @@ Input [6]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#40, (60) HashAggregate [codegen id : 17] Input [4]: [cs_quantity#3, cs_list_price#4, c_first_name#40, c_last_name#41] Keys [2]: [c_last_name#41, c_first_name#40] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#45, isEmpty#46] Results [4]: [c_last_name#41, c_first_name#40, sum#47, isEmpty#48] @@ -372,9 +372,9 @@ Arguments: hashpartitioning(c_last_name#41, c_first_name#40, 5), ENSURE_REQUIREM (62) HashAggregate [codegen id : 18] Input [4]: [c_last_name#41, c_first_name#40, sum#47, isEmpty#48] Keys [2]: [c_last_name#41, c_first_name#40] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#50] -Results [3]: [c_last_name#41, c_first_name#40, sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#50 AS sales#51] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))#50] +Results [3]: [c_last_name#41, c_first_name#40, sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))#50 AS sales#51] (63) Scan parquet default.web_sales Output [5]: [ws_item_sk#52, ws_bill_customer_sk#53, ws_quantity#54, 
ws_list_price#55, ws_sold_date_sk#56] @@ -417,13 +417,13 @@ Output [3]: [c_customer_sk#28, sum#58, isEmpty#59] (72) HashAggregate [codegen id : 27] Input [3]: [c_customer_sk#28, sum#58, isEmpty#59] Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 AS ssales#36] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] (73) Filter [codegen id : 27] Input [2]: [c_customer_sk#28, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) +Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) (74) Project [codegen id : 27] Output [1]: [c_customer_sk#28] @@ -465,7 +465,7 @@ Input [6]: [ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56, c_first_name#6 (83) HashAggregate [codegen id : 35] Input [4]: [ws_quantity#54, ws_list_price#55, c_first_name#61, c_last_name#62] Keys [2]: [c_last_name#62, c_first_name#61] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#64, isEmpty#65] Results [4]: [c_last_name#62, c_first_name#61, sum#66, isEmpty#67] @@ -476,9 +476,9 @@ Arguments: hashpartitioning(c_last_name#62, c_first_name#61, 5), ENSURE_REQUIREM (85) HashAggregate [codegen id : 36] Input [4]: [c_last_name#62, c_first_name#61, sum#66, isEmpty#67] Keys [2]: [c_last_name#62, c_first_name#61] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))#69] -Results [3]: [c_last_name#62, c_first_name#61, sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), 
DecimalType(18,2), true))#69 AS sales#70] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2)))#69] +Results [3]: [c_last_name#62, c_first_name#61, sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2)))#69 AS sales#70] (86) Union @@ -608,7 +608,7 @@ Input [5]: [ss_quantity#77, ss_sales_price#78, ss_sold_date_sk#79, c_customer_sk (107) HashAggregate [codegen id : 3] Input [3]: [ss_quantity#77, ss_sales_price#78, c_customer_sk#81] Keys [1]: [c_customer_sk#81] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#83, isEmpty#84] Results [3]: [c_customer_sk#81, sum#85, isEmpty#86] @@ -619,9 +619,9 @@ Arguments: hashpartitioning(c_customer_sk#81, 5), ENSURE_REQUIREMENTS, [id=#87] (109) HashAggregate [codegen id : 4] Input [3]: [c_customer_sk#81, sum#85, isEmpty#86] Keys [1]: [c_customer_sk#81] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2), true))#88] -Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2), true))#88 AS csales#89] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2)))#88] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2)))#88 AS csales#89] (110) HashAggregate [codegen id : 4] Input [1]: [csales#89] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt index 84ab178f95260..19f5b95dce994 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt @@ -1,7 +1,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Union WholeStageCodegen (18) - HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] + HashAggregate 
[c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sales,sum,isEmpty] InputAdapter Exchange [c_last_name,c_first_name] #1 WholeStageCodegen (17) @@ -78,7 +78,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Exchange #10 WholeStageCodegen (4) HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),csales,sum,isEmpty] InputAdapter Exchange [c_customer_sk] #11 WholeStageCodegen (3) @@ -103,7 +103,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] ReusedExchange [c_customer_sk] #9 InputAdapter ReusedExchange [d_date_sk] #12 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),ssales,sum,isEmpty] InputAdapter Exchange [c_customer_sk] #8 WholeStageCodegen (8) @@ -142,13 +142,13 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),ssales,sum,isEmpty] InputAdapter ReusedExchange [c_customer_sk,sum,isEmpty] #8 InputAdapter ReusedExchange [d_date_sk] #3 WholeStageCodegen (36) - HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sales,sum,isEmpty] InputAdapter Exchange [c_last_name,c_first_name] #15 WholeStageCodegen (35) @@ -179,7 +179,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Project [c_customer_sk] Filter [ssales] ReusedSubquery [tpcds_cmax] #3 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),ssales,sum,isEmpty] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),ssales,sum,isEmpty] InputAdapter ReusedExchange [c_customer_sk,sum,isEmpty] #8 InputAdapter diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt index 2ecb115faf87d..7b82aed515f39 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt @@ -536,6 +536,6 @@ Input [2]: [sum#61, count#62] Keys: [] Functions [1]: [avg(netpaid#39)] Aggregate Attributes [1]: [avg(netpaid#39)#64] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#39)#64)), DecimalType(24,8), true) AS (0.05 * avg(netpaid))#65] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#39)#64)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#65] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt index 0ad7d96f8f777..d1fa0bd182199 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt @@ -412,6 +412,6 @@ Input [2]: [sum#54, count#55] Keys: [] Functions [1]: [avg(netpaid#38)] Aggregate Attributes [1]: [avg(netpaid#38)#57] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#38)#57)), DecimalType(24,8), true) AS (0.05 * avg(netpaid))#58] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#38)#57)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#58] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt index 9e4e27f2c6726..fa921b7f2b622 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt @@ -536,6 +536,6 @@ Input [2]: [sum#61, count#62] Keys: [] Functions [1]: [avg(netpaid#39)] Aggregate Attributes [1]: [avg(netpaid#39)#64] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#39)#64)), DecimalType(24,8), true) AS (0.05 * avg(netpaid))#65] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#39)#64)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#65] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt index 78371d380114e..e1a6c33699efd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt @@ -412,6 +412,6 @@ Input [2]: [sum#54, count#55] Keys: [] Functions [1]: [avg(netpaid#38)] Aggregate Attributes [1]: [avg(netpaid#38)#57] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#38)#57)), DecimalType(24,8), true) AS (0.05 * avg(netpaid))#58] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#38)#57)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#58] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/explain.txt index 35b9877c4fd09..b2d52de3cae98 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/explain.txt @@ -287,7 +287,7 @@ Input [3]: [ctr_state#34, sum#42, count#43] Keys [1]: [ctr_state#34] Functions [1]: [avg(ctr_total_return#35)] Aggregate Attributes [1]: [avg(ctr_total_return#35)#45] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#35)#45) * 1.200000), DecimalType(24,7), true) AS (avg(ctr_total_return) * 1.2)#46, ctr_state#34 AS ctr_state#34#47] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#35)#45) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#46, ctr_state#34 AS ctr_state#34#47] (51) Filter [codegen id : 16] Input [2]: [(avg(ctr_total_return) * 1.2)#46, ctr_state#34#47] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt index fdf276c01e19a..333930275bbd1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt @@ -199,7 +199,7 @@ Input [3]: [ctr_state#15, sum#22, count#23] Keys [1]: [ctr_state#15] Functions [1]: [avg(ctr_total_return#16)] Aggregate Attributes [1]: [avg(ctr_total_return#16)#25] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#16)#25) * 1.200000), DecimalType(24,7), true) AS (avg(ctr_total_return) * 1.2)#26, ctr_state#15 AS ctr_state#15#27] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#16)#25) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#26, ctr_state#15 AS ctr_state#15#27] (32) Filter [codegen id : 8] Input [2]: [(avg(ctr_total_return) * 1.2)#26, ctr_state#15#27] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt index 807506df80411..c1bff1a691dc7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt @@ -599,10 +599,10 @@ Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id= (107) BroadcastHashJoin [codegen id : 42] Left keys [1]: [ca_county#41] Right keys [1]: [ca_county#55] -Join condition: ((CASE WHEN (web_sales#60 > 0.00) THEN CheckOverflow((promote_precision(web_sales#73) / promote_precision(web_sales#60)), DecimalType(37,20), true) END > CASE WHEN (store_sales#45 > 0.00) THEN CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#45)), DecimalType(37,20), true) END) AND (CASE WHEN (web_sales#73 > 0.00) THEN CheckOverflow((promote_precision(web_sales#87) / promote_precision(web_sales#73)), DecimalType(37,20), true) END > CASE WHEN (store_sales#16 > 0.00) THEN CheckOverflow((promote_precision(store_sales#30) / promote_precision(store_sales#16)), DecimalType(37,20), true) END)) +Join condition: ((CASE WHEN (web_sales#60 > 0.00) THEN CheckOverflow((promote_precision(web_sales#73) / promote_precision(web_sales#60)), DecimalType(37,20)) END > CASE WHEN (store_sales#45 > 0.00) THEN CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#45)), DecimalType(37,20)) END) AND (CASE WHEN (web_sales#73 > 0.00) THEN 
CheckOverflow((promote_precision(web_sales#87) / promote_precision(web_sales#73)), DecimalType(37,20)) END > CASE WHEN (store_sales#16 > 0.00) THEN CheckOverflow((promote_precision(store_sales#30) / promote_precision(store_sales#16)), DecimalType(37,20)) END)) (108) Project [codegen id : 42] -Output [6]: [ca_county#41, d_year#37, CheckOverflow((promote_precision(web_sales#73) / promote_precision(web_sales#60)), DecimalType(37,20), true) AS web_q1_q2_increase#90, CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#45)), DecimalType(37,20), true) AS store_q1_q2_increase#91, CheckOverflow((promote_precision(web_sales#87) / promote_precision(web_sales#73)), DecimalType(37,20), true) AS web_q2_q3_increase#92, CheckOverflow((promote_precision(store_sales#30) / promote_precision(store_sales#16)), DecimalType(37,20), true) AS store_q2_q3_increase#93] +Output [6]: [ca_county#41, d_year#37, CheckOverflow((promote_precision(web_sales#73) / promote_precision(web_sales#60)), DecimalType(37,20)) AS web_q1_q2_increase#90, CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#45)), DecimalType(37,20)) AS store_q1_q2_increase#91, CheckOverflow((promote_precision(web_sales#87) / promote_precision(web_sales#73)), DecimalType(37,20)) AS web_q2_q3_increase#92, CheckOverflow((promote_precision(store_sales#30) / promote_precision(store_sales#16)), DecimalType(37,20)) AS store_q2_q3_increase#93] Input [9]: [store_sales#16, store_sales#30, ca_county#41, d_year#37, store_sales#45, ca_county#55, web_sales#60, web_sales#73, web_sales#87] (109) Exchange diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt index 124f6d2dacf17..d5c2cc3377a7e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt @@ -429,7 +429,7 @@ Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id= (72) BroadcastHashJoin [codegen id : 24] Left keys [1]: [ca_county#51] Right keys [1]: [ca_county#65] -Join condition: (CASE WHEN (web_sales#56 > 0.00) THEN CheckOverflow((promote_precision(web_sales#69) / promote_precision(web_sales#56)), DecimalType(37,20), true) END > CASE WHEN (store_sales#15 > 0.00) THEN CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#15)), DecimalType(37,20), true) END) +Join condition: (CASE WHEN (web_sales#56 > 0.00) THEN CheckOverflow((promote_precision(web_sales#69) / promote_precision(web_sales#56)), DecimalType(37,20)) END > CASE WHEN (store_sales#15 > 0.00) THEN CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#15)), DecimalType(37,20)) END) (73) Project [codegen id : 24] Output [8]: [ca_county#9, d_year#6, store_sales#15, store_sales#28, store_sales#42, ca_county#51, web_sales#56, web_sales#69] @@ -499,10 +499,10 @@ Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id= (87) BroadcastHashJoin [codegen id : 24] Left keys [1]: [ca_county#51] Right keys [1]: [ca_county#78] -Join condition: (CASE WHEN (web_sales#69 > 0.00) THEN CheckOverflow((promote_precision(web_sales#82) / promote_precision(web_sales#69)), DecimalType(37,20), true) END > CASE WHEN (store_sales#28 > 0.00) THEN CheckOverflow((promote_precision(store_sales#42) / promote_precision(store_sales#28)), DecimalType(37,20), true) END) 
+Join condition: (CASE WHEN (web_sales#69 > 0.00) THEN CheckOverflow((promote_precision(web_sales#82) / promote_precision(web_sales#69)), DecimalType(37,20)) END > CASE WHEN (store_sales#28 > 0.00) THEN CheckOverflow((promote_precision(store_sales#42) / promote_precision(store_sales#28)), DecimalType(37,20)) END) (88) Project [codegen id : 24] -Output [6]: [ca_county#9, d_year#6, CheckOverflow((promote_precision(web_sales#69) / promote_precision(web_sales#56)), DecimalType(37,20), true) AS web_q1_q2_increase#84, CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#15)), DecimalType(37,20), true) AS store_q1_q2_increase#85, CheckOverflow((promote_precision(web_sales#82) / promote_precision(web_sales#69)), DecimalType(37,20), true) AS web_q2_q3_increase#86, CheckOverflow((promote_precision(store_sales#42) / promote_precision(store_sales#28)), DecimalType(37,20), true) AS store_q2_q3_increase#87] +Output [6]: [ca_county#9, d_year#6, CheckOverflow((promote_precision(web_sales#69) / promote_precision(web_sales#56)), DecimalType(37,20)) AS web_q1_q2_increase#84, CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#15)), DecimalType(37,20)) AS store_q1_q2_increase#85, CheckOverflow((promote_precision(web_sales#82) / promote_precision(web_sales#69)), DecimalType(37,20)) AS web_q2_q3_increase#86, CheckOverflow((promote_precision(store_sales#42) / promote_precision(store_sales#28)), DecimalType(37,20)) AS store_q2_q3_increase#87] Input [10]: [ca_county#9, d_year#6, store_sales#15, store_sales#28, store_sales#42, ca_county#51, web_sales#56, web_sales#69, ca_county#78, web_sales#82] (89) Exchange diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt index 1ace9e7f294aa..92ba279df59fe 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt @@ -93,7 +93,7 @@ Input [3]: [cs_item_sk#4, sum#11, count#12] Keys [1]: [cs_item_sk#4] Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#5))] Aggregate Attributes [1]: [avg(UnscaledValue(cs_ext_discount_amt#5))#14] -Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(cs_ext_discount_amt#5))#14 / 100.0) as decimal(11,6)))), DecimalType(14,7), true) AS (1.3 * avg(cs_ext_discount_amt))#15, cs_item_sk#4] +Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(cs_ext_discount_amt#5))#14 / 100.0) as decimal(11,6)))), DecimalType(14,7)) AS (1.3 * avg(cs_ext_discount_amt))#15, cs_item_sk#4] (15) Filter Input [2]: [(1.3 * avg(cs_ext_discount_amt))#15, cs_item_sk#4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt index f6c9b9ed7dcef..e221defe867c1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt @@ -117,7 +117,7 @@ Input [3]: [cs_item_sk#8, sum#14, count#15] Keys [1]: [cs_item_sk#8] Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#9))] Aggregate Attributes [1]: [avg(UnscaledValue(cs_ext_discount_amt#9))#17] -Results [2]: [CheckOverflow((1.300000 * 
promote_precision(cast((avg(UnscaledValue(cs_ext_discount_amt#9))#17 / 100.0) as decimal(11,6)))), DecimalType(14,7), true) AS (1.3 * avg(cs_ext_discount_amt))#18, cs_item_sk#8] +Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(cs_ext_discount_amt#9))#17 / 100.0) as decimal(11,6)))), DecimalType(14,7)) AS (1.3 * avg(cs_ext_discount_amt))#18, cs_item_sk#8] (20) Filter [codegen id : 4] Input [2]: [(1.3 * avg(cs_ext_discount_amt))#18, cs_item_sk#8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/explain.txt index 6924f13d615bf..81050cfbb4475 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/explain.txt @@ -134,7 +134,7 @@ Input [5]: [i_category#15, i_class#16, spark_grouping_id#17, sum#20, sum#21] Keys [3]: [i_category#15, i_class#16, spark_grouping_id#17] Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#23, sum(UnscaledValue(ss_ext_sales_price#3))#24] -Results [7]: [CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#23,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#24,17,2))), DecimalType(37,20), true) AS gross_margin#25, i_category#15, i_class#16, (cast((shiftright(spark_grouping_id#17, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint)) AS lochierarchy#26, (cast((shiftright(spark_grouping_id#17, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint)) AS _w1#27, CASE WHEN (cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint) = 0) THEN i_category#15 END AS _w2#28, CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#23,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#24,17,2))), DecimalType(37,20), true) AS _w3#29] +Results [7]: [CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#23,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#24,17,2))), DecimalType(37,20)) AS gross_margin#25, i_category#15, i_class#16, (cast((shiftright(spark_grouping_id#17, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint)) AS lochierarchy#26, (cast((shiftright(spark_grouping_id#17, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint)) AS _w1#27, CASE WHEN (cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint) = 0) THEN i_category#15 END AS _w2#28, CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#23,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#24,17,2))), DecimalType(37,20)) AS _w3#29] (24) Exchange Input [7]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, _w1#27, _w2#28, _w3#29] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt index a9cad5df37b9b..7ef898a59a2c1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt @@ -134,7 +134,7 @@ Input [5]: 
[i_category#15, i_class#16, spark_grouping_id#17, sum#20, sum#21] Keys [3]: [i_category#15, i_class#16, spark_grouping_id#17] Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#23, sum(UnscaledValue(ss_ext_sales_price#3))#24] -Results [7]: [CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#23,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#24,17,2))), DecimalType(37,20), true) AS gross_margin#25, i_category#15, i_class#16, (cast((shiftright(spark_grouping_id#17, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint)) AS lochierarchy#26, (cast((shiftright(spark_grouping_id#17, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint)) AS _w1#27, CASE WHEN (cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint) = 0) THEN i_category#15 END AS _w2#28, CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#23,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#24,17,2))), DecimalType(37,20), true) AS _w3#29] +Results [7]: [CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#23,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#24,17,2))), DecimalType(37,20)) AS gross_margin#25, i_category#15, i_class#16, (cast((shiftright(spark_grouping_id#17, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint)) AS lochierarchy#26, (cast((shiftright(spark_grouping_id#17, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint)) AS _w1#27, CASE WHEN (cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint) = 0) THEN i_category#15 END AS _w2#28, CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#23,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#24,17,2))), DecimalType(37,20)) AS _w3#29] (24) Exchange Input [7]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, _w1#27, _w2#28, _w3#29] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/explain.txt index 40deb5feb0b4b..7ebe44763c25a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/explain.txt @@ -188,7 +188,7 @@ Input [14]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_sales_price#3, ss_e (16) HashAggregate [codegen id : 6] Input [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, ss_ext_discount_amt#2, ss_ext_sales_price#3, ss_ext_wholesale_cost#4, ss_ext_list_price#5, d_year#9] Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#9] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + 
promote_precision(cast(ss_ext_sales_price#3 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#3 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] Aggregate Attributes [2]: [sum#20, isEmpty#21] Results [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#9, sum#22, isEmpty#23] @@ -199,9 +199,9 @@ Arguments: hashpartitioning(c_customer_id#12, c_first_name#13, c_last_name#14, c (18) HashAggregate [codegen id : 7] Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#9, sum#22, isEmpty#23] Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#9] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#3 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#3 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#25] -Results [2]: [c_customer_id#12 AS customer_id#26, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#3 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#25 AS year_total#27] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#3 as 
decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#3 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#25] +Results [2]: [c_customer_id#12 AS customer_id#26, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#3 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#25 AS year_total#27] (19) Filter [codegen id : 7] Input [2]: [customer_id#26, year_total#27] @@ -269,7 +269,7 @@ Input [14]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, s (34) HashAggregate [codegen id : 14] Input [12]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, d_year#37] Keys [8]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#37] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#32 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#31 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#32 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#31 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] Aggregate Attributes [2]: [sum#47, isEmpty#48] Results [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#37, sum#49, isEmpty#50] @@ -280,9 +280,9 @@ Arguments: hashpartitioning(c_customer_id#40, c_first_name#41, c_last_name#42, c (36) HashAggregate [codegen id : 15] Input [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#37, sum#49, isEmpty#50] Keys [8]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, 
c_birth_country#44, c_login#45, c_email_address#46, d_year#37] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#32 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#31 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#32 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#31 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#25] -Results [8]: [c_customer_id#40 AS customer_id#52, c_first_name#41 AS customer_first_name#53, c_last_name#42 AS customer_last_name#54, c_preferred_cust_flag#43 AS customer_preferred_cust_flag#55, c_birth_country#44 AS customer_birth_country#56, c_login#45 AS customer_login#57, c_email_address#46 AS customer_email_address#58, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#32 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#31 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#25 AS year_total#59] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#32 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#31 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#32 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#31 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#25] +Results [8]: [c_customer_id#40 AS customer_id#52, c_first_name#41 AS customer_first_name#53, c_last_name#42 AS customer_last_name#54, c_preferred_cust_flag#43 AS customer_preferred_cust_flag#55, c_birth_country#44 AS customer_birth_country#56, c_login#45 AS customer_login#57, c_email_address#46 AS customer_email_address#58, 
sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#32 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#31 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#25 AS year_total#59] (37) Exchange Input [8]: [customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58, year_total#59] @@ -351,7 +351,7 @@ Input [14]: [cs_bill_customer_sk#61, cs_ext_discount_amt#62, cs_ext_sales_price# (52) HashAggregate [codegen id : 23] Input [12]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, cs_ext_discount_amt#62, cs_ext_sales_price#63, cs_ext_wholesale_cost#64, cs_ext_list_price#65, d_year#68] Keys [8]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#68] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#65 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#64 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#62 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#63 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#65 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#64 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#62 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#63 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] Aggregate Attributes [2]: [sum#78, isEmpty#79] Results [10]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#68, sum#80, isEmpty#81] @@ -362,9 +362,9 @@ Arguments: hashpartitioning(c_customer_id#71, c_first_name#72, c_last_name#73, c (54) HashAggregate [codegen id : 24] Input [10]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#68, sum#80, isEmpty#81] Keys [8]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#68] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#65 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#64 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#62 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + 
promote_precision(cast(cs_ext_sales_price#63 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#65 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#64 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#62 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#63 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#83] -Results [2]: [c_customer_id#71 AS customer_id#84, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#65 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#64 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#62 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#63 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#83 AS year_total#85] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#65 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#64 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#62 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#63 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#65 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#64 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#62 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#63 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#83] +Results [2]: [c_customer_id#71 AS customer_id#84, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#65 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#64 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#62 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#63 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#83 AS year_total#85] (55) Filter [codegen id : 24] Input [2]: [customer_id#84, year_total#85] @@ -441,7 +441,7 @@ Input [14]: [cs_bill_customer_sk#87, cs_ext_discount_amt#88, cs_ext_sales_price# (72) HashAggregate [codegen id : 32] Input [12]: [c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103, cs_ext_discount_amt#88, cs_ext_sales_price#89, cs_ext_wholesale_cost#90, cs_ext_list_price#91, d_year#94] Keys [8]: [c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, 
c_birth_country#101, c_login#102, c_email_address#103, d_year#94] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#91 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#90 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#88 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#89 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#91 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#90 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#88 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#89 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] Aggregate Attributes [2]: [sum#104, isEmpty#105] Results [10]: [c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103, d_year#94, sum#106, isEmpty#107] @@ -452,9 +452,9 @@ Arguments: hashpartitioning(c_customer_id#97, c_first_name#98, c_last_name#99, c (74) HashAggregate [codegen id : 33] Input [10]: [c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103, d_year#94, sum#106, isEmpty#107] Keys [8]: [c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103, d_year#94] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#91 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#90 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#88 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#89 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#91 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#90 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#88 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#89 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#83] -Results [2]: [c_customer_id#97 AS customer_id#109, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#91 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#90 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#88 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#89 as decimal(10,2)))), 
DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#83 AS year_total#110] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#91 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#90 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#88 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#89 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#91 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#90 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#88 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#89 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#83] +Results [2]: [c_customer_id#97 AS customer_id#109, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#91 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#90 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#88 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#89 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#83 AS year_total#110] (75) Exchange Input [2]: [customer_id#109, year_total#110] @@ -467,7 +467,7 @@ Arguments: [customer_id#109 ASC NULLS FIRST], false, 0 (77) SortMergeJoin [codegen id : 35] Left keys [1]: [customer_id#26] Right keys [1]: [customer_id#109] -Join condition: (CASE WHEN (year_total#85 > 0.000000) THEN CheckOverflow((promote_precision(year_total#110) / promote_precision(year_total#85)), DecimalType(38,14), true) END > CASE WHEN (year_total#27 > 0.000000) THEN CheckOverflow((promote_precision(year_total#59) / promote_precision(year_total#27)), DecimalType(38,14), true) END) +Join condition: (CASE WHEN (year_total#85 > 0.000000) THEN CheckOverflow((promote_precision(year_total#110) / promote_precision(year_total#85)), DecimalType(38,14)) END > CASE WHEN (year_total#27 > 0.000000) THEN CheckOverflow((promote_precision(year_total#59) / promote_precision(year_total#27)), DecimalType(38,14)) END) (78) Project [codegen id : 35] Output [10]: [customer_id#26, customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58, year_total#85, year_total#110] @@ -527,7 +527,7 @@ Input [14]: [ws_bill_customer_sk#112, ws_ext_discount_amt#113, ws_ext_sales_pric (91) HashAggregate [codegen id : 41] Input [12]: [c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128, ws_ext_discount_amt#113, ws_ext_sales_price#114, ws_ext_wholesale_cost#115, ws_ext_list_price#116, d_year#119] Keys [8]: [c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128, d_year#119] -Functions [1]: 
[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#116 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#115 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#113 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#114 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#116 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#115 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#113 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#114 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] Aggregate Attributes [2]: [sum#129, isEmpty#130] Results [10]: [c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128, d_year#119, sum#131, isEmpty#132] @@ -538,9 +538,9 @@ Arguments: hashpartitioning(c_customer_id#122, c_first_name#123, c_last_name#124 (93) HashAggregate [codegen id : 42] Input [10]: [c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128, d_year#119, sum#131, isEmpty#132] Keys [8]: [c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128, d_year#119] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#116 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#115 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#113 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#114 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#116 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#115 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#113 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#114 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#134] -Results [2]: [c_customer_id#122 AS customer_id#135, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#116 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#115 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#113 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#114 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), 
true))#134 AS year_total#136] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#116 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#115 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#113 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#114 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#116 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#115 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#113 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#114 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#134] +Results [2]: [c_customer_id#122 AS customer_id#135, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#116 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#115 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#113 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#114 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#134 AS year_total#136] (94) Filter [codegen id : 42] Input [2]: [customer_id#135, year_total#136] @@ -617,7 +617,7 @@ Input [14]: [ws_bill_customer_sk#138, ws_ext_discount_amt#139, ws_ext_sales_pric (111) HashAggregate [codegen id : 50] Input [12]: [c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154, ws_ext_discount_amt#139, ws_ext_sales_price#140, ws_ext_wholesale_cost#141, ws_ext_list_price#142, d_year#145] Keys [8]: [c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154, d_year#145] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#142 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#141 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#139 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#140 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#142 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#141 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#139 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#140 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] Aggregate Attributes [2]: [sum#155, isEmpty#156] Results [10]: 
[c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154, d_year#145, sum#157, isEmpty#158] @@ -628,9 +628,9 @@ Arguments: hashpartitioning(c_customer_id#148, c_first_name#149, c_last_name#150 (113) HashAggregate [codegen id : 51] Input [10]: [c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154, d_year#145, sum#157, isEmpty#158] Keys [8]: [c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154, d_year#145] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#142 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#141 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#139 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#140 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#142 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#141 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#139 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#140 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#134] -Results [2]: [c_customer_id#148 AS customer_id#160, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#142 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#141 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#139 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#140 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#134 AS year_total#161] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#142 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#141 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#139 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#140 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#142 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#141 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#139 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#140 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#134] +Results [2]: [c_customer_id#148 AS 
customer_id#160, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#142 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#141 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#139 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#140 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#134 AS year_total#161] (114) Exchange Input [2]: [customer_id#160, year_total#161] @@ -643,7 +643,7 @@ Arguments: [customer_id#160 ASC NULLS FIRST], false, 0 (116) SortMergeJoin [codegen id : 53] Left keys [1]: [customer_id#26] Right keys [1]: [customer_id#160] -Join condition: (CASE WHEN (year_total#85 > 0.000000) THEN CheckOverflow((promote_precision(year_total#110) / promote_precision(year_total#85)), DecimalType(38,14), true) END > CASE WHEN (year_total#136 > 0.000000) THEN CheckOverflow((promote_precision(year_total#161) / promote_precision(year_total#136)), DecimalType(38,14), true) END) +Join condition: (CASE WHEN (year_total#85 > 0.000000) THEN CheckOverflow((promote_precision(year_total#110) / promote_precision(year_total#85)), DecimalType(38,14)) END > CASE WHEN (year_total#136 > 0.000000) THEN CheckOverflow((promote_precision(year_total#161) / promote_precision(year_total#136)), DecimalType(38,14)) END) (117) Project [codegen id : 53] Output [7]: [customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/simplified.txt index cb2e3432e4ab2..e8e55fe575720 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/simplified.txt @@ -24,7 +24,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom Exchange [customer_id] #1 WholeStageCodegen (7) Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + 
promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum,isEmpty] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #2 WholeStageCodegen (6) @@ -68,7 +68,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom InputAdapter Exchange [customer_id] #6 WholeStageCodegen (15) - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,sum,isEmpty] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,sum,isEmpty] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #7 WholeStageCodegen (14) @@ -108,7 +108,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom Exchange [customer_id] #10 WholeStageCodegen (24) Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) 
as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum,isEmpty] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 WholeStageCodegen (23) @@ -141,7 +141,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom InputAdapter Exchange [customer_id] #13 WholeStageCodegen (33) - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum,isEmpty] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #14 WholeStageCodegen (32) @@ -175,7 +175,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom Exchange [customer_id] #16 WholeStageCodegen (42) Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + 
promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum,isEmpty] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #17 WholeStageCodegen (41) @@ -208,7 +208,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom InputAdapter Exchange [customer_id] #19 WholeStageCodegen (51) - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum,isEmpty] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #20 WholeStageCodegen (50) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt index 9dbbacae2047e..b0af6fb5e1627 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt @@ -166,7 +166,7 @@ Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_fl (13) HashAggregate [codegen id : 3] Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#18] Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#18] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Functions [1]: 
[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] Aggregate Attributes [2]: [sum#19, isEmpty#20] Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#18, sum#21, isEmpty#22] @@ -177,9 +177,9 @@ Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_pr (15) HashAggregate [codegen id : 24] Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#18, sum#21, isEmpty#22] Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#18] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#24] -Results [2]: [c_customer_id#2 AS customer_id#25, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#24 AS year_total#26] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: 
[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#24] +Results [2]: [c_customer_id#2 AS customer_id#25, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#24 AS year_total#26] (16) Filter [codegen id : 24] Input [2]: [customer_id#25, year_total#26] @@ -242,7 +242,7 @@ Input [14]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust (29) HashAggregate [codegen id : 6] Input [12]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, ss_ext_discount_amt#36, ss_ext_sales_price#37, ss_ext_wholesale_cost#38, ss_ext_list_price#39, d_year#44] Keys [8]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, d_year#44] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#39 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#38 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#36 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#37 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#39 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#38 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#36 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#37 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] Aggregate Attributes [2]: [sum#45, isEmpty#46] Results [10]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, d_year#44, sum#47, isEmpty#48] @@ -253,9 +253,9 @@ Arguments: hashpartitioning(c_customer_id#28, c_first_name#29, c_last_name#30, c (31) HashAggregate [codegen id : 7] Input [10]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, d_year#44, sum#47, isEmpty#48] Keys [8]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, d_year#44] -Functions [1]: 
[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#39 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#38 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#36 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#37 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#39 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#38 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#36 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#37 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#24] -Results [8]: [c_customer_id#28 AS customer_id#50, c_first_name#29 AS customer_first_name#51, c_last_name#30 AS customer_last_name#52, c_preferred_cust_flag#31 AS customer_preferred_cust_flag#53, c_birth_country#32 AS customer_birth_country#54, c_login#33 AS customer_login#55, c_email_address#34 AS customer_email_address#56, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#39 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#38 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#36 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#37 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#24 AS year_total#57] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#39 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#38 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#36 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#37 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#39 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#38 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#36 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#37 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#24] +Results [8]: [c_customer_id#28 AS customer_id#50, c_first_name#29 AS customer_first_name#51, c_last_name#30 AS customer_last_name#52, c_preferred_cust_flag#31 AS customer_preferred_cust_flag#53, c_birth_country#32 AS customer_birth_country#54, c_login#33 AS customer_login#55, c_email_address#34 AS customer_email_address#56, 
sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#39 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#38 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#36 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#37 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#24 AS year_total#57] (32) BroadcastExchange Input [8]: [customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#57] @@ -323,7 +323,7 @@ Input [14]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust (46) HashAggregate [codegen id : 10] Input [12]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, cs_ext_discount_amt#68, cs_ext_sales_price#69, cs_ext_wholesale_cost#70, cs_ext_list_price#71, d_year#75] Keys [8]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, d_year#75] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#71 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#70 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#68 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#69 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#71 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#70 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#68 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#69 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] Aggregate Attributes [2]: [sum#76, isEmpty#77] Results [10]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, d_year#75, sum#78, isEmpty#79] @@ -334,9 +334,9 @@ Arguments: hashpartitioning(c_customer_id#60, c_first_name#61, c_last_name#62, c (48) HashAggregate [codegen id : 11] Input [10]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, d_year#75, sum#78, isEmpty#79] Keys [8]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, d_year#75] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#71 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#70 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#68 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + 
promote_precision(cast(cs_ext_sales_price#69 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#71 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#70 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#68 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#69 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#81] -Results [2]: [c_customer_id#60 AS customer_id#82, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#71 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#70 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#68 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#69 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#81 AS year_total#83] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#71 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#70 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#68 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#69 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#71 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#70 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#68 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#69 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#81] +Results [2]: [c_customer_id#60 AS customer_id#82, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#71 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#70 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#68 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#69 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#81 AS year_total#83] (49) Filter [codegen id : 11] Input [2]: [customer_id#82, year_total#83] @@ -412,7 +412,7 @@ Input [14]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust (65) HashAggregate [codegen id : 14] Input [12]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, cs_ext_discount_amt#94, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, d_year#101] Keys [8]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, 
c_birth_country#90, c_login#91, c_email_address#92, d_year#101] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#94 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#94 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] Aggregate Attributes [2]: [sum#102, isEmpty#103] Results [10]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#101, sum#104, isEmpty#105] @@ -423,9 +423,9 @@ Arguments: hashpartitioning(c_customer_id#86, c_first_name#87, c_last_name#88, c (67) HashAggregate [codegen id : 15] Input [10]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#101, sum#104, isEmpty#105] Keys [8]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#101] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#94 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#94 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#81] -Results [2]: [c_customer_id#86 AS customer_id#107, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#94 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2), 
true)) / 2.00), DecimalType(14,6), true))#81 AS year_total#108] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#94 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#94 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#81] +Results [2]: [c_customer_id#86 AS customer_id#107, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#94 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#81 AS year_total#108] (68) BroadcastExchange Input [2]: [customer_id#107, year_total#108] @@ -434,7 +434,7 @@ Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id= (69) BroadcastHashJoin [codegen id : 24] Left keys [1]: [customer_id#25] Right keys [1]: [customer_id#107] -Join condition: (CASE WHEN (year_total#83 > 0.000000) THEN CheckOverflow((promote_precision(year_total#108) / promote_precision(year_total#83)), DecimalType(38,14), true) END > CASE WHEN (year_total#26 > 0.000000) THEN CheckOverflow((promote_precision(year_total#57) / promote_precision(year_total#26)), DecimalType(38,14), true) END) +Join condition: (CASE WHEN (year_total#83 > 0.000000) THEN CheckOverflow((promote_precision(year_total#108) / promote_precision(year_total#83)), DecimalType(38,14)) END > CASE WHEN (year_total#26 > 0.000000) THEN CheckOverflow((promote_precision(year_total#57) / promote_precision(year_total#26)), DecimalType(38,14)) END) (70) Project [codegen id : 24] Output [10]: [customer_id#25, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#83, year_total#108] @@ -497,7 +497,7 @@ Input [14]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_c (83) HashAggregate [codegen id : 18] Input [12]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, d_year#126] Keys [8]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126] -Functions [1]: 
[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] Aggregate Attributes [2]: [sum#127, isEmpty#128] Results [10]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, sum#129, isEmpty#130] @@ -508,9 +508,9 @@ Arguments: hashpartitioning(c_customer_id#111, c_first_name#112, c_last_name#113 (85) HashAggregate [codegen id : 19] Input [10]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, sum#129, isEmpty#130] Keys [8]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#132] -Results [2]: [c_customer_id#111 AS customer_id#133, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), 
true))#132 AS year_total#134] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#132] +Results [2]: [c_customer_id#111 AS customer_id#133, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#132 AS year_total#134] (86) Filter [codegen id : 19] Input [2]: [customer_id#133, year_total#134] @@ -586,7 +586,7 @@ Input [14]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_c (102) HashAggregate [codegen id : 22] Input [12]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, ws_ext_discount_amt#145, ws_ext_sales_price#146, ws_ext_wholesale_cost#147, ws_ext_list_price#148, d_year#152] Keys [8]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, d_year#152] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#148 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#147 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#145 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#146 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#148 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#147 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#145 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#146 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] Aggregate Attributes [2]: [sum#153, isEmpty#154] Results [10]: 
[c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, d_year#152, sum#155, isEmpty#156] @@ -597,9 +597,9 @@ Arguments: hashpartitioning(c_customer_id#137, c_first_name#138, c_last_name#139 (104) HashAggregate [codegen id : 23] Input [10]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, d_year#152, sum#155, isEmpty#156] Keys [8]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, d_year#152] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#148 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#147 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#145 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#146 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#148 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#147 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#145 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#146 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#132] -Results [2]: [c_customer_id#137 AS customer_id#158, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#148 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#147 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#145 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#146 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#132 AS year_total#159] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#148 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#147 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#145 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#146 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#148 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#147 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#145 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#146 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#132] +Results [2]: [c_customer_id#137 AS 
customer_id#158, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#148 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#147 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#145 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#146 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#132 AS year_total#159] (105) BroadcastExchange Input [2]: [customer_id#158, year_total#159] @@ -608,7 +608,7 @@ Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id= (106) BroadcastHashJoin [codegen id : 24] Left keys [1]: [customer_id#25] Right keys [1]: [customer_id#158] -Join condition: (CASE WHEN (year_total#83 > 0.000000) THEN CheckOverflow((promote_precision(year_total#108) / promote_precision(year_total#83)), DecimalType(38,14), true) END > CASE WHEN (year_total#134 > 0.000000) THEN CheckOverflow((promote_precision(year_total#159) / promote_precision(year_total#134)), DecimalType(38,14), true) END) +Join condition: (CASE WHEN (year_total#83 > 0.000000) THEN CheckOverflow((promote_precision(year_total#108) / promote_precision(year_total#83)), DecimalType(38,14)) END > CASE WHEN (year_total#134 > 0.000000) THEN CheckOverflow((promote_precision(year_total#159) / promote_precision(year_total#134)), DecimalType(38,14)) END) (107) Project [codegen id : 24] Output [7]: [customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt index 68d4f3219238a..67afe29952d88 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt @@ -10,7 +10,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom BroadcastHashJoin [customer_id,customer_id] BroadcastHashJoin [customer_id,customer_id] Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), 
DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum,isEmpty] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #1 WholeStageCodegen (3) @@ -42,7 +42,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom InputAdapter BroadcastExchange #4 WholeStageCodegen (7) - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,sum,isEmpty] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,sum,isEmpty] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #5 WholeStageCodegen (6) @@ -75,7 +75,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom BroadcastExchange #8 WholeStageCodegen (11) Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as 
decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum,isEmpty] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #9 WholeStageCodegen (10) @@ -101,7 +101,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom InputAdapter BroadcastExchange #11 WholeStageCodegen (15) - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum,isEmpty] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #12 WholeStageCodegen (14) @@ -128,7 +128,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom BroadcastExchange #14 WholeStageCodegen (19) Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as 
decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum,isEmpty] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #15 WholeStageCodegen (18) @@ -154,7 +154,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom InputAdapter BroadcastExchange #17 WholeStageCodegen (23) - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum,isEmpty] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #18 WholeStageCodegen (22) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt index 0da152eaf66a8..32d76db8cdf3a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt @@ -165,7 +165,7 @@ Input [7]: [cs_warehouse_sk#1, cs_sales_price#4, cr_refunded_cash#10, i_item_id# (30) HashAggregate [codegen id : 8] Input [5]: [cs_sales_price#4, cr_refunded_cash#10, w_state#20, i_item_id#14, d_date#18] Keys [2]: [w_state#20, i_item_id#14] -Functions [2]: [partial_sum(CASE WHEN (d_date#18 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#18 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] +Functions [2]: [partial_sum(CASE WHEN (d_date#18 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) 
ELSE 0.00 END), partial_sum(CASE WHEN (d_date#18 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] Aggregate Attributes [4]: [sum#22, isEmpty#23, sum#24, isEmpty#25] Results [6]: [w_state#20, i_item_id#14, sum#26, isEmpty#27, sum#28, isEmpty#29] @@ -176,9 +176,9 @@ Arguments: hashpartitioning(w_state#20, i_item_id#14, 5), ENSURE_REQUIREMENTS, [ (32) HashAggregate [codegen id : 9] Input [6]: [w_state#20, i_item_id#14, sum#26, isEmpty#27, sum#28, isEmpty#29] Keys [2]: [w_state#20, i_item_id#14] -Functions [2]: [sum(CASE WHEN (d_date#18 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END), sum(CASE WHEN (d_date#18 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] -Aggregate Attributes [2]: [sum(CASE WHEN (d_date#18 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#31, sum(CASE WHEN (d_date#18 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#32] -Results [4]: [w_state#20, i_item_id#14, sum(CASE WHEN (d_date#18 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#31 AS sales_before#33, sum(CASE WHEN (d_date#18 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#32 AS sales_after#34] +Functions [2]: [sum(CASE WHEN (d_date#18 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), sum(CASE WHEN (d_date#18 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#18 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#31, sum(CASE WHEN (d_date#18 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#32] +Results [4]: [w_state#20, i_item_id#14, sum(CASE WHEN (d_date#18 
< 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#31 AS sales_before#33, sum(CASE WHEN (d_date#18 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#32 AS sales_after#34] (33) TakeOrderedAndProject Input [4]: [w_state#20, i_item_id#14, sales_before#33, sales_after#34] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/simplified.txt index 296e9186f9fd9..5854dc101f305 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] WholeStageCodegen (9) - HashAggregate [w_state,i_item_id,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_date < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END),sales_before,sales_after,sum,isEmpty,sum,isEmpty] + HashAggregate [w_state,i_item_id,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_date < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sales_before,sales_after,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [w_state,i_item_id] #1 WholeStageCodegen (8) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt index 7678a91036fd6..f1a79d04f36bc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt @@ -165,7 +165,7 @@ Input [7]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#10, w_state#14 (30) HashAggregate [codegen id : 8] Input [5]: [cs_sales_price#4, cr_refunded_cash#10, w_state#14, i_item_id#17, d_date#21] Keys [2]: [w_state#14, i_item_id#17] -Functions [2]: [partial_sum(CASE WHEN (d_date#21 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#21 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as 
decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] +Functions [2]: [partial_sum(CASE WHEN (d_date#21 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#21 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] Aggregate Attributes [4]: [sum#22, isEmpty#23, sum#24, isEmpty#25] Results [6]: [w_state#14, i_item_id#17, sum#26, isEmpty#27, sum#28, isEmpty#29] @@ -176,9 +176,9 @@ Arguments: hashpartitioning(w_state#14, i_item_id#17, 5), ENSURE_REQUIREMENTS, [ (32) HashAggregate [codegen id : 9] Input [6]: [w_state#14, i_item_id#17, sum#26, isEmpty#27, sum#28, isEmpty#29] Keys [2]: [w_state#14, i_item_id#17] -Functions [2]: [sum(CASE WHEN (d_date#21 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END), sum(CASE WHEN (d_date#21 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] -Aggregate Attributes [2]: [sum(CASE WHEN (d_date#21 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#31, sum(CASE WHEN (d_date#21 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#32] -Results [4]: [w_state#14, i_item_id#17, sum(CASE WHEN (d_date#21 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#31 AS sales_before#33, sum(CASE WHEN (d_date#21 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#32 AS sales_after#34] +Functions [2]: [sum(CASE WHEN (d_date#21 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), sum(CASE WHEN (d_date#21 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#21 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - 
promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#31, sum(CASE WHEN (d_date#21 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#32] +Results [4]: [w_state#14, i_item_id#17, sum(CASE WHEN (d_date#21 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#31 AS sales_before#33, sum(CASE WHEN (d_date#21 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#32 AS sales_after#34] (33) TakeOrderedAndProject Input [4]: [w_state#14, i_item_id#17, sales_before#33, sales_after#34] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt index c691a23f64bf9..206317e8a5210 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] WholeStageCodegen (9) - HashAggregate [w_state,i_item_id,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_date < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END),sales_before,sales_after,sum,isEmpty,sum,isEmpty] + HashAggregate [w_state,i_item_id,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_date < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sales_before,sales_after,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [w_state,i_item_id] #1 WholeStageCodegen (8) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/explain.txt index 8fa5abffaa52f..0d7aa6dbdfbb8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/explain.txt @@ -70,7 +70,7 @@ Results [2]: [ss_item_sk#1 AS item_sk#11, cast((avg(UnscaledValue(ss_net_profit# (8) Filter [codegen id : 2] Input [2]: [item_sk#11, rank_col#12] -Condition : 
(isnotnull(rank_col#12) AND (cast(rank_col#12 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7), true))) +Condition : (isnotnull(rank_col#12) AND (cast(rank_col#12 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7)))) (9) Exchange Input [2]: [item_sk#11, rank_col#12] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt index b3d0081f5d22e..5783d8b49b6a0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt @@ -71,7 +71,7 @@ Results [2]: [ss_item_sk#1 AS item_sk#11, cast((avg(UnscaledValue(ss_net_profit# (8) Filter [codegen id : 2] Input [2]: [item_sk#11, rank_col#12] -Condition : (isnotnull(rank_col#12) AND (cast(rank_col#12 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7), true))) +Condition : (isnotnull(rank_col#12) AND (cast(rank_col#12 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7)))) (9) Exchange Input [2]: [item_sk#11, rank_col#12] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt index 529b9c8282db5..23dfbecdbca9d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt @@ -186,7 +186,7 @@ Arguments: [avg(_w0#23) windowspecdefinition(i_category#16, i_brand#15, s_store_ (30) Filter [codegen id : 11] Input [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23, rn#25, avg_monthly_sales#26] -Condition : ((isnotnull(avg_monthly_sales#26) AND (avg_monthly_sales#26 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#26) AND (avg_monthly_sales#26 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (31) Project [codegen id : 11] Output [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] @@ -277,7 +277,7 @@ Input [16]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_ye (52) TakeOrderedAndProject Input [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6), 
true) ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt index 4f69eb1367b8b..e7faf392ad879 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt @@ -167,7 +167,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_category#3, i_brand#2, s_store_na (27) Filter [codegen id : 22] Input [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] -Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (28) Project [codegen id : 22] Output [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, avg_monthly_sales#25, rn#24] @@ -242,7 +242,7 @@ Input [16]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year (45) TakeOrderedAndProject Input [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, avg_monthly_sales#25, sum_sales#21, psum#47, nsum#48] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, avg_monthly_sales#25, sum_sales#21, psum#47, nsum#48] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, avg_monthly_sales#25, sum_sales#21, psum#47, nsum#48] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/explain.txt index 889ada3f2bd24..65606c025adc4 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/explain.txt @@ -177,7 +177,7 @@ Input [7]: [ws_item_sk#1, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27 Keys [1]: [ws_item_sk#1] Functions [4]: [sum(coalesce(wr_return_quantity#12, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#12, 0))#29, sum(coalesce(ws_quantity#3, 0))#30, sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00))#31, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#32] -Results [3]: [ws_item_sk#1 AS item#33, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#12, 0))#29 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#30 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#34, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00))#31 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#32 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#35] +Results [3]: [ws_item_sk#1 AS item#33, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#12, 0))#29 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#30 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#34, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00))#31 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#32 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#35] (21) Exchange Input [3]: [item#33, return_ratio#34, currency_ratio#35] @@ -297,7 +297,7 @@ Input [7]: [cs_item_sk#40, sum#60, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#6 Keys [1]: [cs_item_sk#40] Functions [4]: [sum(coalesce(cr_return_quantity#50, 0)), sum(coalesce(cs_quantity#42, 0)), sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))] Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#50, 0))#67, sum(coalesce(cs_quantity#42, 0))#68, sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#69, sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))#70] -Results [3]: [cs_item_sk#40 AS item#71, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#50, 0))#67 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#42, 0))#68 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#72, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#69 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))#70 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#73] +Results [3]: [cs_item_sk#40 AS item#71, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#50, 0))#67 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#42, 0))#68 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#72, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#69 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))#70 as decimal(15,4)))), DecimalType(35,20)) AS 
currency_ratio#73] (48) Exchange Input [3]: [item#71, return_ratio#72, currency_ratio#73] @@ -417,7 +417,7 @@ Input [7]: [ss_item_sk#78, sum#98, sum#99, sum#100, isEmpty#101, sum#102, isEmpt Keys [1]: [ss_item_sk#78] Functions [4]: [sum(coalesce(sr_return_quantity#88, 0)), sum(coalesce(ss_quantity#80, 0)), sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))] Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#88, 0))#105, sum(coalesce(ss_quantity#80, 0))#106, sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00))#107, sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#108] -Results [3]: [ss_item_sk#78 AS item#109, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#88, 0))#105 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#80, 0))#106 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#110, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00))#107 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#108 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#111] +Results [3]: [ss_item_sk#78 AS item#109, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#88, 0))#105 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#80, 0))#106 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#110, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00))#107 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#108 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#111] (75) Exchange Input [3]: [item#109, return_ratio#110, currency_ratio#111] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt index 399ab59cd7a71..ac64de5188462 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt @@ -156,7 +156,7 @@ Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26 Keys [1]: [ws_item_sk#1] Functions [4]: [sum(coalesce(wr_return_quantity#11, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#11, 0))#28, sum(coalesce(ws_quantity#3, 0))#29, sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#30, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#31] -Results [3]: [ws_item_sk#1 AS item#32, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#11, 0))#28 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#29 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#33, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#30 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#31 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#34] +Results [3]: [ws_item_sk#1 AS item#32, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#11, 0))#28 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 
0))#29 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#33, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#30 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#31 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#34] (18) Exchange Input [3]: [item#32, return_ratio#33, currency_ratio#34] @@ -264,7 +264,7 @@ Input [7]: [cs_item_sk#39, sum#58, sum#59, sum#60, isEmpty#61, sum#62, isEmpty#6 Keys [1]: [cs_item_sk#39] Functions [4]: [sum(coalesce(cr_return_quantity#48, 0)), sum(coalesce(cs_quantity#41, 0)), sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))] Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#48, 0))#65, sum(coalesce(cs_quantity#41, 0))#66, sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#67, sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))#68] -Results [3]: [cs_item_sk#39 AS item#69, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#48, 0))#65 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#41, 0))#66 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#70, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#67 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))#68 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#71] +Results [3]: [cs_item_sk#39 AS item#69, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#48, 0))#65 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#41, 0))#66 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#70, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#67 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))#68 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#71] (42) Exchange Input [3]: [item#69, return_ratio#70, currency_ratio#71] @@ -372,7 +372,7 @@ Input [7]: [ss_item_sk#76, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#1 Keys [1]: [ss_item_sk#76] Functions [4]: [sum(coalesce(sr_return_quantity#85, 0)), sum(coalesce(ss_quantity#78, 0)), sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#85, 0))#102, sum(coalesce(ss_quantity#78, 0))#103, sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00))#104, sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#105] -Results [3]: [ss_item_sk#76 AS item#106, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#85, 0))#102 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#78, 0))#103 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#107, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00))#104 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#105 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#108] +Results [3]: [ss_item_sk#76 AS item#106, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#85, 0))#102 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#78, 0))#103 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#107, 
CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00))#104 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#105 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#108] (66) Exchange Input [3]: [item#106, return_ratio#107, currency_ratio#108] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt index 0690c363a98e7..29a88fbab1b3c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt @@ -173,7 +173,7 @@ Input [5]: [s_store_id#23, sum#30, sum#31, sum#32, sum#33] Keys [1]: [s_store_id#23] Functions [4]: [sum(UnscaledValue(sales_price#8)), sum(UnscaledValue(return_amt#10)), sum(UnscaledValue(profit#9)), sum(UnscaledValue(net_loss#11))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#8))#35, sum(UnscaledValue(return_amt#10))#36, sum(UnscaledValue(profit#9))#37, sum(UnscaledValue(net_loss#11))#38] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#8))#35,17,2) AS sales#39, MakeDecimal(sum(UnscaledValue(return_amt#10))#36,17,2) AS returns#40, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#37,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#38,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#41, store channel AS channel#42, concat(store, s_store_id#23) AS id#43] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#8))#35,17,2) AS sales#39, MakeDecimal(sum(UnscaledValue(return_amt#10))#36,17,2) AS returns#40, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#37,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#38,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#41, store channel AS channel#42, concat(store, s_store_id#23) AS id#43] (22) Scan parquet default.catalog_sales Output [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] @@ -270,7 +270,7 @@ Input [5]: [cp_catalog_page_id#65, sum#72, sum#73, sum#74, sum#75] Keys [1]: [cp_catalog_page_id#65] Functions [4]: [sum(UnscaledValue(sales_price#50)), sum(UnscaledValue(return_amt#52)), sum(UnscaledValue(profit#51)), sum(UnscaledValue(net_loss#53))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#50))#77, sum(UnscaledValue(return_amt#52))#78, sum(UnscaledValue(profit#51))#79, sum(UnscaledValue(net_loss#53))#80] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#50))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#52))#78,17,2) AS returns#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#51))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#53))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#65) AS id#85] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#50))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#52))#78,17,2) AS returns#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#51))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#53))#80,17,2) as 
decimal(18,2)))), DecimalType(18,2)) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#65) AS id#85] (43) Scan parquet default.web_sales Output [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] @@ -401,7 +401,7 @@ Input [5]: [web_site_id#114, sum#121, sum#122, sum#123, sum#124] Keys [1]: [web_site_id#114] Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#126, sum(UnscaledValue(return_amt#94))#127, sum(UnscaledValue(profit#93))#128, sum(UnscaledValue(net_loss#95))#129] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#126,17,2) AS sales#130, MakeDecimal(sum(UnscaledValue(return_amt#94))#127,17,2) AS returns#131, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#128,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#129,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#132, web channel AS channel#133, concat(web_site, web_site_id#114) AS id#134] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#126,17,2) AS sales#130, MakeDecimal(sum(UnscaledValue(return_amt#94))#127,17,2) AS returns#131, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#128,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#129,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#132, web channel AS channel#133, concat(web_site, web_site_id#114) AS id#134] (72) Union diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt index 693a853440d32..a9e5929f70b54 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt @@ -170,7 +170,7 @@ Input [5]: [s_store_id#24, sum#30, sum#31, sum#32, sum#33] Keys [1]: [s_store_id#24] Functions [4]: [sum(UnscaledValue(sales_price#8)), sum(UnscaledValue(return_amt#10)), sum(UnscaledValue(profit#9)), sum(UnscaledValue(net_loss#11))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#8))#35, sum(UnscaledValue(return_amt#10))#36, sum(UnscaledValue(profit#9))#37, sum(UnscaledValue(net_loss#11))#38] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#8))#35,17,2) AS sales#39, MakeDecimal(sum(UnscaledValue(return_amt#10))#36,17,2) AS returns#40, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#37,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#38,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#41, store channel AS channel#42, concat(store, s_store_id#24) AS id#43] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#8))#35,17,2) AS sales#39, MakeDecimal(sum(UnscaledValue(return_amt#10))#36,17,2) AS returns#40, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#37,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#38,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#41, store channel AS channel#42, concat(store, s_store_id#24) AS id#43] (22) Scan parquet default.catalog_sales Output [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, 
cs_sold_date_sk#47] @@ -267,7 +267,7 @@ Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] Keys [1]: [cp_catalog_page_id#66] Functions [4]: [sum(UnscaledValue(sales_price#50)), sum(UnscaledValue(return_amt#52)), sum(UnscaledValue(profit#51)), sum(UnscaledValue(net_loss#53))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#50))#77, sum(UnscaledValue(return_amt#52))#78, sum(UnscaledValue(profit#51))#79, sum(UnscaledValue(net_loss#53))#80] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#50))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#52))#78,17,2) AS returns#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#51))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#53))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#50))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#52))#78,17,2) AS returns#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#51))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#53))#80,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85] (43) Scan parquet default.web_sales Output [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] @@ -386,7 +386,7 @@ Input [5]: [web_site_id#114, sum#120, sum#121, sum#122, sum#123] Keys [1]: [web_site_id#114] Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#125, sum(UnscaledValue(return_amt#94))#126, sum(UnscaledValue(profit#93))#127, sum(UnscaledValue(net_loss#95))#128] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#125,17,2) AS sales#129, MakeDecimal(sum(UnscaledValue(return_amt#94))#126,17,2) AS returns#130, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#127,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#128,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#131, web channel AS channel#132, concat(web_site, web_site_id#114) AS id#133] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#125,17,2) AS sales#129, MakeDecimal(sum(UnscaledValue(return_amt#94))#126,17,2) AS returns#130, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#127,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#128,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#131, web channel AS channel#132, concat(web_site, web_site_id#114) AS id#133] (69) Union diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt index ea800b099f46a..694852c3ed6b0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#25) windowspecdefinition(i_manufact_id#5, specifiedwindowfra (26) Filter [codegen id : 7] Input [4]: 
[i_manufact_id#5, sum_sales#24, _w0#25, avg_quarterly_sales#27] -Condition : (isnotnull(avg_quarterly_sales#27) AND ((avg_quarterly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000))) +Condition : (isnotnull(avg_quarterly_sales#27) AND ((avg_quarterly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] Output [3]: [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt index a2c5cba8b3548..91364dcce16e4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#25) windowspecdefinition(i_manufact_id#5, specifiedwindowfra (26) Filter [codegen id : 7] Input [4]: [i_manufact_id#5, sum_sales#24, _w0#25, avg_quarterly_sales#27] -Condition : (isnotnull(avg_quarterly_sales#27) AND ((avg_quarterly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000))) +Condition : (isnotnull(avg_quarterly_sales#27) AND ((avg_quarterly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] Output [3]: [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt index b15ae61d824d4..543281ef9100e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt @@ -310,7 +310,7 @@ Input [2]: [c_customer_sk#27, sum#37] Keys [1]: [c_customer_sk#27] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#31))] Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#31))#38] -Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#31))#38,17,2)) / 50.00), DecimalType(21,6), true) as int) AS segment#39] +Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#31))#38,17,2)) / 50.00), DecimalType(21,6)) as int) AS segment#39] (56) HashAggregate [codegen id 
: 15] Input [1]: [segment#39] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt index ed5cd21140cad..4c65587bee530 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt @@ -295,7 +295,7 @@ Input [2]: [c_customer_sk#19, sum#37] Keys [1]: [c_customer_sk#19] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#24))] Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#24))#39] -Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#24))#39,17,2)) / 50.00), DecimalType(21,6), true) as int) AS segment#40] +Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#24))#39,17,2)) / 50.00), DecimalType(21,6)) as int) AS segment#40] (53) HashAggregate [codegen id : 12] Input [1]: [segment#40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt index ef8d64cee2c4a..0b933a733f888 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt @@ -186,7 +186,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_category#15, i_brand#14, cc_name# (30) Filter [codegen id : 11] Input [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] -Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (31) Project [codegen id : 11] Output [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] @@ -277,7 +277,7 @@ Input [14]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales (52) TakeOrderedAndProject Input [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST], [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST], [i_category#15, i_brand#14, cc_name#10, 
d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt index a3b9279528ba9..6b2736ef4008f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt @@ -167,7 +167,7 @@ Arguments: [avg(_w0#21) windowspecdefinition(i_category#3, i_brand#2, cc_name#14 (27) Filter [codegen id : 22] Input [9]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21, rn#23, avg_monthly_sales#24] -Condition : ((isnotnull(avg_monthly_sales#24) AND (avg_monthly_sales#24 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#24) AND (avg_monthly_sales#24 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (28) Project [codegen id : 22] Output [8]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, avg_monthly_sales#24, rn#23] @@ -242,7 +242,7 @@ Input [14]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales (45) TakeOrderedAndProject Input [9]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, avg_monthly_sales#24, sum_sales#20, psum#44, nsum#45] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, avg_monthly_sales#24, sum_sales#20, psum#44, nsum#45] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, avg_monthly_sales#24, sum_sales#20, psum#44, nsum#45] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/explain.txt index 8e969096c5239..abbd29292b260 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/explain.txt @@ -194,7 +194,7 @@ Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id= (31) BroadcastHashJoin [codegen id : 15] Left keys [1]: [item_id#13] Right keys [1]: [item_id#25] -Join condition: ((((cast(ss_item_rev#14 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#26)), DecimalType(19,3), true)) AND (cast(ss_item_rev#14 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#26)), 
DecimalType(20,3), true))) AND (cast(cs_item_rev#26 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#14)), DecimalType(19,3), true))) AND (cast(cs_item_rev#26 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#14)), DecimalType(20,3), true))) +Join condition: ((((cast(ss_item_rev#14 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#26)), DecimalType(19,3))) AND (cast(ss_item_rev#14 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#26)), DecimalType(20,3)))) AND (cast(cs_item_rev#26 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#14)), DecimalType(19,3)))) AND (cast(cs_item_rev#26 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#14)), DecimalType(20,3)))) (32) Project [codegen id : 15] Output [3]: [item_id#13, ss_item_rev#14, cs_item_rev#26] @@ -268,10 +268,10 @@ Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id= (47) BroadcastHashJoin [codegen id : 15] Left keys [1]: [item_id#13] Right keys [1]: [item_id#38] -Join condition: ((((((((cast(ss_item_rev#14 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#39)), DecimalType(19,3), true)) AND (cast(ss_item_rev#14 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#39)), DecimalType(20,3), true))) AND (cast(cs_item_rev#26 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#39)), DecimalType(19,3), true))) AND (cast(cs_item_rev#26 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#39)), DecimalType(20,3), true))) AND (cast(ws_item_rev#39 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#14)), DecimalType(19,3), true))) AND (cast(ws_item_rev#39 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#14)), DecimalType(20,3), true))) AND (cast(ws_item_rev#39 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#26)), DecimalType(19,3), true))) AND (cast(ws_item_rev#39 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#26)), DecimalType(20,3), true))) +Join condition: ((((((((cast(ss_item_rev#14 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#39)), DecimalType(19,3))) AND (cast(ss_item_rev#14 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#39)), DecimalType(20,3)))) AND (cast(cs_item_rev#26 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#39)), DecimalType(19,3)))) AND (cast(cs_item_rev#26 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#39)), DecimalType(20,3)))) AND (cast(ws_item_rev#39 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#14)), DecimalType(19,3)))) AND (cast(ws_item_rev#39 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#14)), DecimalType(20,3)))) AND (cast(ws_item_rev#39 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#26)), DecimalType(19,3)))) AND (cast(ws_item_rev#39 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#26)), DecimalType(20,3)))) (48) Project [codegen id : 15] -Output [8]: [item_id#13, ss_item_rev#14, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + 
promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS ss_dev#41, cs_item_rev#26, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#26 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS cs_dev#42, ws_item_rev#39, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#39 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS ws_dev#43, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2), true)) / 3.00), DecimalType(23,6), true) AS average#44] +Output [8]: [item_id#13, ss_item_rev#14, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ss_dev#41, cs_item_rev#26, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#26 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS cs_dev#42, ws_item_rev#39, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#39 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as 
decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ws_dev#43, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2))) / 3.00), DecimalType(23,6)) AS average#44] Input [5]: [item_id#13, ss_item_rev#14, cs_item_rev#26, item_id#38, ws_item_rev#39] (49) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt index 67f19d31e3946..47651c0f92dca 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt @@ -194,7 +194,7 @@ Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id= (31) BroadcastHashJoin [codegen id : 15] Left keys [1]: [item_id#13] Right keys [1]: [item_id#25] -Join condition: ((((cast(ss_item_rev#14 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#26)), DecimalType(19,3), true)) AND (cast(ss_item_rev#14 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#26)), DecimalType(20,3), true))) AND (cast(cs_item_rev#26 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#14)), DecimalType(19,3), true))) AND (cast(cs_item_rev#26 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#14)), DecimalType(20,3), true))) +Join condition: ((((cast(ss_item_rev#14 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#26)), DecimalType(19,3))) AND (cast(ss_item_rev#14 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#26)), DecimalType(20,3)))) AND (cast(cs_item_rev#26 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#14)), DecimalType(19,3)))) AND (cast(cs_item_rev#26 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#14)), DecimalType(20,3)))) (32) Project [codegen id : 15] Output [3]: [item_id#13, ss_item_rev#14, cs_item_rev#26] @@ -268,10 +268,10 @@ Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id= (47) BroadcastHashJoin [codegen id : 15] Left keys [1]: [item_id#13] Right keys [1]: [item_id#38] -Join condition: ((((((((cast(ss_item_rev#14 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#39)), DecimalType(19,3), true)) AND (cast(ss_item_rev#14 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#39)), DecimalType(20,3), true))) AND (cast(cs_item_rev#26 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#39)), DecimalType(19,3), true))) AND (cast(cs_item_rev#26 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#39)), DecimalType(20,3), true))) AND (cast(ws_item_rev#39 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#14)), DecimalType(19,3), true))) AND (cast(ws_item_rev#39 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#14)), DecimalType(20,3), true))) AND (cast(ws_item_rev#39 as decimal(19,3)) >= 
CheckOverflow((0.90 * promote_precision(cs_item_rev#26)), DecimalType(19,3), true))) AND (cast(ws_item_rev#39 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#26)), DecimalType(20,3), true))) +Join condition: ((((((((cast(ss_item_rev#14 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#39)), DecimalType(19,3))) AND (cast(ss_item_rev#14 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#39)), DecimalType(20,3)))) AND (cast(cs_item_rev#26 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#39)), DecimalType(19,3)))) AND (cast(cs_item_rev#26 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#39)), DecimalType(20,3)))) AND (cast(ws_item_rev#39 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#14)), DecimalType(19,3)))) AND (cast(ws_item_rev#39 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#14)), DecimalType(20,3)))) AND (cast(ws_item_rev#39 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#26)), DecimalType(19,3)))) AND (cast(ws_item_rev#39 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#26)), DecimalType(20,3)))) (48) Project [codegen id : 15] -Output [8]: [item_id#13, ss_item_rev#14, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS ss_dev#41, cs_item_rev#26, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#26 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS cs_dev#42, ws_item_rev#39, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#39 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS ws_dev#43, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2), true)) / 3.00), DecimalType(23,6), true) AS 
average#44] +Output [8]: [item_id#13, ss_item_rev#14, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ss_dev#41, cs_item_rev#26, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#26 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS cs_dev#42, ws_item_rev#39, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#39 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ws_dev#43, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2))) / 3.00), DecimalType(23,6)) AS average#44] Input [5]: [item_id#13, ss_item_rev#14, cs_item_rev#26, item_id#38, ws_item_rev#39] (49) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt index 201ba377a7f79..1e9c240705bd8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt @@ -241,7 +241,7 @@ Right keys [2]: [s_store_id2#68, (d_week_seq2#67 - 52)] Join condition: None (43) Project [codegen id : 10] -Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#69)), DecimalType(37,20), true) AS (sun_sales1 / sun_sales2)#77, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#70)), DecimalType(37,20), true) AS (mon_sales1 / mon_sales2)#78, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales2#71)), DecimalType(37,20), true) AS (tue_sales1 / tue_sales2)#79, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#72)), DecimalType(37,20), true) AS (wed_sales1 / wed_sales2)#80, 
CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#73)), DecimalType(37,20), true) AS (thu_sales1 / thu_sales2)#81, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#74)), DecimalType(37,20), true) AS (fri_sales1 / fri_sales2)#82, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#75)), DecimalType(37,20), true) AS (sat_sales1 / sat_sales2)#83] +Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#69)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#77, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#70)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#78, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales2#71)), DecimalType(37,20)) AS (tue_sales1 / tue_sales2)#79, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#72)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#80, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#73)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#81, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#74)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#82, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#75)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#83] Input [19]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#67, s_store_id2#68, sun_sales2#69, mon_sales2#70, tue_sales2#71, wed_sales2#72, thu_sales2#73, fri_sales2#74, sat_sales2#75] (44) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt index 201ba377a7f79..1e9c240705bd8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt @@ -241,7 +241,7 @@ Right keys [2]: [s_store_id2#68, (d_week_seq2#67 - 52)] Join condition: None (43) Project [codegen id : 10] -Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#69)), DecimalType(37,20), true) AS (sun_sales1 / sun_sales2)#77, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#70)), DecimalType(37,20), true) AS (mon_sales1 / mon_sales2)#78, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales2#71)), DecimalType(37,20), true) AS (tue_sales1 / tue_sales2)#79, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#72)), DecimalType(37,20), true) AS (wed_sales1 / wed_sales2)#80, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#73)), DecimalType(37,20), true) AS (thu_sales1 / thu_sales2)#81, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#74)), DecimalType(37,20), true) AS (fri_sales1 / fri_sales2)#82, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#75)), DecimalType(37,20), true) AS (sat_sales1 / sat_sales2)#83] +Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#69)), 
DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#77, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#70)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#78, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales2#71)), DecimalType(37,20)) AS (tue_sales1 / tue_sales2)#79, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#72)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#80, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#73)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#81, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#74)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#82, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#75)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#83] Input [19]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#67, s_store_id2#68, sun_sales2#69, mon_sales2#70, tue_sales2#71, wed_sales2#72, thu_sales2#73, fri_sales2#74, sat_sales2#75] (44) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt index 70ea372f1eb5b..e83c4be6f7e5a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt @@ -350,7 +350,7 @@ Arguments: IdentityBroadcastMode, [id=#45] Join condition: None (64) Project [codegen id : 15] -Output [3]: [promotions#30, total#44, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#30 as decimal(15,4))) / promote_precision(cast(total#44 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#46] +Output [3]: [promotions#30, total#44, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#30 as decimal(15,4))) / promote_precision(cast(total#44 as decimal(15,4)))), DecimalType(35,20))) * 100.00000000000000000000), DecimalType(38,19)) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#46] Input [2]: [promotions#30, total#44] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt index 7e1ce65ee7236..ebf1161c7a1f0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt @@ -365,7 +365,7 @@ Arguments: IdentityBroadcastMode, [id=#47] Join condition: None (67) Project [codegen id : 15] -Output [3]: [promotions#30, total#46, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#30 as decimal(15,4))) / promote_precision(cast(total#46 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#48] +Output [3]: [promotions#30, total#46, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#30 as 
decimal(15,4))) / promote_precision(cast(total#46 as decimal(15,4)))), DecimalType(35,20))) * 100.00000000000000000000), DecimalType(38,19)) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#48] Input [2]: [promotions#30, total#46] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt index 9dd05765ecd2d..fe91e93a55aba 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#25) windowspecdefinition(i_manager_id#5, specifiedwindowfram (26) Filter [codegen id : 7] Input [4]: [i_manager_id#5, sum_sales#24, _w0#25, avg_monthly_sales#27] -Condition : (isnotnull(avg_monthly_sales#27) AND ((avg_monthly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000))) +Condition : (isnotnull(avg_monthly_sales#27) AND ((avg_monthly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] Output [3]: [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt index b49e25109080e..ad0ca3ea63d42 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#25) windowspecdefinition(i_manager_id#5, specifiedwindowfram (26) Filter [codegen id : 7] Input [4]: [i_manager_id#5, sum_sales#24, _w0#25, avg_monthly_sales#27] -Condition : (isnotnull(avg_monthly_sales#27) AND ((avg_monthly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000))) +Condition : (isnotnull(avg_monthly_sales#27) AND ((avg_monthly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] Output [3]: [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/explain.txt index 
e4baf3b296016..474967b54286a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/explain.txt @@ -161,7 +161,7 @@ Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)) (24) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_store_sk#2] Right keys [1]: [ss_store_sk#13] -Join condition: (cast(revenue#11 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#28)), DecimalType(23,7), true)) +Join condition: (cast(revenue#11 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#28)), DecimalType(23,7))) (25) Project [codegen id : 8] Output [3]: [ss_store_sk#2, ss_item_sk#1, revenue#11] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt index 49cc9f75956a2..c7967bfa915b8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt @@ -212,7 +212,7 @@ Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)) (36) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#4] Right keys [1]: [ss_store_sk#22] -Join condition: (cast(revenue#13 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#37)), DecimalType(23,7), true)) +Join condition: (cast(revenue#13 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#37)), DecimalType(23,7))) (37) Project [codegen id : 9] Output [6]: [s_store_name#2, i_item_desc#16, revenue#13, i_current_price#17, i_wholesale_cost#18, i_brand#19] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt index 28f1f48dd9f0a..85aa68cbedd88 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt @@ -172,7 +172,7 @@ Input [13]: [ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid (27) HashAggregate [codegen id : 5] Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16, d_moy#17] Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16] -Functions [24]: [partial_sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) 
ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN 
CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Functions [24]: [partial_sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * 
promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] Aggregate Attributes [48]: [sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71, sum#72, isEmpty#73] Results [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] @@ -183,9 +183,9 @@ Arguments: 
hashpartitioning(w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21 (29) HashAggregate [codegen id : 6] Input [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16] -Functions [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * 
promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#17 = 6) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), 
DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] -Results [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, DHL,BARIAN AS ship_carriers#147, d_year#16 AS year#148, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * 
promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] +Functions [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) 
* promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), 
sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), 
DecimalType(18,2)) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#146] +Results [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, DHL,BARIAN AS ship_carriers#147, d_year#16 AS year#148, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * 
promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) 
* promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#146 AS dec_net#172] (30) Scan parquet default.catalog_sales Output [7]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179] @@ -253,7 +253,7 @@ Input [13]: [cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_pa (45) HashAggregate [codegen id : 11] Input [11]: [cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183, d_moy#184] Keys [7]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183] -Functions [24]: [partial_sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 
as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Functions [24]: [partial_sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) 
THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] Aggregate Attributes [48]: [sum#192, isEmpty#193, sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199, sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205, sum#206, isEmpty#207, sum#208, isEmpty#209, sum#210, isEmpty#211, sum#212, isEmpty#213, sum#214, isEmpty#215, sum#216, isEmpty#217, sum#218, isEmpty#219, sum#220, isEmpty#221, sum#222, isEmpty#223, sum#224, isEmpty#225, sum#226, isEmpty#227, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239] Results [55]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] @@ -264,16 +264,16 @@ Arguments: hashpartitioning(w_warehouse_name#186, w_warehouse_sq_ft#187, w_city# (47) HashAggregate [codegen id : 12] Input [55]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] Keys [7]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183] -Functions [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * 
promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 
END), sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298, sum(CASE WHEN 
(d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#304, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#306, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#307, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#308, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#309, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#310, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#311, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#312] -Results [32]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, DHL,BARIAN AS ship_carriers#313, d_year#183 AS year#314, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289 AS jan_sales#315, sum(CASE WHEN (d_moy#184 = 2) THEN 
CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290 AS feb_sales#316, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291 AS mar_sales#317, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292 AS apr_sales#318, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293 AS may_sales#319, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294 AS jun_sales#320, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295 AS jul_sales#321, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296 AS aug_sales#322, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297 AS sep_sales#323, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298 AS oct_sales#324, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299 AS nov_sales#325, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300 AS dec_sales#326, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#301 AS jan_net#327, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#302 AS feb_net#328, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#303 AS mar_net#329, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#304 AS apr_net#330, sum(CASE WHEN (d_moy#184 = 5) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#305 AS may_net#331, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#306 AS jun_net#332, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#307 AS jul_net#333, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#308 AS aug_net#334, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#309 AS sep_net#335, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#310 AS oct_net#336, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#311 AS nov_net#337, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#312 AS dec_net#338] +Functions [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), 
sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * 
promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#304, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#184 = 6) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#306, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#307, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#308, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#309, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#310, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#311, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#312] +Results [32]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, DHL,BARIAN AS ship_carriers#313, d_year#183 AS year#314, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#289 AS jan_sales#315, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#290 AS feb_sales#316, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#291 AS mar_sales#317, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#292 AS apr_sales#318, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#293 AS may_sales#319, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#294 AS jun_sales#320, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#295 AS jul_sales#321, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#296 AS aug_sales#322, sum(CASE WHEN (d_moy#184 = 9) THEN 
CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#297 AS sep_sales#323, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#298 AS oct_sales#324, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#299 AS nov_sales#325, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#300 AS dec_sales#326, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#301 AS jan_net#327, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#302 AS feb_net#328, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#303 AS mar_net#329, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#304 AS apr_net#330, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#305 AS may_net#331, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#306 AS jun_net#332, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#307 AS jul_net#333, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#308 AS aug_net#334, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#309 AS sep_net#335, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#310 AS oct_net#336, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#311 AS nov_net#337, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) 
* promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#312 AS dec_net#338] (48) Union (49) HashAggregate [codegen id : 13] Input [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, jan_sales#149, feb_sales#150, mar_sales#151, apr_sales#152, may_sales#153, jun_sales#154, jul_sales#155, aug_sales#156, sep_sales#157, oct_sales#158, nov_sales#159, dec_sales#160, jan_net#161, feb_net#162, mar_net#163, apr_net#164, may_net#165, jun_net#166, jul_net#167, aug_net#168, sep_net#169, oct_net#170, nov_net#171, dec_net#172] Keys [8]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148] -Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] +Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), 
partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] Aggregate Attributes [72]: [sum#339, isEmpty#340, sum#341, isEmpty#342, sum#343, isEmpty#344, sum#345, isEmpty#346, sum#347, isEmpty#348, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410] Results [80]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, 
isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] @@ -284,9 +284,9 @@ Arguments: hashpartitioning(w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21 (51) HashAggregate [codegen id : 14] Input [80]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] Keys [8]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148] -Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(dec_sales#160) / 
promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true)), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] -Aggregate Attributes [36]: [sum(jan_sales#149)#484, sum(feb_sales#150)#485, sum(mar_sales#151)#486, sum(apr_sales#152)#487, sum(may_sales#153)#488, sum(jun_sales#154)#489, sum(jul_sales#155)#490, sum(aug_sales#156)#491, sum(sep_sales#157)#492, sum(oct_sales#158)#493, sum(nov_sales#159)#494, sum(dec_sales#160)#495, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#496, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#497, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#498, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#499, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#500, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#501, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#502, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#503, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#504, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#505, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#506, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#507, sum(jan_net#161)#508, sum(feb_net#162)#509, sum(mar_net#163)#510, sum(apr_net#164)#511, sum(may_net#165)#512, sum(jun_net#166)#513, sum(jul_net#167)#514, sum(aug_net#168)#515, sum(sep_net#169)#516, sum(oct_net#170)#517, sum(nov_net#171)#518, sum(dec_net#172)#519] -Results [44]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum(jan_sales#149)#484 AS jan_sales#520, sum(feb_sales#150)#485 AS feb_sales#521, sum(mar_sales#151)#486 AS mar_sales#522, sum(apr_sales#152)#487 AS apr_sales#523, sum(may_sales#153)#488 AS may_sales#524, sum(jun_sales#154)#489 AS jun_sales#525, sum(jul_sales#155)#490 AS jul_sales#526, sum(aug_sales#156)#491 AS aug_sales#527, sum(sep_sales#157)#492 AS sep_sales#528, sum(oct_sales#158)#493 AS oct_sales#529, sum(nov_sales#159)#494 AS nov_sales#530, sum(dec_sales#160)#495 AS dec_sales#531, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#496 AS jan_sales_per_sq_foot#532, 
sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#497 AS feb_sales_per_sq_foot#533, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#498 AS mar_sales_per_sq_foot#534, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#499 AS apr_sales_per_sq_foot#535, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#500 AS may_sales_per_sq_foot#536, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#501 AS jun_sales_per_sq_foot#537, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#502 AS jul_sales_per_sq_foot#538, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#503 AS aug_sales_per_sq_foot#539, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#504 AS sep_sales_per_sq_foot#540, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#505 AS oct_sales_per_sq_foot#541, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#506 AS nov_sales_per_sq_foot#542, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12), true))#507 AS dec_sales_per_sq_foot#543, sum(jan_net#161)#508 AS jan_net#544, sum(feb_net#162)#509 AS feb_net#545, sum(mar_net#163)#510 AS mar_net#546, sum(apr_net#164)#511 AS apr_net#547, sum(may_net#165)#512 AS may_net#548, sum(jun_net#166)#513 AS jun_net#549, sum(jul_net#167)#514 AS jul_net#550, sum(aug_net#168)#515 AS aug_net#551, sum(sep_net#169)#516 AS sep_net#552, sum(oct_net#170)#517 AS oct_net#553, sum(nov_net#171)#518 AS nov_net#554, sum(dec_net#172)#519 AS dec_net#555] +Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), 
sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] +Aggregate Attributes [36]: [sum(jan_sales#149)#484, sum(feb_sales#150)#485, sum(mar_sales#151)#486, sum(apr_sales#152)#487, sum(may_sales#153)#488, sum(jun_sales#154)#489, sum(jul_sales#155)#490, sum(aug_sales#156)#491, sum(sep_sales#157)#492, sum(oct_sales#158)#493, sum(nov_sales#159)#494, sum(dec_sales#160)#495, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#496, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#497, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#498, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#499, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#500, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#501, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#502, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#503, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#504, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#505, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#506, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#507, sum(jan_net#161)#508, sum(feb_net#162)#509, sum(mar_net#163)#510, sum(apr_net#164)#511, sum(may_net#165)#512, sum(jun_net#166)#513, sum(jul_net#167)#514, sum(aug_net#168)#515, sum(sep_net#169)#516, sum(oct_net#170)#517, sum(nov_net#171)#518, sum(dec_net#172)#519] +Results [44]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum(jan_sales#149)#484 AS 
jan_sales#520, sum(feb_sales#150)#485 AS feb_sales#521, sum(mar_sales#151)#486 AS mar_sales#522, sum(apr_sales#152)#487 AS apr_sales#523, sum(may_sales#153)#488 AS may_sales#524, sum(jun_sales#154)#489 AS jun_sales#525, sum(jul_sales#155)#490 AS jul_sales#526, sum(aug_sales#156)#491 AS aug_sales#527, sum(sep_sales#157)#492 AS sep_sales#528, sum(oct_sales#158)#493 AS oct_sales#529, sum(nov_sales#159)#494 AS nov_sales#530, sum(dec_sales#160)#495 AS dec_sales#531, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#496 AS jan_sales_per_sq_foot#532, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#497 AS feb_sales_per_sq_foot#533, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#498 AS mar_sales_per_sq_foot#534, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#499 AS apr_sales_per_sq_foot#535, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#500 AS may_sales_per_sq_foot#536, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#501 AS jun_sales_per_sq_foot#537, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#502 AS jul_sales_per_sq_foot#538, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#503 AS aug_sales_per_sq_foot#539, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#504 AS sep_sales_per_sq_foot#540, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#505 AS oct_sales_per_sq_foot#541, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#506 AS nov_sales_per_sq_foot#542, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#507 AS dec_sales_per_sq_foot#543, sum(jan_net#161)#508 AS jan_net#544, sum(feb_net#162)#509 AS feb_net#545, sum(mar_net#163)#510 AS mar_net#546, sum(apr_net#164)#511 AS apr_net#547, sum(may_net#165)#512 AS may_net#548, sum(jun_net#166)#513 AS jun_net#549, sum(jul_net#167)#514 AS jul_net#550, sum(aug_net#168)#515 AS aug_net#551, sum(sep_net#169)#516 AS sep_net#552, sum(oct_net#170)#517 AS oct_net#553, sum(nov_net#171)#518 AS nov_net#554, sum(dec_net#172)#519 AS dec_net#555] (52) TakeOrderedAndProject Input [44]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, jan_sales#520, feb_sales#521, mar_sales#522, apr_sales#523, may_sales#524, jun_sales#525, jul_sales#526, aug_sales#527, sep_sales#528, oct_sales#529, nov_sales#530, dec_sales#531, jan_sales_per_sq_foot#532, feb_sales_per_sq_foot#533, mar_sales_per_sq_foot#534, apr_sales_per_sq_foot#535, may_sales_per_sq_foot#536, jun_sales_per_sq_foot#537, jul_sales_per_sq_foot#538, 
aug_sales_per_sq_foot#539, sep_sales_per_sq_foot#540, oct_sales_per_sq_foot#541, nov_sales_per_sq_foot#542, dec_sales_per_sq_foot#543, jan_net#544, feb_net#545, mar_net#546, apr_net#547, may_net#548, jun_net#549, jul_net#550, aug_net#551, sep_net#552, oct_net#553, nov_net#554, dec_net#555] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt index f84c9de0bcd6b..d9ac8f54234f7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] WholeStageCodegen (14) - HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(nov_sales) / 
promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net),jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(dec_sales) / 
promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net),jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year] #1 WholeStageCodegen (13) @@ -8,7 +8,7 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat InputAdapter Union WholeStageCodegen (6) - HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as 
decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 
END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN 
CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #2 WholeStageCodegen (5) @@ -58,7 +58,7 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat InputAdapter Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] WholeStageCodegen (12) - HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) 
ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax 
as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * 
promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 
END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #7 WholeStageCodegen (11) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt index c97dfda97c695..f0b239a262c26 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt @@ -172,7 +172,7 @@ Input [13]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid (27) HashAggregate [codegen id : 5] Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, d_moy#19] Keys [7]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18] -Functions [24]: [partial_sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 10) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Functions [24]: [partial_sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * 
promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 7) THEN 
CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] Aggregate Attributes [48]: [sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71, sum#72, isEmpty#73] Results [55]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] @@ -183,9 +183,9 @@ Arguments: hashpartitioning(w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12 (29) HashAggregate [codegen id : 6] Input [55]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] Keys [7]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18] -Functions [24]: [sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * 
promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 
as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) 
ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] -Results [32]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, DHL,BARIAN AS ship_carriers#147, d_year#18 AS year#148, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as 
decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN 
(d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] +Functions [24]: [sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * 
promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), 
DecimalType(18,2)) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 
0.00 END)#143, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#146] +Results [32]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, DHL,BARIAN AS ship_carriers#147, d_year#18 AS year#148, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN 
(d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#146 AS dec_net#172] (30) Scan parquet default.catalog_sales Output [7]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179] @@ -253,7 +253,7 @@ Input [13]: [cs_ship_mode_sk#174, cs_quantity#176, cs_sales_price#177, cs_net_pa (45) HashAggregate [codegen id : 11] Input [11]: [cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, d_moy#189] Keys [7]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188] -Functions [24]: [partial_sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 2) 
THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), 
partial_sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Functions [24]: [partial_sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), 
partial_sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] Aggregate Attributes [48]: [sum#192, isEmpty#193, sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199, sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205, sum#206, isEmpty#207, sum#208, isEmpty#209, sum#210, isEmpty#211, 
sum#212, isEmpty#213, sum#214, isEmpty#215, sum#216, isEmpty#217, sum#218, isEmpty#219, sum#220, isEmpty#221, sum#222, isEmpty#223, sum#224, isEmpty#225, sum#226, isEmpty#227, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239] Results [55]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] @@ -264,16 +264,16 @@ Arguments: hashpartitioning(w_warehouse_name#181, w_warehouse_sq_ft#182, w_city# (47) HashAggregate [codegen id : 12] Input [55]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] Keys [7]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188] -Functions [24]: [sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as 
decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] 
-Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#304, sum(CASE WHEN 
(d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#306, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#307, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#308, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#309, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#310, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#311, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#312] -Results [32]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, DHL,BARIAN AS ship_carriers#313, d_year#188 AS year#314, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289 AS jan_sales#315, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290 AS feb_sales#316, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291 AS mar_sales#317, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292 AS apr_sales#318, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293 AS may_sales#319, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294 AS jun_sales#320, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) 
ELSE 0.00 END)#295 AS jul_sales#321, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296 AS aug_sales#322, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297 AS sep_sales#323, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298 AS oct_sales#324, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299 AS nov_sales#325, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300 AS dec_sales#326, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#301 AS jan_net#327, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#302 AS feb_net#328, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#303 AS mar_net#329, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#304 AS apr_net#330, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#305 AS may_net#331, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#306 AS jun_net#332, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#307 AS jul_net#333, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#308 AS aug_net#334, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#309 AS sep_net#335, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), 
true) ELSE 0.00 END)#310 AS oct_net#336, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#311 AS nov_net#337, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#312 AS dec_net#338] +Functions [24]: [sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 3) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as 
decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#304, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#306, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#307, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#308, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#309, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#310, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#311, sum(CASE WHEN (d_moy#189 = 12) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#312] +Results [32]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, DHL,BARIAN AS ship_carriers#313, d_year#188 AS year#314, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#289 AS jan_sales#315, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#290 AS feb_sales#316, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#291 AS mar_sales#317, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#292 AS apr_sales#318, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#293 AS may_sales#319, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#294 AS jun_sales#320, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#295 AS jul_sales#321, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#296 AS aug_sales#322, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#297 AS sep_sales#323, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#298 AS oct_sales#324, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#299 AS nov_sales#325, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#300 AS dec_sales#326, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#301 AS jan_net#327, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#302 
AS feb_net#328, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#303 AS mar_net#329, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#304 AS apr_net#330, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#305 AS may_net#331, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#306 AS jun_net#332, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#307 AS jul_net#333, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#308 AS aug_net#334, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#309 AS sep_net#335, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#310 AS oct_net#336, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#311 AS nov_net#337, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#312 AS dec_net#338] (48) Union (49) HashAggregate [codegen id : 13] Input [32]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, jan_sales#149, feb_sales#150, mar_sales#151, apr_sales#152, may_sales#153, jun_sales#154, jul_sales#155, aug_sales#156, sep_sales#157, oct_sales#158, nov_sales#159, dec_sales#160, jan_net#161, feb_net#162, mar_net#163, apr_net#164, may_net#165, jun_net#166, jul_net#167, aug_net#168, sep_net#169, oct_net#170, nov_net#171, dec_net#172] Keys [8]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148] -Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), 
partial_sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] +Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), 
DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] Aggregate Attributes [72]: [sum#339, isEmpty#340, sum#341, isEmpty#342, sum#343, isEmpty#344, sum#345, isEmpty#346, sum#347, isEmpty#348, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410] Results [80]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] @@ -284,9 +284,9 @@ Arguments: hashpartitioning(w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12 (51) HashAggregate [codegen id : 14] Input [80]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, 
isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] Keys [8]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148] -Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true)), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] -Aggregate Attributes [36]: [sum(jan_sales#149)#484, sum(feb_sales#150)#485, sum(mar_sales#151)#486, sum(apr_sales#152)#487, sum(may_sales#153)#488, sum(jun_sales#154)#489, sum(jul_sales#155)#490, sum(aug_sales#156)#491, sum(sep_sales#157)#492, sum(oct_sales#158)#493, sum(nov_sales#159)#494, sum(dec_sales#160)#495, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#496, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#497, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#498, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#499, sum(CheckOverflow((promote_precision(may_sales#153) / 
promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#500, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#501, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#502, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#503, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#504, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#505, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#506, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#507, sum(jan_net#161)#508, sum(feb_net#162)#509, sum(mar_net#163)#510, sum(apr_net#164)#511, sum(may_net#165)#512, sum(jun_net#166)#513, sum(jul_net#167)#514, sum(aug_net#168)#515, sum(sep_net#169)#516, sum(oct_net#170)#517, sum(nov_net#171)#518, sum(dec_net#172)#519] -Results [44]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, sum(jan_sales#149)#484 AS jan_sales#520, sum(feb_sales#150)#485 AS feb_sales#521, sum(mar_sales#151)#486 AS mar_sales#522, sum(apr_sales#152)#487 AS apr_sales#523, sum(may_sales#153)#488 AS may_sales#524, sum(jun_sales#154)#489 AS jun_sales#525, sum(jul_sales#155)#490 AS jul_sales#526, sum(aug_sales#156)#491 AS aug_sales#527, sum(sep_sales#157)#492 AS sep_sales#528, sum(oct_sales#158)#493 AS oct_sales#529, sum(nov_sales#159)#494 AS nov_sales#530, sum(dec_sales#160)#495 AS dec_sales#531, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#496 AS jan_sales_per_sq_foot#532, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#497 AS feb_sales_per_sq_foot#533, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#498 AS mar_sales_per_sq_foot#534, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#499 AS apr_sales_per_sq_foot#535, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#500 AS may_sales_per_sq_foot#536, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#501 AS jun_sales_per_sq_foot#537, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#502 AS jul_sales_per_sq_foot#538, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#503 AS aug_sales_per_sq_foot#539, sum(CheckOverflow((promote_precision(sep_sales#157) / 
promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#504 AS sep_sales_per_sq_foot#540, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#505 AS oct_sales_per_sq_foot#541, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#506 AS nov_sales_per_sq_foot#542, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12), true))#507 AS dec_sales_per_sq_foot#543, sum(jan_net#161)#508 AS jan_net#544, sum(feb_net#162)#509 AS feb_net#545, sum(mar_net#163)#510 AS mar_net#546, sum(apr_net#164)#511 AS apr_net#547, sum(may_net#165)#512 AS may_net#548, sum(jun_net#166)#513 AS jun_net#549, sum(jul_net#167)#514 AS jul_net#550, sum(aug_net#168)#515 AS aug_net#551, sum(sep_net#169)#516 AS sep_net#552, sum(oct_net#170)#517 AS oct_net#553, sum(nov_net#171)#518 AS nov_net#554, sum(dec_net#172)#519 AS dec_net#555] +Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] +Aggregate Attributes [36]: [sum(jan_sales#149)#484, sum(feb_sales#150)#485, sum(mar_sales#151)#486, sum(apr_sales#152)#487, sum(may_sales#153)#488, sum(jun_sales#154)#489, sum(jul_sales#155)#490, sum(aug_sales#156)#491, sum(sep_sales#157)#492, sum(oct_sales#158)#493, 
sum(nov_sales#159)#494, sum(dec_sales#160)#495, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#496, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#497, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#498, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#499, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#500, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#501, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#502, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#503, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#504, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#505, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#506, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#507, sum(jan_net#161)#508, sum(feb_net#162)#509, sum(mar_net#163)#510, sum(apr_net#164)#511, sum(may_net#165)#512, sum(jun_net#166)#513, sum(jul_net#167)#514, sum(aug_net#168)#515, sum(sep_net#169)#516, sum(oct_net#170)#517, sum(nov_net#171)#518, sum(dec_net#172)#519] +Results [44]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, sum(jan_sales#149)#484 AS jan_sales#520, sum(feb_sales#150)#485 AS feb_sales#521, sum(mar_sales#151)#486 AS mar_sales#522, sum(apr_sales#152)#487 AS apr_sales#523, sum(may_sales#153)#488 AS may_sales#524, sum(jun_sales#154)#489 AS jun_sales#525, sum(jul_sales#155)#490 AS jul_sales#526, sum(aug_sales#156)#491 AS aug_sales#527, sum(sep_sales#157)#492 AS sep_sales#528, sum(oct_sales#158)#493 AS oct_sales#529, sum(nov_sales#159)#494 AS nov_sales#530, sum(dec_sales#160)#495 AS dec_sales#531, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#496 AS jan_sales_per_sq_foot#532, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#497 AS feb_sales_per_sq_foot#533, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#498 AS mar_sales_per_sq_foot#534, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#499 AS apr_sales_per_sq_foot#535, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#500 AS may_sales_per_sq_foot#536, 
sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#501 AS jun_sales_per_sq_foot#537, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#502 AS jul_sales_per_sq_foot#538, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#503 AS aug_sales_per_sq_foot#539, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#504 AS sep_sales_per_sq_foot#540, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#505 AS oct_sales_per_sq_foot#541, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#506 AS nov_sales_per_sq_foot#542, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#507 AS dec_sales_per_sq_foot#543, sum(jan_net#161)#508 AS jan_net#544, sum(feb_net#162)#509 AS feb_net#545, sum(mar_net#163)#510 AS mar_net#546, sum(apr_net#164)#511 AS apr_net#547, sum(may_net#165)#512 AS may_net#548, sum(jun_net#166)#513 AS jun_net#549, sum(jul_net#167)#514 AS jul_net#550, sum(aug_net#168)#515 AS aug_net#551, sum(sep_net#169)#516 AS sep_net#552, sum(oct_net#170)#517 AS oct_net#553, sum(nov_net#171)#518 AS nov_net#554, sum(dec_net#172)#519 AS dec_net#555] (52) TakeOrderedAndProject Input [44]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, jan_sales#520, feb_sales#521, mar_sales#522, apr_sales#523, may_sales#524, jun_sales#525, jul_sales#526, aug_sales#527, sep_sales#528, oct_sales#529, nov_sales#530, dec_sales#531, jan_sales_per_sq_foot#532, feb_sales_per_sq_foot#533, mar_sales_per_sq_foot#534, apr_sales_per_sq_foot#535, may_sales_per_sq_foot#536, jun_sales_per_sq_foot#537, jul_sales_per_sq_foot#538, aug_sales_per_sq_foot#539, sep_sales_per_sq_foot#540, oct_sales_per_sq_foot#541, nov_sales_per_sq_foot#542, dec_sales_per_sq_foot#543, jan_net#544, feb_net#545, mar_net#546, apr_net#547, may_net#548, jun_net#549, jul_net#550, aug_net#551, sep_net#552, oct_net#553, nov_net#554, dec_net#555] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt index addcddea15cb2..17037cfe02c2a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] WholeStageCodegen (14) - HashAggregate 
[w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12), true)),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net),jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate 
[w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(w_warehouse_sq_ft as decimal(28,2)))), DecimalType(38,12))),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net),jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year] #1 WholeStageCodegen (13) @@ -8,7 +8,7 @@ TakeOrderedAndProject 
[w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat InputAdapter Union WholeStageCodegen (6) - HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN 
(d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN 
CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(ws_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #2 WholeStageCodegen (5) @@ -58,7 +58,7 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat InputAdapter Scan parquet default.ship_mode [sm_ship_mode_sk,sm_carrier] WholeStageCodegen (12) - HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), 
DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate 
[w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN 
CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cs_quantity as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #7 WholeStageCodegen (11) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt index f8e489f4901a9..5a6c73dbe6a98 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt @@ -131,7 +131,7 @@ Arguments: [[ss_quantity#3, ss_sales_price#4, i_category#18, i_class#17, i_brand (23) HashAggregate [codegen id : 7] Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] Keys [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] -Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] Aggregate Attributes [2]: [sum#30, isEmpty#31] 
Results [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] @@ -142,9 +142,9 @@ Arguments: hashpartitioning(i_category#21, i_class#22, i_brand#23, i_product_nam (25) HashAggregate [codegen id : 8] Input [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] Keys [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#35] -Results [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#35 AS sumsales#36] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#35] +Results [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#35 AS sumsales#36] (26) Exchange Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/simplified.txt index 524be972e6332..55953a73ff11d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/simplified.txt @@ -8,7 +8,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ InputAdapter Exchange [i_category] #1 WholeStageCodegen (8) - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] #2 WholeStageCodegen (7) diff 
--git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt index a8976d85cddd4..53f71a188fcb5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt @@ -116,7 +116,7 @@ Arguments: [[ss_quantity#3, ss_sales_price#4, i_category#17, i_class#16, i_brand (20) HashAggregate [codegen id : 4] Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28] Keys [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28] -Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] Aggregate Attributes [2]: [sum#29, isEmpty#30] Results [11]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28, sum#31, isEmpty#32] @@ -127,9 +127,9 @@ Arguments: hashpartitioning(i_category#20, i_class#21, i_brand#22, i_product_nam (22) HashAggregate [codegen id : 5] Input [11]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28, sum#31, isEmpty#32] Keys [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#34] -Results [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#34 AS sumsales#35] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#34] +Results [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#34 AS sumsales#35] (23) Exchange Input [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sumsales#35] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt index b45adcfc883a9..3cb879f7019b5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt @@ -8,7 +8,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ InputAdapter Exchange [i_category] #1 WholeStageCodegen (5) - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] #2 WholeStageCodegen (4) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/explain.txt index a00880bad3116..04a0ca4cd3027 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/explain.txt @@ -225,7 +225,7 @@ Right keys [1]: [s_store_sk#23] Join condition: None (30) Project [codegen id : 8] -Output [5]: [sales#16, coalesce(returns#31, 0.00) AS returns#34, CheckOverflow((promote_precision(cast(profit#17 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#35, store channel AS channel#36, s_store_sk#7 AS id#37] +Output [5]: [sales#16, coalesce(returns#31, 0.00) AS returns#34, CheckOverflow((promote_precision(cast(profit#17 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#35, store channel AS channel#36, s_store_sk#7 AS id#37] Input [6]: [s_store_sk#7, sales#16, profit#17, s_store_sk#23, returns#31, profit_loss#32] (31) Scan parquet default.catalog_sales @@ -316,7 +316,7 @@ Arguments: IdentityBroadcastMode, [id=#65] Join condition: None (49) Project [codegen id : 14] -Output [5]: [sales#50, returns#63, CheckOverflow((promote_precision(cast(profit#51 as decimal(18,2))) - promote_precision(cast(profit_loss#64 as decimal(18,2)))), DecimalType(18,2), true) AS profit#66, catalog channel AS channel#67, cs_call_center_sk#38 AS id#68] +Output [5]: [sales#50, returns#63, CheckOverflow((promote_precision(cast(profit#51 as decimal(18,2))) - promote_precision(cast(profit_loss#64 as decimal(18,2)))), DecimalType(18,2)) AS profit#66, catalog channel AS channel#67, cs_call_center_sk#38 AS id#68] Input [5]: [cs_call_center_sk#38, sales#50, profit#51, returns#63, profit_loss#64] (50) Scan parquet default.web_sales @@ -458,7 +458,7 @@ Right keys [1]: [wp_web_page_sk#90] Join condition: None (79) Project [codegen id : 22] -Output [5]: [sales#83, coalesce(returns#98, 0.00) AS returns#101, 
CheckOverflow((promote_precision(cast(profit#84 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#99, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#102, web channel AS channel#103, wp_web_page_sk#74 AS id#104] +Output [5]: [sales#83, coalesce(returns#98, 0.00) AS returns#101, CheckOverflow((promote_precision(cast(profit#84 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#99, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#102, web channel AS channel#103, wp_web_page_sk#74 AS id#104] Input [6]: [wp_web_page_sk#74, sales#83, profit#84, wp_web_page_sk#90, returns#98, profit_loss#99] (80) Union diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt index 0d7bfa462ef4c..c3cd748f43775 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt @@ -225,7 +225,7 @@ Right keys [1]: [s_store_sk#23] Join condition: None (30) Project [codegen id : 8] -Output [5]: [sales#16, coalesce(returns#31, 0.00) AS returns#34, CheckOverflow((promote_precision(cast(profit#17 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#35, store channel AS channel#36, s_store_sk#7 AS id#37] +Output [5]: [sales#16, coalesce(returns#31, 0.00) AS returns#34, CheckOverflow((promote_precision(cast(profit#17 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#35, store channel AS channel#36, s_store_sk#7 AS id#37] Input [6]: [s_store_sk#7, sales#16, profit#17, s_store_sk#23, returns#31, profit_loss#32] (31) Scan parquet default.catalog_sales @@ -316,7 +316,7 @@ Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#53))#62,17,2) AS re Join condition: None (49) Project [codegen id : 14] -Output [5]: [sales#50, returns#64, CheckOverflow((promote_precision(cast(profit#51 as decimal(18,2))) - promote_precision(cast(profit_loss#65 as decimal(18,2)))), DecimalType(18,2), true) AS profit#66, catalog channel AS channel#67, cs_call_center_sk#38 AS id#68] +Output [5]: [sales#50, returns#64, CheckOverflow((promote_precision(cast(profit#51 as decimal(18,2))) - promote_precision(cast(profit_loss#65 as decimal(18,2)))), DecimalType(18,2)) AS profit#66, catalog channel AS channel#67, cs_call_center_sk#38 AS id#68] Input [5]: [cs_call_center_sk#38, sales#50, profit#51, returns#64, profit_loss#65] (50) Scan parquet default.web_sales @@ -458,7 +458,7 @@ Right keys [1]: [wp_web_page_sk#90] Join condition: None (79) Project [codegen id : 22] -Output [5]: [sales#83, coalesce(returns#98, 0.00) AS returns#101, CheckOverflow((promote_precision(cast(profit#84 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#99, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#102, web channel AS channel#103, wp_web_page_sk#74 AS id#104] +Output [5]: [sales#83, coalesce(returns#98, 0.00) AS returns#101, CheckOverflow((promote_precision(cast(profit#84 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#99, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#102, web channel AS channel#103, wp_web_page_sk#74 AS id#104] Input [6]: [wp_web_page_sk#74, sales#83, profit#84, wp_web_page_sk#90, returns#98, profit_loss#99] (80) Union diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt index cfbaa2e8b48d2..9cc78e12028ff 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt @@ -270,7 +270,7 @@ Input [7]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt# (37) HashAggregate [codegen id : 9] Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#24] Keys [1]: [s_store_id#24] -Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] Aggregate Attributes [5]: [sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] Results [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] @@ -281,9 +281,9 @@ Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [id=#36] (39) HashAggregate [codegen id : 10] Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] Keys [1]: [s_store_id#24] -Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#37, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#39] -Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#37,17,2) AS sales#40, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38 AS returns#41, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#39 AS profit#42, store channel AS channel#43, concat(store, s_store_id#24) AS id#44] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#37, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39] 
+Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#37,17,2) AS sales#40, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38 AS returns#41, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39 AS profit#42, store channel AS channel#43, concat(store, s_store_id#24) AS id#44] (40) Scan parquet default.catalog_sales Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] @@ -409,7 +409,7 @@ Input [7]: [cs_catalog_page_sk#45, cs_ext_sales_price#49, cs_net_profit#50, cr_r (68) HashAggregate [codegen id : 19] Input [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#63] Keys [1]: [cp_catalog_page_id#63] -Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] Aggregate Attributes [5]: [sum#65, sum#66, isEmpty#67, sum#68, isEmpty#69] Results [6]: [cp_catalog_page_id#63, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] @@ -420,9 +420,9 @@ Arguments: hashpartitioning(cp_catalog_page_id#63, 5), ENSURE_REQUIREMENTS, [id= (70) HashAggregate [codegen id : 20] Input [6]: [cp_catalog_page_id#63, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] Keys [1]: [cp_catalog_page_id#63] -Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#76, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#78] -Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#76,17,2) AS sales#79, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77 AS returns#80, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#78 AS profit#81, catalog channel AS channel#82, concat(catalog_page, cp_catalog_page_id#63) AS id#83] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: 
[sum(UnscaledValue(cs_ext_sales_price#49))#76, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78] +Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#76,17,2) AS sales#79, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77 AS returns#80, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78 AS profit#81, catalog channel AS channel#82, concat(catalog_page, cp_catalog_page_id#63) AS id#83] (71) Scan parquet default.web_sales Output [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] @@ -548,7 +548,7 @@ Input [7]: [ws_web_site_sk#85, ws_ext_sales_price#88, ws_net_profit#89, wr_retur (99) HashAggregate [codegen id : 29] Input [5]: [ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#102] Keys [1]: [web_site_id#102] -Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#88)), partial_sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#88)), partial_sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] Aggregate Attributes [5]: [sum#104, sum#105, isEmpty#106, sum#107, isEmpty#108] Results [6]: [web_site_id#102, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] @@ -559,9 +559,9 @@ Arguments: hashpartitioning(web_site_id#102, 5), ENSURE_REQUIREMENTS, [id=#114] (101) HashAggregate [codegen id : 30] Input [6]: [web_site_id#102, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] Keys [1]: [web_site_id#102] -Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#88)), sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#88))#115, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#117] -Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#88))#115,17,2) AS sales#118, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116 AS returns#119, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#117 AS profit#120, web channel AS channel#121, concat(web_site, web_site_id#102) AS id#122] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#88)), 
sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#88))#115, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117] +Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#88))#115,17,2) AS sales#118, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116 AS returns#119, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117 AS profit#120, web channel AS channel#121, concat(web_site, web_site_id#102) AS id#122] (102) Union diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt index b742daa007454..7de3dd817429d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt @@ -9,7 +9,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Union WholeStageCodegen (10) - HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [s_store_id] #2 WholeStageCodegen (9) @@ -79,7 +79,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.store [s_store_sk,s_store_id] WholeStageCodegen (20) - HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange 
[cp_catalog_page_id] #9 WholeStageCodegen (19) @@ -130,7 +130,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] WholeStageCodegen (30) - HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [web_site_id] #13 WholeStageCodegen (29) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt index c18e9a125335e..20cf55dba4482 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt @@ -270,7 +270,7 @@ Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt# (37) HashAggregate [codegen id : 9] Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18] Keys [1]: [s_store_id#18] -Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] Aggregate Attributes [5]: [sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] Results [6]: [s_store_id#18, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] @@ -281,9 +281,9 @@ Arguments: hashpartitioning(s_store_id#18, 5), ENSURE_REQUIREMENTS, [id=#36] (39) HashAggregate [codegen id : 10] Input [6]: [s_store_id#18, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] Keys [1]: [s_store_id#18] -Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#37, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#39] -Results [5]: 
[MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#37,17,2) AS sales#40, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38 AS returns#41, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#39 AS profit#42, store channel AS channel#43, concat(store, s_store_id#18) AS id#44] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#37, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39] +Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#37,17,2) AS sales#40, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38 AS returns#41, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39 AS profit#42, store channel AS channel#43, concat(store, s_store_id#18) AS id#44] (40) Scan parquet default.catalog_sales Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] @@ -409,7 +409,7 @@ Input [7]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_a (68) HashAggregate [codegen id : 19] Input [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61] Keys [1]: [cp_catalog_page_id#61] -Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] Aggregate Attributes [5]: [sum#65, sum#66, isEmpty#67, sum#68, isEmpty#69] Results [6]: [cp_catalog_page_id#61, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] @@ -420,9 +420,9 @@ Arguments: hashpartitioning(cp_catalog_page_id#61, 5), ENSURE_REQUIREMENTS, [id= (70) HashAggregate [codegen id : 20] Input [6]: [cp_catalog_page_id#61, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] Keys [1]: [cp_catalog_page_id#61] -Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#76, 
sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#78] -Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#76,17,2) AS sales#79, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77 AS returns#80, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#78 AS profit#81, catalog channel AS channel#82, concat(catalog_page, cp_catalog_page_id#61) AS id#83] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#76, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78] +Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#76,17,2) AS sales#79, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77 AS returns#80, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78 AS profit#81, catalog channel AS channel#82, concat(catalog_page, cp_catalog_page_id#61) AS id#83] (71) Scan parquet default.web_sales Output [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] @@ -548,7 +548,7 @@ Input [7]: [ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_a (99) HashAggregate [codegen id : 29] Input [5]: [ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100] Keys [1]: [web_site_id#100] -Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#88)), partial_sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#88)), partial_sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] Aggregate Attributes [5]: [sum#104, sum#105, isEmpty#106, sum#107, isEmpty#108] Results [6]: [web_site_id#100, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] @@ -559,9 +559,9 @@ Arguments: hashpartitioning(web_site_id#100, 5), ENSURE_REQUIREMENTS, [id=#114] (101) HashAggregate [codegen id : 30] Input [6]: [web_site_id#100, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] Keys [1]: [web_site_id#100] -Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#88)), sum(coalesce(cast(wr_return_amt#94 as 
decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#88))#115, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#117] -Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#88))#115,17,2) AS sales#118, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116 AS returns#119, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#117 AS profit#120, web channel AS channel#121, concat(web_site, web_site_id#100) AS id#122] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#88)), sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#88))#115, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117] +Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#88))#115,17,2) AS sales#118, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116 AS returns#119, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117 AS profit#120, web channel AS channel#121, concat(web_site, web_site_id#100) AS id#122] (102) Union diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt index b8122c8270984..a6fd641bc2434 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt @@ -9,7 +9,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Union WholeStageCodegen (10) - HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] 
InputAdapter Exchange [s_store_id] #2 WholeStageCodegen (9) @@ -79,7 +79,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.promotion [p_promo_sk,p_channel_tv] WholeStageCodegen (20) - HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [cp_catalog_page_id] #9 WholeStageCodegen (19) @@ -130,7 +130,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter ReusedExchange [p_promo_sk] #8 WholeStageCodegen (30) - HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [web_site_id] #13 WholeStageCodegen (29) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt index 83d227688cf61..288df2457edf2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt @@ -297,7 +297,7 @@ Input [3]: [ctr_state#36, sum#45, count#46] Keys [1]: [ctr_state#36] Functions [1]: [avg(ctr_total_return#37)] Aggregate Attributes [1]: [avg(ctr_total_return#37)#48] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#37)#48) * 1.200000), DecimalType(24,7), true) AS (avg(ctr_total_return) * 1.2)#49, ctr_state#36 AS ctr_state#36#50] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#37)#48) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#49, ctr_state#36 AS ctr_state#36#50] (53) Filter [codegen id : 19] Input [2]: [(avg(ctr_total_return) * 1.2)#49, ctr_state#36#50] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt index 260224e41b7f7..91bd90224827a 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt @@ -198,7 +198,7 @@ Input [3]: [ctr_state#15, sum#22, count#23] Keys [1]: [ctr_state#15] Functions [1]: [avg(ctr_total_return#16)] Aggregate Attributes [1]: [avg(ctr_total_return#16)#25] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#16)#25) * 1.200000), DecimalType(24,7), true) AS (avg(ctr_total_return) * 1.2)#26, ctr_state#15 AS ctr_state#15#27] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#16)#25) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#26, ctr_state#15 AS ctr_state#15#27] (32) Filter [codegen id : 8] Input [2]: [(avg(ctr_total_return) * 1.2)#26, ctr_state#15#27] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt index 1fd4febb4e266..3374a3dc3daae 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt @@ -256,7 +256,7 @@ Right keys [1]: [item_id#38] Join condition: None (45) Project [codegen id : 18] -Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6), true) AS average#44] +Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#44] Input [5]: [item_id#13, sr_item_qty#14, cr_item_qty#26, item_id#38, wr_item_qty#39] (46) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt index b78773ee48f48..106d5dd3090e3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt @@ -256,7 +256,7 @@ Right keys [1]: [item_id#38] Join condition: None (45) Project [codegen id : 18] -Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 
3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6), true) AS average#44] +Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#44] Input [5]: [item_id#13, sr_item_qty#14, cr_item_qty#26, item_id#38, wr_item_qty#39] (46) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt index 9c798856baa66..6325bd574530a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt @@ -141,7 +141,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_category#4, i_brand#2, s_store_na (25) Filter [codegen id : 7] Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22, avg_monthly_sales#24] -Condition : (isnotnull(avg_monthly_sales#24) AND (NOT (avg_monthly_sales#24 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000))) +Condition : (isnotnull(avg_monthly_sales#24) AND (NOT (avg_monthly_sales#24 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (26) Project [codegen id : 7] Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] @@ -149,7 +149,7 @@ Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name# (27) TakeOrderedAndProject Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)) ASC 
NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt index 4c6124960bb0d..770ab84503645 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt @@ -141,7 +141,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_category#4, i_brand#2, s_store_na (25) Filter [codegen id : 7] Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22, avg_monthly_sales#24] -Condition : (isnotnull(avg_monthly_sales#24) AND (NOT (avg_monthly_sales#24 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000))) +Condition : (isnotnull(avg_monthly_sales#24) AND (NOT (avg_monthly_sales#24 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (26) Project [codegen id : 7] Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] @@ -149,7 +149,7 @@ Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name# (27) TakeOrderedAndProject Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt index 39b6534100574..095c3d531a509 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt @@ -280,6 +280,6 @@ Arguments: IdentityBroadcastMode, [id=#33] Join condition: None (51) Project [codegen id : 10] -Output [1]: [CheckOverflow((promote_precision(cast(amc#18 as decimal(15,4))) / promote_precision(cast(pmc#32 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#34] +Output 
[1]: [CheckOverflow((promote_precision(cast(amc#18 as decimal(15,4))) / promote_precision(cast(pmc#32 as decimal(15,4)))), DecimalType(35,20)) AS am_pm_ratio#34] Input [2]: [amc#18, pmc#32] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt index 80ab6fd9d8a3f..e9884d694852d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt @@ -280,6 +280,6 @@ Arguments: IdentityBroadcastMode, [id=#33] Join condition: None (51) Project [codegen id : 10] -Output [1]: [CheckOverflow((promote_precision(cast(amc#18 as decimal(15,4))) / promote_precision(cast(pmc#32 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#34] +Output [1]: [CheckOverflow((promote_precision(cast(amc#18 as decimal(15,4))) / promote_precision(cast(pmc#32 as decimal(15,4)))), DecimalType(35,20)) AS am_pm_ratio#34] Input [2]: [amc#18, pmc#32] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt index d13b0f1c9bb91..71aa2bb603946 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt @@ -95,7 +95,7 @@ Input [3]: [ws_item_sk#4, sum#11, count#12] Keys [1]: [ws_item_sk#4] Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#5))] Aggregate Attributes [1]: [avg(UnscaledValue(ws_ext_discount_amt#5))#14] -Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(ws_ext_discount_amt#5))#14 / 100.0) as decimal(11,6)))), DecimalType(14,7), true) AS (1.3 * avg(ws_ext_discount_amt))#15, ws_item_sk#4] +Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(ws_ext_discount_amt#5))#14 / 100.0) as decimal(11,6)))), DecimalType(14,7)) AS (1.3 * avg(ws_ext_discount_amt))#15, ws_item_sk#4] (15) Filter Input [2]: [(1.3 * avg(ws_ext_discount_amt))#15, ws_item_sk#4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt index 72c206a372644..bec857eb2489a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt @@ -119,7 +119,7 @@ Input [3]: [ws_item_sk#8, sum#14, count#15] Keys [1]: [ws_item_sk#8] Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#9))] Aggregate Attributes [1]: [avg(UnscaledValue(ws_ext_discount_amt#9))#17] -Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(ws_ext_discount_amt#9))#17 / 100.0) as decimal(11,6)))), DecimalType(14,7), true) AS (1.3 * avg(ws_ext_discount_amt))#18, ws_item_sk#8] +Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(ws_ext_discount_amt#9))#17 / 100.0) as decimal(11,6)))), DecimalType(14,7)) AS (1.3 * avg(ws_ext_discount_amt))#18, ws_item_sk#8] (20) Filter [codegen id : 4] Input [2]: [(1.3 * avg(ws_ext_discount_amt))#18, ws_item_sk#8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt index 3ed4a02f3bc9e..3f6b5ffb48a67 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt @@ -109,7 +109,7 @@ Right keys [2]: [ss_item_sk#10, ss_ticket_number#12] Join condition: None (20) Project [codegen id : 6] -Output [2]: [ss_customer_sk#11, CASE WHEN isnotnull(sr_return_quantity#4) THEN CheckOverflow((promote_precision(cast((ss_quantity#13 - sr_return_quantity#4) as decimal(12,2))) * promote_precision(cast(ss_sales_price#14 as decimal(12,2)))), DecimalType(18,2), true) ELSE CheckOverflow((promote_precision(cast(ss_quantity#13 as decimal(12,2))) * promote_precision(cast(ss_sales_price#14 as decimal(12,2)))), DecimalType(18,2), true) END AS act_sales#17] +Output [2]: [ss_customer_sk#11, CASE WHEN isnotnull(sr_return_quantity#4) THEN CheckOverflow((promote_precision(cast((ss_quantity#13 - sr_return_quantity#4) as decimal(12,2))) * promote_precision(cast(ss_sales_price#14 as decimal(12,2)))), DecimalType(18,2)) ELSE CheckOverflow((promote_precision(cast(ss_quantity#13 as decimal(12,2))) * promote_precision(cast(ss_sales_price#14 as decimal(12,2)))), DecimalType(18,2)) END AS act_sales#17] Input [8]: [sr_item_sk#1, sr_ticket_number#3, sr_return_quantity#4, ss_item_sk#10, ss_customer_sk#11, ss_ticket_number#12, ss_quantity#13, ss_sales_price#14] (21) HashAggregate [codegen id : 6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt index 461172f33f132..11f69606ece91 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt @@ -109,7 +109,7 @@ Right keys [1]: [r_reason_sk#14] Join condition: None (20) Project [codegen id : 6] -Output [2]: [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#11) THEN CheckOverflow((promote_precision(cast((ss_quantity#4 - sr_return_quantity#11) as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2), true) ELSE CheckOverflow((promote_precision(cast(ss_quantity#4 as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2), true) END AS act_sales#17] +Output [2]: [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#11) THEN CheckOverflow((promote_precision(cast((ss_quantity#4 - sr_return_quantity#11) as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2)) ELSE CheckOverflow((promote_precision(cast(ss_quantity#4 as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2)) END AS act_sales#17] Input [6]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#9, sr_return_quantity#11, r_reason_sk#14] (21) HashAggregate [codegen id : 6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/explain.txt index 310321f5cf372..b3528e4b6881b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/explain.txt @@ -123,7 +123,7 @@ Input 
[8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrev Arguments: [sum(_w1#20) windowspecdefinition(i_class#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#10] (22) Project [codegen id : 9] -Output [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#7] +Output [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23, i_item_id#7] Input [9]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7, _we0#22] (23) Exchange diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt index 95f856b398707..ec1192af4d398 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt @@ -108,7 +108,7 @@ Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemreve Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17), true) AS revenueratio#22, i_item_id#6] +Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22, i_item_id#6] Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6, _we0#21] (20) Exchange diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/explain.txt index 732f510b80d1b..7591e3bdb30c7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/explain.txt @@ -149,7 +149,7 @@ Input [12]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_list_price#3, d_yea (16) HashAggregate [codegen id : 6] Input [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - 
promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#18] Results [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] @@ -160,9 +160,9 @@ Arguments: hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d (18) HashAggregate [codegen id : 7] Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2), true)))#21] -Results [2]: [c_customer_id#10 AS customer_id#22, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2), true)))#21,18,2) AS year_total#23] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))#21] +Results [2]: [c_customer_id#10 AS customer_id#22, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))#21,18,2) AS year_total#23] (19) Filter [codegen id : 7] Input [2]: [customer_id#22, year_total#23] @@ -230,7 +230,7 @@ Input [12]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, d_ (34) HashAggregate [codegen id : 14] Input [10]: [c_customer_id#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31] Keys [8]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#41] Results [9]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, sum#42] @@ -241,9 +241,9 @@ Arguments: 
hashpartitioning(c_customer_id#34, c_first_name#35, c_last_name#36, d (36) HashAggregate [codegen id : 15] Input [9]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, sum#42] Keys [8]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2), true)))#21] -Results [5]: [c_customer_id#34 AS customer_id#44, c_first_name#35 AS customer_first_name#45, c_last_name#36 AS customer_last_name#46, c_email_address#40 AS customer_email_address#47, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2), true)))#21,18,2) AS year_total#48] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))#21] +Results [5]: [c_customer_id#34 AS customer_id#44, c_first_name#35 AS customer_first_name#45, c_last_name#36 AS customer_last_name#46, c_email_address#40 AS customer_email_address#47, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))#21,18,2) AS year_total#48] (37) Exchange Input [5]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_email_address#47, year_total#48] @@ -312,7 +312,7 @@ Input [12]: [ws_bill_customer_sk#50, ws_ext_discount_amt#51, ws_ext_list_price#5 (52) HashAggregate [codegen id : 23] Input [10]: [c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64, ws_ext_discount_amt#51, ws_ext_list_price#52, d_year#55] Keys [8]: [c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64, d_year#55] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#52 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#51 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#52 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#51 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#65] Results [9]: [c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64, d_year#55, sum#66] @@ -323,9 +323,9 @@ Arguments: hashpartitioning(c_customer_id#58, c_first_name#59, c_last_name#60, c (54) HashAggregate [codegen id : 24] Input [9]: [c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, 
c_birth_country#62, c_login#63, c_email_address#64, d_year#55, sum#66] Keys [8]: [c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64, d_year#55] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#52 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#51 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#52 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#51 as decimal(8,2)))), DecimalType(8,2), true)))#68] -Results [2]: [c_customer_id#58 AS customer_id#69, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#52 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#51 as decimal(8,2)))), DecimalType(8,2), true)))#68,18,2) AS year_total#70] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#52 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#51 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#52 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#51 as decimal(8,2)))), DecimalType(8,2))))#68] +Results [2]: [c_customer_id#58 AS customer_id#69, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#52 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#51 as decimal(8,2)))), DecimalType(8,2))))#68,18,2) AS year_total#70] (55) Filter [codegen id : 24] Input [2]: [customer_id#69, year_total#70] @@ -402,7 +402,7 @@ Input [12]: [ws_bill_customer_sk#72, ws_ext_discount_amt#73, ws_ext_list_price#7 (72) HashAggregate [codegen id : 32] Input [10]: [c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86, ws_ext_discount_amt#73, ws_ext_list_price#74, d_year#77] Keys [8]: [c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86, d_year#77] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#74 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#73 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#74 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#73 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#87] Results [9]: [c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86, d_year#77, sum#88] @@ -413,9 +413,9 @@ Arguments: hashpartitioning(c_customer_id#80, c_first_name#81, c_last_name#82, c (74) HashAggregate [codegen id : 33] Input [9]: [c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86, d_year#77, sum#88] Keys [8]: [c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86, d_year#77] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#74 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#73 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: 
[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#74 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#73 as decimal(8,2)))), DecimalType(8,2), true)))#68] -Results [2]: [c_customer_id#80 AS customer_id#90, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#74 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#73 as decimal(8,2)))), DecimalType(8,2), true)))#68,18,2) AS year_total#91] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#74 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#73 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#74 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#73 as decimal(8,2)))), DecimalType(8,2))))#68] +Results [2]: [c_customer_id#80 AS customer_id#90, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#74 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#73 as decimal(8,2)))), DecimalType(8,2))))#68,18,2) AS year_total#91] (75) Exchange Input [2]: [customer_id#90, year_total#91] @@ -428,7 +428,7 @@ Arguments: [customer_id#90 ASC NULLS FIRST], false, 0 (77) SortMergeJoin [codegen id : 35] Left keys [1]: [customer_id#22] Right keys [1]: [customer_id#90] -Join condition: (CASE WHEN (year_total#70 > 0.00) THEN CheckOverflow((promote_precision(year_total#91) / promote_precision(year_total#70)), DecimalType(38,20), true) ELSE 0E-20 END > CASE WHEN (year_total#23 > 0.00) THEN CheckOverflow((promote_precision(year_total#48) / promote_precision(year_total#23)), DecimalType(38,20), true) ELSE 0E-20 END) +Join condition: (CASE WHEN (year_total#70 > 0.00) THEN CheckOverflow((promote_precision(year_total#91) / promote_precision(year_total#70)), DecimalType(38,20)) ELSE 0E-20 END > CASE WHEN (year_total#23 > 0.00) THEN CheckOverflow((promote_precision(year_total#48) / promote_precision(year_total#23)), DecimalType(38,20)) ELSE 0E-20 END) (78) Project [codegen id : 35] Output [4]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_email_address#47] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/simplified.txt index cc47c3516b497..a97e1ed828a9c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/simplified.txt @@ -16,7 +16,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom Exchange [customer_id] #1 WholeStageCodegen (7) Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter Exchange 
[c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #2 WholeStageCodegen (6) @@ -60,7 +60,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom InputAdapter Exchange [customer_id] #6 WholeStageCodegen (15) - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #7 WholeStageCodegen (14) @@ -100,7 +100,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom Exchange [customer_id] #10 WholeStageCodegen (24) Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 WholeStageCodegen (23) @@ -133,7 +133,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom InputAdapter Exchange [customer_id] #13 WholeStageCodegen (33) - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #14 WholeStageCodegen (32) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/explain.txt index cb7fe2568123f..69d3f4ac97247 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/explain.txt @@ -129,7 +129,7 @@ Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_fl (13) HashAggregate [codegen id : 3] Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#16] Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#17] Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] @@ -140,9 +140,9 @@ Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_ye (15) HashAggregate [codegen id : 16] Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2), true)))#20] -Results [2]: [c_customer_id#2 AS customer_id#21, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2), true)))#20,18,2) AS year_total#22] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))#20] +Results [2]: [c_customer_id#2 AS customer_id#21, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))#20,18,2) AS year_total#22] (16) Filter [codegen id : 16] Input [2]: [customer_id#21, year_total#22] @@ -205,7 +205,7 @@ Input [12]: [c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust (29) HashAggregate [codegen id : 6] Input [10]: [c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, ss_ext_discount_amt#32, ss_ext_list_price#33, d_year#38] Keys [8]: [c_customer_id#24, c_first_name#25, 
c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#39] Results [9]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, sum#40] @@ -216,9 +216,9 @@ Arguments: hashpartitioning(c_customer_id#24, c_first_name#25, c_last_name#26, d (31) HashAggregate [codegen id : 7] Input [9]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, sum#40] Keys [8]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2), true)))#20] -Results [5]: [c_customer_id#24 AS customer_id#42, c_first_name#25 AS customer_first_name#43, c_last_name#26 AS customer_last_name#44, c_email_address#30 AS customer_email_address#45, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2), true)))#20,18,2) AS year_total#46] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))#20] +Results [5]: [c_customer_id#24 AS customer_id#42, c_first_name#25 AS customer_first_name#43, c_last_name#26 AS customer_last_name#44, c_email_address#30 AS customer_email_address#45, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))#20,18,2) AS year_total#46] (32) BroadcastExchange Input [5]: [customer_id#42, customer_first_name#43, customer_last_name#44, customer_email_address#45, year_total#46] @@ -286,7 +286,7 @@ Input [12]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust (46) HashAggregate [codegen id : 10] Input [10]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, ws_ext_discount_amt#57, ws_ext_list_price#58, d_year#62] Keys [8]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, d_year#62] -Functions [1]: 
[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#57 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#57 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#63] Results [9]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, d_year#62, sum#64] @@ -297,9 +297,9 @@ Arguments: hashpartitioning(c_customer_id#49, c_first_name#50, c_last_name#51, c (48) HashAggregate [codegen id : 11] Input [9]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, d_year#62, sum#64] Keys [8]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, d_year#62] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#57 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#57 as decimal(8,2)))), DecimalType(8,2), true)))#66] -Results [2]: [c_customer_id#49 AS customer_id#67, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#57 as decimal(8,2)))), DecimalType(8,2), true)))#66,18,2) AS year_total#68] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#57 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#57 as decimal(8,2)))), DecimalType(8,2))))#66] +Results [2]: [c_customer_id#49 AS customer_id#67, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#57 as decimal(8,2)))), DecimalType(8,2))))#66,18,2) AS year_total#68] (49) Filter [codegen id : 11] Input [2]: [customer_id#67, year_total#68] @@ -375,7 +375,7 @@ Input [12]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust (65) HashAggregate [codegen id : 14] Input [10]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, ws_ext_discount_amt#79, ws_ext_list_price#80, d_year#84] Keys [8]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#84] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#80 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#79 as decimal(8,2)))), DecimalType(8,2), true)))] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#80 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#79 as decimal(8,2)))), DecimalType(8,2))))] Aggregate Attributes [1]: [sum#85] Results [9]: [c_customer_id#71, c_first_name#72, c_last_name#73, 
c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#84, sum#86] @@ -386,9 +386,9 @@ Arguments: hashpartitioning(c_customer_id#71, c_first_name#72, c_last_name#73, c (67) HashAggregate [codegen id : 15] Input [9]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#84, sum#86] Keys [8]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#84] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#80 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#79 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#80 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#79 as decimal(8,2)))), DecimalType(8,2), true)))#66] -Results [2]: [c_customer_id#71 AS customer_id#88, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#80 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#79 as decimal(8,2)))), DecimalType(8,2), true)))#66,18,2) AS year_total#89] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#80 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#79 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#80 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#79 as decimal(8,2)))), DecimalType(8,2))))#66] +Results [2]: [c_customer_id#71 AS customer_id#88, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#80 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#79 as decimal(8,2)))), DecimalType(8,2))))#66,18,2) AS year_total#89] (68) BroadcastExchange Input [2]: [customer_id#88, year_total#89] @@ -397,7 +397,7 @@ Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id= (69) BroadcastHashJoin [codegen id : 16] Left keys [1]: [customer_id#21] Right keys [1]: [customer_id#88] -Join condition: (CASE WHEN (year_total#68 > 0.00) THEN CheckOverflow((promote_precision(year_total#89) / promote_precision(year_total#68)), DecimalType(38,20), true) ELSE 0E-20 END > CASE WHEN (year_total#22 > 0.00) THEN CheckOverflow((promote_precision(year_total#46) / promote_precision(year_total#22)), DecimalType(38,20), true) ELSE 0E-20 END) +Join condition: (CASE WHEN (year_total#68 > 0.00) THEN CheckOverflow((promote_precision(year_total#89) / promote_precision(year_total#68)), DecimalType(38,20)) ELSE 0E-20 END > CASE WHEN (year_total#22 > 0.00) THEN CheckOverflow((promote_precision(year_total#46) / promote_precision(year_total#22)), DecimalType(38,20)) ELSE 0E-20 END) (70) Project [codegen id : 16] Output [4]: [customer_id#42, customer_first_name#43, customer_last_name#44, customer_email_address#45] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/simplified.txt index 5fc4dacd55273..91974a295b774 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/simplified.txt @@ -6,7 +6,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom 
BroadcastHashJoin [customer_id,customer_id] BroadcastHashJoin [customer_id,customer_id] Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #1 WholeStageCodegen (3) @@ -38,7 +38,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom InputAdapter BroadcastExchange #4 WholeStageCodegen (7) - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #5 WholeStageCodegen (6) @@ -71,7 +71,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom BroadcastExchange #8 WholeStageCodegen (11) Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #9 WholeStageCodegen (10) @@ -97,7 +97,7 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,custom InputAdapter BroadcastExchange #11 WholeStageCodegen (15) - HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] 
+ HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #12 WholeStageCodegen (14) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/explain.txt index 40a9cea61aecc..40793508f4786 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/explain.txt @@ -121,7 +121,7 @@ Input [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_pri Arguments: [sum(_w1#20) windowspecdefinition(i_class#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#10] (22) Project [codegen id : 9] -Output [7]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23] +Output [7]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23] Input [9]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, _we0#22] (23) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/explain.txt index 479a27f8fee47..02f8baa5a0b81 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/explain.txt @@ -106,7 +106,7 @@ Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_pric Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17), true) AS revenueratio#22] +Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22] Input [9]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, _we0#21] (20) TakeOrderedAndProject diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt index e0c588294b920..ae613fa051425 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt @@ -453,7 +453,7 @@ Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#48, i_brand_ (78) HashAggregate [codegen id : 45] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] Results [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] @@ -464,9 +464,9 @@ Arguments: hashpartitioning(i_brand_id#49, i_class_id#50, i_category_id#51, 5), (80) HashAggregate [codegen id : 92] Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] -Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61, count(1)#62] +Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61 AS sales#64, count(1)#62 AS number_sales#65] (81) Filter [codegen id : 92] Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65] @@ -534,7 +534,7 @@ Input [7]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, i_item_sk#75, i_bra (96) HashAggregate [codegen id : 90] Input [5]: [ss_quantity#69, ss_list_price#70, i_brand_id#76, i_class_id#77, i_category_id#78] Keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] 
+Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#79, isEmpty#80, count#81] Results [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] @@ -545,9 +545,9 @@ Arguments: hashpartitioning(i_brand_id#76, i_class_id#77, i_category_id#78, 5), (98) HashAggregate [codegen id : 91] Input [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] Keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#86, count(1)#87] -Results [6]: [store AS channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#86 AS sales#89, count(1)#87 AS number_sales#90] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#86, count(1)#87] +Results [6]: [store AS channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#86 AS sales#89, count(1)#87 AS number_sales#90] (99) Filter [codegen id : 91] Input [6]: [channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sales#89, number_sales#90] @@ -661,7 +661,7 @@ Input [4]: [ws_quantity#104, ws_list_price#105, ws_sold_date_sk#106, d_date_sk#1 (119) HashAggregate [codegen id : 7] Input [2]: [quantity#96, list_price#97] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#110, count#111] Results [2]: [sum#112, count#113] @@ -672,9 +672,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#114] (121) HashAggregate [codegen id : 8] Input [2]: [sum#112, count#113] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))#115] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2), true))#115 AS average_sales#116] +Functions [1]: 
[avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))#115] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))#115 AS average_sales#116] Subquery:2 Hosting operator id = 103 Hosting Expression = ss_sold_date_sk#94 IN dynamicpruning#13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt index 7c193e479a013..e7d3f84db0c72 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt @@ -4,7 +4,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Filter [sales] Subquery #4 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] InputAdapter Exchange #17 WholeStageCodegen (7) @@ -38,7 +38,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ ReusedSubquery [d_date_sk] #3 InputAdapter ReusedExchange [d_date_sk] #9 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (45) @@ -206,7 +206,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ WholeStageCodegen (91) Filter [sales] ReusedSubquery [average_sales] #4 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #19 WholeStageCodegen (90) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt index fa27ed0d5f607..a5e01db243952 
100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt @@ -385,7 +385,7 @@ Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_ (65) HashAggregate [codegen id : 25] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] @@ -396,9 +396,9 @@ Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), (67) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56, count(1)#57] -Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56 AS sales#59, count(1)#57 AS number_sales#60] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56, count(1)#57] +Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56 AS sales#59, count(1)#57 AS number_sales#60] (68) Filter [codegen id : 52] Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60] @@ -454,7 +454,7 @@ Input [7]: [ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_brand_id#69, (80) HashAggregate [codegen id : 50] Input [5]: [ss_quantity#64, ss_list_price#65, i_brand_id#69, i_class_id#70, i_category_id#71] Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: 
[sum#73, isEmpty#74, count#75] Results [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] @@ -465,9 +465,9 @@ Arguments: hashpartitioning(i_brand_id#69, i_class_id#70, i_category_id#71, 5), (82) HashAggregate [codegen id : 51] Input [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#80, count(1)#81] -Results [6]: [store AS channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#80 AS sales#83, count(1)#81 AS number_sales#84] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#80, count(1)#81] +Results [6]: [store AS channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#80 AS sales#83, count(1)#81 AS number_sales#84] (83) Filter [codegen id : 51] Input [6]: [channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] @@ -581,7 +581,7 @@ Input [4]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100, d_date_sk#101 (103) HashAggregate [codegen id : 7] Input [2]: [quantity#90, list_price#91] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#104, count#105] Results [2]: [sum#106, count#107] @@ -592,9 +592,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#108] (105) HashAggregate [codegen id : 8] Input [2]: [sum#106, count#107] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))#109] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2), true))#109 AS average_sales#110] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * 
promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))#109] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))#109 AS average_sales#110] Subquery:2 Hosting operator id = 87 Hosting Expression = ss_sold_date_sk#88 IN dynamicpruning#12 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt index 15fdf6b0eab16..8f722e735172f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt @@ -4,7 +4,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Filter [sales] Subquery #4 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] InputAdapter Exchange #12 WholeStageCodegen (7) @@ -38,7 +38,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ ReusedSubquery [d_date_sk] #3 InputAdapter ReusedExchange [d_date_sk] #6 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (25) @@ -167,7 +167,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ WholeStageCodegen (51) Filter [sales] ReusedSubquery [average_sales] #4 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #14 WholeStageCodegen (50) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt index 6b057de932b33..e3ad267942560 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt @@ -497,7 +497,7 @@ Input [7]: [ss_item_sk#1, ss_quantity#2, 
ss_list_price#3, i_item_sk#48, i_brand_ (78) HashAggregate [codegen id : 45] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] Results [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] @@ -508,9 +508,9 @@ Arguments: hashpartitioning(i_brand_id#49, i_class_id#50, i_category_id#51, 5), (80) HashAggregate [codegen id : 46] Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] -Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61, count(1)#62] +Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61 AS sales#64, count(1)#62 AS number_sales#65] (81) Filter [codegen id : 46] Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65] @@ -578,7 +578,7 @@ Input [7]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, i_item_sk#74, i_bra (96) HashAggregate [codegen id : 91] Input [5]: [cs_quantity#69, cs_list_price#70, i_brand_id#75, i_class_id#76, i_category_id#77] Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#78, isEmpty#79, count#80] Results [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] @@ -589,9 +589,9 @@ Arguments: hashpartitioning(i_brand_id#75, i_class_id#76, i_category_id#77, 5), (98) HashAggregate [codegen id : 
92] Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#85, count(1)#86] -Results [6]: [catalog AS channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#85 AS sales#88, count(1)#86 AS number_sales#89] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#85, count(1)#86] +Results [6]: [catalog AS channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#85 AS sales#88, count(1)#86 AS number_sales#89] (99) Filter [codegen id : 92] Input [6]: [channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] @@ -659,7 +659,7 @@ Input [7]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, i_item_sk#96, i_bra (114) HashAggregate [codegen id : 137] Input [5]: [ws_quantity#91, ws_list_price#92, i_brand_id#97, i_class_id#98, i_category_id#99] Keys [3]: [i_brand_id#97, i_class_id#98, i_category_id#99] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#100, isEmpty#101, count#102] Results [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] @@ -670,9 +670,9 @@ Arguments: hashpartitioning(i_brand_id#97, i_class_id#98, i_category_id#99, 5), (116) HashAggregate [codegen id : 138] Input [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] Keys [3]: [i_brand_id#97, i_class_id#98, i_category_id#99] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true))#107, count(1)#108] -Results [6]: [web AS channel#109, i_brand_id#97, i_class_id#98, i_category_id#99, sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2), true))#107 AS sales#110, count(1)#108 AS number_sales#111] +Functions [2]: 
[sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2)))#107, count(1)#108] +Results [6]: [web AS channel#109, i_brand_id#97, i_class_id#98, i_category_id#99, sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2)))#107 AS sales#110, count(1)#108 AS number_sales#111] (117) Filter [codegen id : 138] Input [6]: [channel#109, i_brand_id#97, i_class_id#98, i_category_id#99, sales#110, number_sales#111] @@ -929,7 +929,7 @@ Input [4]: [ws_quantity#191, ws_list_price#192, ws_sold_date_sk#193, d_date_sk#1 (163) HashAggregate [codegen id : 7] Input [2]: [quantity#182, list_price#183] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#197, count#198] Results [2]: [sum#199, count#200] @@ -940,9 +940,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#201] (165) HashAggregate [codegen id : 8] Input [2]: [sum#199, count#200] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2), true))#202] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2), true))#202 AS average_sales#203] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2)))#202] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2)))#202 AS average_sales#203] Subquery:2 Hosting operator id = 147 Hosting Expression = ss_sold_date_sk#180 IN dynamicpruning#13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt index c02368aac7e78..b5378a01bfa13 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt @@ -19,7 +19,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num Filter [sales] Subquery #3 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * 
promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] InputAdapter Exchange #19 WholeStageCodegen (7) @@ -60,7 +60,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num ReusedSubquery [d_date_sk] #4 InputAdapter ReusedExchange [d_date_sk] #20 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #3 WholeStageCodegen (45) @@ -219,7 +219,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num WholeStageCodegen (92) Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #21 WholeStageCodegen (91) @@ -252,7 +252,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num WholeStageCodegen (138) Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #23 WholeStageCodegen (137) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt index 01062fa7e351c..5d0a71ecbf8a2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt @@ -426,7 +426,7 @@ Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_ (65) HashAggregate [codegen id : 25] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions 
[2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] @@ -437,9 +437,9 @@ Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), (67) HashAggregate [codegen id : 26] Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56, count(1)#57] -Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2), true))#56 AS sales#59, count(1)#57 AS number_sales#60] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56, count(1)#57] +Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56 AS sales#59, count(1)#57 AS number_sales#60] (68) Filter [codegen id : 26] Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60] @@ -495,7 +495,7 @@ Input [7]: [cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_brand_id#68, (80) HashAggregate [codegen id : 51] Input [5]: [cs_quantity#64, cs_list_price#65, i_brand_id#68, i_class_id#69, i_category_id#70] Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#72, isEmpty#73, count#74] Results [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] @@ -506,9 +506,9 @@ Arguments: hashpartitioning(i_brand_id#68, i_class_id#69, i_category_id#70, 5), (82) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as 
decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79, count(1)#80] -Results [6]: [catalog AS channel#81, i_brand_id#68, i_class_id#69, i_category_id#70, sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79 AS sales#82, count(1)#80 AS number_sales#83] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#79, count(1)#80] +Results [6]: [catalog AS channel#81, i_brand_id#68, i_class_id#69, i_category_id#70, sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#79 AS sales#82, count(1)#80 AS number_sales#83] (83) Filter [codegen id : 52] Input [6]: [channel#81, i_brand_id#68, i_class_id#69, i_category_id#70, sales#82, number_sales#83] @@ -564,7 +564,7 @@ Input [7]: [ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_brand_id#89, (95) HashAggregate [codegen id : 77] Input [5]: [ws_quantity#85, ws_list_price#86, i_brand_id#89, i_class_id#90, i_category_id#91] Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#93, isEmpty#94, count#95] Results [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] @@ -575,9 +575,9 @@ Arguments: hashpartitioning(i_brand_id#89, i_class_id#90, i_category_id#91, 5), (97) HashAggregate [codegen id : 78] Input [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true))#100, count(1)#101] -Results [6]: [web AS channel#102, i_brand_id#89, i_class_id#90, i_category_id#91, sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sales#103, count(1)#101 AS number_sales#104] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 
as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2)))#100, count(1)#101] +Results [6]: [web AS channel#102, i_brand_id#89, i_class_id#90, i_category_id#91, sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2)))#100 AS sales#103, count(1)#101 AS number_sales#104] (98) Filter [codegen id : 78] Input [6]: [channel#102, i_brand_id#89, i_class_id#90, i_category_id#91, sales#103, number_sales#104] @@ -834,7 +834,7 @@ Input [4]: [ws_quantity#184, ws_list_price#185, ws_sold_date_sk#186, d_date_sk#1 (144) HashAggregate [codegen id : 7] Input [2]: [quantity#175, list_price#176] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2), true))] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#190, count#191] Results [2]: [sum#192, count#193] @@ -845,9 +845,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#194] (146) HashAggregate [codegen id : 8] Input [2]: [sum#192, count#193] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2), true))#195] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2), true))#195 AS average_sales#196] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2)))#195] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2)))#195 AS average_sales#196] Subquery:2 Hosting operator id = 128 Hosting Expression = ss_sold_date_sk#173 IN dynamicpruning#12 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt index 2d0d4267a2a69..f800a80a4e636 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt @@ -19,7 +19,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num Filter [sales] Subquery #3 WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] 
InputAdapter Exchange #14 WholeStageCodegen (7) @@ -60,7 +60,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num ReusedSubquery [d_date_sk] #4 InputAdapter ReusedExchange [d_date_sk] #15 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #3 WholeStageCodegen (25) @@ -180,7 +180,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num WholeStageCodegen (52) Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #16 WholeStageCodegen (51) @@ -204,7 +204,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num WholeStageCodegen (78) Filter [sales] ReusedSubquery [average_sales] #3 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #17 WholeStageCodegen (77) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/explain.txt index 64a92b9e727bc..c925197336e95 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/explain.txt @@ -121,7 +121,7 @@ Input [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_pri Arguments: [sum(_w1#20) windowspecdefinition(i_class#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#10] (22) Project [codegen id : 9] -Output [7]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS 
revenueratio#23] +Output [7]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23] Input [9]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, _we0#22] (23) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/explain.txt index 5ea1cda2f68d5..ff461dafc09c0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/explain.txt @@ -106,7 +106,7 @@ Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_pric Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17), true) AS revenueratio#22] +Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22] Input [9]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, _we0#21] (20) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt index c08379b07b397..db2116117c81e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt @@ -430,6 +430,6 @@ Input [2]: [sum#59, count#60] Keys: [] Functions [1]: [avg(netpaid#40)] Aggregate Attributes [1]: [avg(netpaid#40)#62] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#40)#62)), DecimalType(24,8), true) AS (0.05 * avg(netpaid))#63] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#40)#62)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#63] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/explain.txt index d27e5af04e2dc..ea90187cb53ad 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/explain.txt @@ -422,6 +422,6 @@ Input [2]: [sum#57, count#58] Keys: [] Functions [1]: [avg(netpaid#40)] Aggregate Attributes [1]: [avg(netpaid#40)#60] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#40)#60)), DecimalType(24,8), true) AS (0.05 * avg(netpaid))#61] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#40)#60)), 
DecimalType(24,8)) AS (0.05 * avg(netpaid))#61] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt index 0e20331e83484..9224fbda95e47 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt @@ -143,7 +143,7 @@ Input [4]: [i_category#13, i_class#12, sum#17, sum#18] Keys [2]: [i_category#13, i_class#12] Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#20, sum(UnscaledValue(ss_ext_sales_price#3))#21] -Results [6]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2))), DecimalType(37,20), true) as decimal(38,20)) AS gross_margin#22, i_category#13, i_class#12, 0 AS t_category#23, 0 AS t_class#24, 0 AS lochierarchy#25] +Results [6]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2))), DecimalType(37,20)) as decimal(38,20)) AS gross_margin#22, i_category#13, i_class#12, 0 AS t_category#23, 0 AS t_class#24, 0 AS lochierarchy#25] (23) ReusedExchange [Reuses operator id: 21] Output [4]: [i_category#13, i_class#12, sum#26, sum#27] @@ -171,7 +171,7 @@ Input [5]: [i_category#13, sum#36, isEmpty#37, sum#38, isEmpty#39] Keys [1]: [i_category#13] Functions [2]: [sum(ss_net_profit#30), sum(ss_ext_sales_price#31)] Aggregate Attributes [2]: [sum(ss_net_profit#30)#41, sum(ss_ext_sales_price#31)#42] -Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#30)#41) / promote_precision(sum(ss_ext_sales_price#31)#42)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#43, i_category#13, null AS i_class#44, 0 AS t_category#45, 1 AS t_class#46, 1 AS lochierarchy#47] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#30)#41) / promote_precision(sum(ss_ext_sales_price#31)#42)), DecimalType(38,11)) as decimal(38,20)) AS gross_margin#43, i_category#13, null AS i_class#44, 0 AS t_category#45, 1 AS t_class#46, 1 AS lochierarchy#47] (28) ReusedExchange [Reuses operator id: 21] Output [4]: [i_category#13, i_class#12, sum#48, sum#49] @@ -199,7 +199,7 @@ Input [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] Keys: [] Functions [2]: [sum(ss_net_profit#30), sum(ss_ext_sales_price#31)] Aggregate Attributes [2]: [sum(ss_net_profit#30)#59, sum(ss_ext_sales_price#31)#60] -Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#30)#59) / promote_precision(sum(ss_ext_sales_price#31)#60)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#61, null AS i_category#62, null AS i_class#63, 1 AS t_category#64, 1 AS t_class#65, 2 AS lochierarchy#66] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#30)#59) / promote_precision(sum(ss_ext_sales_price#31)#60)), DecimalType(38,11)) as decimal(38,20)) AS gross_margin#61, null AS i_category#62, null AS i_class#63, 1 AS t_category#64, 1 AS t_class#65, 2 AS lochierarchy#66] (33) Union diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt index 5470bf61ac502..f036e3e8fef42 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt @@ -143,7 +143,7 @@ Input [4]: [i_category#10, i_class#9, sum#17, sum#18] Keys [2]: [i_category#10, i_class#9] Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#20, sum(UnscaledValue(ss_ext_sales_price#3))#21] -Results [6]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2))), DecimalType(37,20), true) as decimal(38,20)) AS gross_margin#22, i_category#10, i_class#9, 0 AS t_category#23, 0 AS t_class#24, 0 AS lochierarchy#25] +Results [6]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2))), DecimalType(37,20)) as decimal(38,20)) AS gross_margin#22, i_category#10, i_class#9, 0 AS t_category#23, 0 AS t_class#24, 0 AS lochierarchy#25] (23) ReusedExchange [Reuses operator id: 21] Output [4]: [i_category#10, i_class#9, sum#26, sum#27] @@ -171,7 +171,7 @@ Input [5]: [i_category#10, sum#36, isEmpty#37, sum#38, isEmpty#39] Keys [1]: [i_category#10] Functions [2]: [sum(ss_net_profit#30), sum(ss_ext_sales_price#31)] Aggregate Attributes [2]: [sum(ss_net_profit#30)#41, sum(ss_ext_sales_price#31)#42] -Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#30)#41) / promote_precision(sum(ss_ext_sales_price#31)#42)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#43, i_category#10, null AS i_class#44, 0 AS t_category#45, 1 AS t_class#46, 1 AS lochierarchy#47] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#30)#41) / promote_precision(sum(ss_ext_sales_price#31)#42)), DecimalType(38,11)) as decimal(38,20)) AS gross_margin#43, i_category#10, null AS i_class#44, 0 AS t_category#45, 1 AS t_class#46, 1 AS lochierarchy#47] (28) ReusedExchange [Reuses operator id: 21] Output [4]: [i_category#10, i_class#9, sum#48, sum#49] @@ -199,7 +199,7 @@ Input [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] Keys: [] Functions [2]: [sum(ss_net_profit#30), sum(ss_ext_sales_price#31)] Aggregate Attributes [2]: [sum(ss_net_profit#30)#59, sum(ss_ext_sales_price#31)#60] -Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#30)#59) / promote_precision(sum(ss_ext_sales_price#31)#60)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#61, null AS i_category#62, null AS i_class#63, 1 AS t_category#64, 1 AS t_class#65, 2 AS lochierarchy#66] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#30)#59) / promote_precision(sum(ss_ext_sales_price#31)#60)), DecimalType(38,11)) as decimal(38,20)) AS gross_margin#61, null AS i_category#62, null AS i_class#63, 1 AS t_category#64, 1 AS t_class#65, 2 AS lochierarchy#66] (33) Union diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt index 4566f30b27d04..d2a5ecef9c900 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt @@ -186,7 +186,7 @@ Arguments: [avg(_w0#23) windowspecdefinition(i_category#16, i_brand#15, s_store_ (30) Filter [codegen id : 11] Input [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23, rn#25, avg_monthly_sales#26] -Condition : ((isnotnull(avg_monthly_sales#26) AND (avg_monthly_sales#26 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#26) AND (avg_monthly_sales#26 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (31) Project [codegen id : 11] Output [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] @@ -277,7 +277,7 @@ Input [16]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_ye (52) TakeOrderedAndProject Input [7]: [i_category#16, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], [i_category#16, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], [i_category#16, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt index 21944f91237a0..8abc8fda35cef 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt @@ -167,7 +167,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_category#3, i_brand#2, s_store_na (27) Filter [codegen id : 22] Input [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] -Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - 
promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (28) Project [codegen id : 22] Output [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, avg_monthly_sales#25, rn#24] @@ -242,7 +242,7 @@ Input [16]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year (45) TakeOrderedAndProject Input [7]: [i_category#3, d_year#11, d_moy#12, avg_monthly_sales#25, sum_sales#21, psum#47, nsum#48] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST], [i_category#3, d_year#11, d_moy#12, avg_monthly_sales#25, sum_sales#21, psum#47, nsum#48] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST], [i_category#3, d_year#11, d_moy#12, avg_monthly_sales#25, sum_sales#21, psum#47, nsum#48] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/explain.txt index b1b28f1a20048..5efc0bfaed99e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/explain.txt @@ -177,7 +177,7 @@ Input [7]: [ws_item_sk#1, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27 Keys [1]: [ws_item_sk#1] Functions [4]: [sum(coalesce(wr_return_quantity#12, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#12, 0))#29, sum(coalesce(ws_quantity#3, 0))#30, sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00))#31, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#32] -Results [3]: [ws_item_sk#1 AS item#33, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#12, 0))#29 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#30 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#34, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00))#31 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#32 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#35] +Results [3]: [ws_item_sk#1 AS item#33, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#12, 0))#29 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#30 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#34, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00))#31 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#32 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#35] (21) Exchange Input [3]: [item#33, return_ratio#34, currency_ratio#35] @@ -297,7 +297,7 @@ Input [7]: [cs_item_sk#40, sum#60, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#6 Keys [1]: [cs_item_sk#40] Functions [4]: 
[sum(coalesce(cr_return_quantity#50, 0)), sum(coalesce(cs_quantity#42, 0)), sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))] Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#50, 0))#67, sum(coalesce(cs_quantity#42, 0))#68, sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#69, sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))#70] -Results [3]: [cs_item_sk#40 AS item#71, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#50, 0))#67 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#42, 0))#68 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#72, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#69 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))#70 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#73] +Results [3]: [cs_item_sk#40 AS item#71, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#50, 0))#67 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#42, 0))#68 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#72, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#69 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))#70 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#73] (48) Exchange Input [3]: [item#71, return_ratio#72, currency_ratio#73] @@ -417,7 +417,7 @@ Input [7]: [ss_item_sk#78, sum#98, sum#99, sum#100, isEmpty#101, sum#102, isEmpt Keys [1]: [ss_item_sk#78] Functions [4]: [sum(coalesce(sr_return_quantity#88, 0)), sum(coalesce(ss_quantity#80, 0)), sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))] Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#88, 0))#105, sum(coalesce(ss_quantity#80, 0))#106, sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00))#107, sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#108] -Results [3]: [ss_item_sk#78 AS item#109, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#88, 0))#105 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#80, 0))#106 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#110, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00))#107 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#108 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#111] +Results [3]: [ss_item_sk#78 AS item#109, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#88, 0))#105 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#80, 0))#106 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#110, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00))#107 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#108 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#111] (75) Exchange Input [3]: [item#109, return_ratio#110, currency_ratio#111] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/explain.txt index 
1e11686ade7cc..657a1a1f358c6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/explain.txt @@ -156,7 +156,7 @@ Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26 Keys [1]: [ws_item_sk#1] Functions [4]: [sum(coalesce(wr_return_quantity#11, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#11, 0))#28, sum(coalesce(ws_quantity#3, 0))#29, sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#30, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#31] -Results [3]: [ws_item_sk#1 AS item#32, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#11, 0))#28 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#29 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#33, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#30 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#31 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#34] +Results [3]: [ws_item_sk#1 AS item#32, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#11, 0))#28 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#29 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#33, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#30 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#31 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#34] (18) Exchange Input [3]: [item#32, return_ratio#33, currency_ratio#34] @@ -264,7 +264,7 @@ Input [7]: [cs_item_sk#39, sum#58, sum#59, sum#60, isEmpty#61, sum#62, isEmpty#6 Keys [1]: [cs_item_sk#39] Functions [4]: [sum(coalesce(cr_return_quantity#48, 0)), sum(coalesce(cs_quantity#41, 0)), sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))] Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#48, 0))#65, sum(coalesce(cs_quantity#41, 0))#66, sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#67, sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))#68] -Results [3]: [cs_item_sk#39 AS item#69, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#48, 0))#65 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#41, 0))#66 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#70, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#67 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))#68 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#71] +Results [3]: [cs_item_sk#39 AS item#69, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#48, 0))#65 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#41, 0))#66 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#70, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#67 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))#68 as 
decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#71] (42) Exchange Input [3]: [item#69, return_ratio#70, currency_ratio#71] @@ -372,7 +372,7 @@ Input [7]: [ss_item_sk#76, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#1 Keys [1]: [ss_item_sk#76] Functions [4]: [sum(coalesce(sr_return_quantity#85, 0)), sum(coalesce(ss_quantity#78, 0)), sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#85, 0))#102, sum(coalesce(ss_quantity#78, 0))#103, sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00))#104, sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#105] -Results [3]: [ss_item_sk#76 AS item#106, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#85, 0))#102 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#78, 0))#103 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#107, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00))#104 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#105 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#108] +Results [3]: [ss_item_sk#76 AS item#106, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#85, 0))#102 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#78, 0))#103 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#107, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00))#104 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#105 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#108] (66) Exchange Input [3]: [item#106, return_ratio#107, currency_ratio#108] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt index d214b321a4791..d46c1d8c7e336 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt @@ -186,7 +186,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_category#15, i_brand#14, cc_name# (30) Filter [codegen id : 11] Input [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] -Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (31) Project [codegen id : 11] Output [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] @@ -277,7 +277,7 @@ Input [14]: 
[i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales (52) TakeOrderedAndProject Input [8]: [i_category#15, i_brand#14, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, d_year#7 ASC NULLS FIRST], [i_category#15, i_brand#14, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, d_year#7 ASC NULLS FIRST], [i_category#15, i_brand#14, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt index 65a811671c32d..675acedcd9cad 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt @@ -167,7 +167,7 @@ Arguments: [avg(_w0#21) windowspecdefinition(i_category#3, i_brand#2, cc_name#14 (27) Filter [codegen id : 22] Input [9]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21, rn#23, avg_monthly_sales#24] -Condition : ((isnotnull(avg_monthly_sales#24) AND (avg_monthly_sales#24 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true), false)) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16), true) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#24) AND (avg_monthly_sales#24 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (28) Project [codegen id : 22] Output [8]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, avg_monthly_sales#24, rn#23] @@ -242,7 +242,7 @@ Input [14]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales (45) TakeOrderedAndProject Input [8]: [i_category#3, i_brand#2, d_year#11, d_moy#12, avg_monthly_sales#24, sum_sales#20, psum#44, nsum#45] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, d_year#11 ASC NULLS FIRST], [i_category#3, i_brand#2, d_year#11, d_moy#12, avg_monthly_sales#24, sum_sales#20, psum#44, nsum#45] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, d_year#11 ASC NULLS FIRST], [i_category#3, i_brand#2, d_year#11, d_moy#12, avg_monthly_sales#24, sum_sales#20, psum#44, nsum#45] ===== Subqueries ===== diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt index b6a5a36a10c6c..88d3ec5d20f2b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt @@ -186,7 +186,7 @@ Input [5]: [s_store_id#23, sum#30, sum#31, sum#32, sum#33] Keys [1]: [s_store_id#23] Functions [4]: [sum(UnscaledValue(sales_price#8)), sum(UnscaledValue(return_amt#10)), sum(UnscaledValue(profit#9)), sum(UnscaledValue(net_loss#11))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#8))#35, sum(UnscaledValue(return_amt#10))#36, sum(UnscaledValue(profit#9))#37, sum(UnscaledValue(net_loss#11))#38] -Results [5]: [store channel AS channel#39, concat(store, s_store_id#23) AS id#40, MakeDecimal(sum(UnscaledValue(sales_price#8))#35,17,2) AS sales#41, MakeDecimal(sum(UnscaledValue(return_amt#10))#36,17,2) AS returns#42, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#37,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#38,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#43] +Results [5]: [store channel AS channel#39, concat(store, s_store_id#23) AS id#40, MakeDecimal(sum(UnscaledValue(sales_price#8))#35,17,2) AS sales#41, MakeDecimal(sum(UnscaledValue(return_amt#10))#36,17,2) AS returns#42, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#37,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#38,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#43] (22) Scan parquet default.catalog_sales Output [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] @@ -283,7 +283,7 @@ Input [5]: [cp_catalog_page_id#65, sum#72, sum#73, sum#74, sum#75] Keys [1]: [cp_catalog_page_id#65] Functions [4]: [sum(UnscaledValue(sales_price#50)), sum(UnscaledValue(return_amt#52)), sum(UnscaledValue(profit#51)), sum(UnscaledValue(net_loss#53))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#50))#77, sum(UnscaledValue(return_amt#52))#78, sum(UnscaledValue(profit#51))#79, sum(UnscaledValue(net_loss#53))#80] -Results [5]: [catalog channel AS channel#81, concat(catalog_page, cp_catalog_page_id#65) AS id#82, MakeDecimal(sum(UnscaledValue(sales_price#50))#77,17,2) AS sales#83, MakeDecimal(sum(UnscaledValue(return_amt#52))#78,17,2) AS returns#84, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#51))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#53))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#85] +Results [5]: [catalog channel AS channel#81, concat(catalog_page, cp_catalog_page_id#65) AS id#82, MakeDecimal(sum(UnscaledValue(sales_price#50))#77,17,2) AS sales#83, MakeDecimal(sum(UnscaledValue(return_amt#52))#78,17,2) AS returns#84, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#51))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#53))#80,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#85] (43) Scan parquet default.web_sales Output [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] @@ -414,7 +414,7 @@ Input [5]: [web_site_id#114, sum#121, sum#122, sum#123, sum#124] Keys [1]: [web_site_id#114] Functions [4]: [sum(UnscaledValue(sales_price#92)), 
sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#126, sum(UnscaledValue(return_amt#94))#127, sum(UnscaledValue(profit#93))#128, sum(UnscaledValue(net_loss#95))#129] -Results [5]: [web channel AS channel#130, concat(web_site, web_site_id#114) AS id#131, MakeDecimal(sum(UnscaledValue(sales_price#92))#126,17,2) AS sales#132, MakeDecimal(sum(UnscaledValue(return_amt#94))#127,17,2) AS returns#133, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#128,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#129,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#134] +Results [5]: [web channel AS channel#130, concat(web_site, web_site_id#114) AS id#131, MakeDecimal(sum(UnscaledValue(sales_price#92))#126,17,2) AS sales#132, MakeDecimal(sum(UnscaledValue(return_amt#94))#127,17,2) AS returns#133, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#128,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#129,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#134] (72) Union diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt index 05636f5f44067..cadbb12000ba3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt @@ -183,7 +183,7 @@ Input [5]: [s_store_id#24, sum#30, sum#31, sum#32, sum#33] Keys [1]: [s_store_id#24] Functions [4]: [sum(UnscaledValue(sales_price#8)), sum(UnscaledValue(return_amt#10)), sum(UnscaledValue(profit#9)), sum(UnscaledValue(net_loss#11))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#8))#35, sum(UnscaledValue(return_amt#10))#36, sum(UnscaledValue(profit#9))#37, sum(UnscaledValue(net_loss#11))#38] -Results [5]: [store channel AS channel#39, concat(store, s_store_id#24) AS id#40, MakeDecimal(sum(UnscaledValue(sales_price#8))#35,17,2) AS sales#41, MakeDecimal(sum(UnscaledValue(return_amt#10))#36,17,2) AS returns#42, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#37,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#38,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#43] +Results [5]: [store channel AS channel#39, concat(store, s_store_id#24) AS id#40, MakeDecimal(sum(UnscaledValue(sales_price#8))#35,17,2) AS sales#41, MakeDecimal(sum(UnscaledValue(return_amt#10))#36,17,2) AS returns#42, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#37,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#38,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#43] (22) Scan parquet default.catalog_sales Output [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] @@ -280,7 +280,7 @@ Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] Keys [1]: [cp_catalog_page_id#66] Functions [4]: [sum(UnscaledValue(sales_price#50)), sum(UnscaledValue(return_amt#52)), sum(UnscaledValue(profit#51)), sum(UnscaledValue(net_loss#53))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#50))#77, sum(UnscaledValue(return_amt#52))#78, sum(UnscaledValue(profit#51))#79, 
sum(UnscaledValue(net_loss#53))#80] -Results [5]: [catalog channel AS channel#81, concat(catalog_page, cp_catalog_page_id#66) AS id#82, MakeDecimal(sum(UnscaledValue(sales_price#50))#77,17,2) AS sales#83, MakeDecimal(sum(UnscaledValue(return_amt#52))#78,17,2) AS returns#84, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#51))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#53))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#85] +Results [5]: [catalog channel AS channel#81, concat(catalog_page, cp_catalog_page_id#66) AS id#82, MakeDecimal(sum(UnscaledValue(sales_price#50))#77,17,2) AS sales#83, MakeDecimal(sum(UnscaledValue(return_amt#52))#78,17,2) AS returns#84, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#51))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#53))#80,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#85] (43) Scan parquet default.web_sales Output [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] @@ -399,7 +399,7 @@ Input [5]: [web_site_id#114, sum#120, sum#121, sum#122, sum#123] Keys [1]: [web_site_id#114] Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#125, sum(UnscaledValue(return_amt#94))#126, sum(UnscaledValue(profit#93))#127, sum(UnscaledValue(net_loss#95))#128] -Results [5]: [web channel AS channel#129, concat(web_site, web_site_id#114) AS id#130, MakeDecimal(sum(UnscaledValue(sales_price#92))#125,17,2) AS sales#131, MakeDecimal(sum(UnscaledValue(return_amt#94))#126,17,2) AS returns#132, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#127,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#128,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#133] +Results [5]: [web channel AS channel#129, concat(web_site, web_site_id#114) AS id#130, MakeDecimal(sum(UnscaledValue(sales_price#92))#125,17,2) AS sales#131, MakeDecimal(sum(UnscaledValue(return_amt#94))#126,17,2) AS returns#132, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#127,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#128,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#133] (69) Union diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt index c7ccb242056f7..1992b08c26b23 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt @@ -118,7 +118,7 @@ Join condition: None (15) Filter [codegen id : 3] Input [5]: [i_item_sk#5, i_current_price#6, i_category#7, avg(i_current_price)#16, i_category#9] -Condition : (cast(i_current_price#6 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#16)), DecimalType(14,7), true)) +Condition : (cast(i_current_price#6 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#16)), DecimalType(14,7))) (16) Project [codegen id : 3] Output [1]: [i_item_sk#5] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/explain.txt index 0e1ea31859b3d..918c6c375a9ea 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/explain.txt @@ -178,7 +178,7 @@ Join condition: None (30) Filter [codegen id : 6] Input [5]: [i_item_sk#12, i_current_price#13, i_category#14, avg(i_current_price)#23, i_category#16] -Condition : (cast(i_current_price#13 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#23)), DecimalType(14,7), true)) +Condition : (cast(i_current_price#13 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#23)), DecimalType(14,7))) (31) Project [codegen id : 6] Output [1]: [i_item_sk#12] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt index 19240a79cc91c..868f1f26459aa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt @@ -332,7 +332,7 @@ Input [8]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21, cr_item_sk# (28) HashAggregate [codegen id : 9] Input [5]: [cs_item_sk#19, cs_ext_list_price#21, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] Keys [1]: [cs_item_sk#19] -Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#21)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2), true))] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#21)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))] Aggregate Attributes [3]: [sum#31, sum#32, isEmpty#33] Results [4]: [cs_item_sk#19, sum#34, sum#35, isEmpty#36] @@ -343,13 +343,13 @@ Arguments: hashpartitioning(cs_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#37] (30) HashAggregate [codegen id : 10] Input [4]: [cs_item_sk#19, sum#34, sum#35, isEmpty#36] Keys [1]: [cs_item_sk#19] -Functions [2]: [sum(UnscaledValue(cs_ext_list_price#21)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2), true))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#21))#38, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2), true))#39] -Results [3]: [cs_item_sk#19, 
MakeDecimal(sum(UnscaledValue(cs_ext_list_price#21))#38,17,2) AS sale#40, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2), true))#39 AS refund#41] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#21)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#21))#38, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))#39] +Results [3]: [cs_item_sk#19, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#21))#38,17,2) AS sale#40, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))#39 AS refund#41] (31) Filter [codegen id : 10] Input [3]: [cs_item_sk#19, sale#40, refund#41] -Condition : (isnotnull(sale#40) AND (cast(sale#40 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(refund#41)), DecimalType(21,2), true))) +Condition : (isnotnull(sale#40) AND (cast(sale#40 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(refund#41)), DecimalType(21,2)))) (32) Project [codegen id : 10] Output [1]: [cs_item_sk#19] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/simplified.txt index b5ebf7af31bed..00becee05ec8c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/simplified.txt @@ -113,7 +113,7 @@ WholeStageCodegen (88) WholeStageCodegen (10) Project [cs_item_sk] Filter [sale,refund] - HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sale,refund,sum,sum,isEmpty] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sale,refund,sum,sum,isEmpty] InputAdapter Exchange [cs_item_sk] #13 WholeStageCodegen (9) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt index ddaa34ab4e657..426b408190662 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt @@ -302,7 +302,7 @@ Input [8]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21, cr_item_sk# (27) HashAggregate [codegen id : 8] Input [5]: [cs_item_sk#19, cs_ext_list_price#21, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] Keys [1]: [cs_item_sk#19] -Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#21)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2), true))] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#21)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))] Aggregate Attributes [3]: [sum#31, sum#32, isEmpty#33] Results [4]: [cs_item_sk#19, sum#34, sum#35, isEmpty#36] @@ -313,13 +313,13 @@ Arguments: hashpartitioning(cs_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#37] (29) HashAggregate [codegen id : 9] Input [4]: [cs_item_sk#19, sum#34, sum#35, isEmpty#36] Keys [1]: [cs_item_sk#19] -Functions [2]: [sum(UnscaledValue(cs_ext_list_price#21)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2), true))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#21))#38, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2), true))#39] -Results [3]: [cs_item_sk#19, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#21))#38,17,2) AS sale#40, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2), true))#39 AS refund#41] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#21)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#21))#38, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + 
promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))#39] +Results [3]: [cs_item_sk#19, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#21))#38,17,2) AS sale#40, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))#39 AS refund#41] (30) Filter [codegen id : 9] Input [3]: [cs_item_sk#19, sale#40, refund#41] -Condition : (isnotnull(sale#40) AND (cast(sale#40 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(refund#41)), DecimalType(21,2), true))) +Condition : (isnotnull(sale#40) AND (cast(sale#40 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(refund#41)), DecimalType(21,2)))) (31) Project [codegen id : 9] Output [1]: [cs_item_sk#19] @@ -739,13 +739,13 @@ Output [4]: [cs_item_sk#139, sum#140, sum#141, isEmpty#142] (125) HashAggregate [codegen id : 35] Input [4]: [cs_item_sk#139, sum#140, sum#141, isEmpty#142] Keys [1]: [cs_item_sk#139] -Functions [2]: [sum(UnscaledValue(cs_ext_list_price#143)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#144 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#145 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#146 as decimal(9,2)))), DecimalType(9,2), true))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#143))#38, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#144 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#145 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#146 as decimal(9,2)))), DecimalType(9,2), true))#39] -Results [3]: [cs_item_sk#139, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#143))#38,17,2) AS sale#40, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#144 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#145 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#146 as decimal(9,2)))), DecimalType(9,2), true))#39 AS refund#41] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#143)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#144 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#145 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#146 as decimal(9,2)))), DecimalType(9,2)))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#143))#38, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#144 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#145 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#146 as decimal(9,2)))), DecimalType(9,2)))#39] +Results [3]: [cs_item_sk#139, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#143))#38,17,2) AS sale#40, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#144 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#145 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#146 as decimal(9,2)))), DecimalType(9,2)))#39 AS refund#41] (126) 
Filter [codegen id : 35] Input [3]: [cs_item_sk#139, sale#40, refund#41] -Condition : (isnotnull(sale#40) AND (cast(sale#40 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(refund#41)), DecimalType(21,2), true))) +Condition : (isnotnull(sale#40) AND (cast(sale#40 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(refund#41)), DecimalType(21,2)))) (127) Project [codegen id : 35] Output [1]: [cs_item_sk#139] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/simplified.txt index 6917f8f6c6e2d..859101af5baf2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/simplified.txt @@ -77,7 +77,7 @@ WholeStageCodegen (54) Sort [cs_item_sk] Project [cs_item_sk] Filter [sale,refund] - HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sale,refund,sum,sum,isEmpty] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sale,refund,sum,sum,isEmpty] InputAdapter Exchange [cs_item_sk] #6 WholeStageCodegen (8) @@ -254,7 +254,7 @@ WholeStageCodegen (54) Sort [cs_item_sk] Project [cs_item_sk] Filter [sale,refund] - HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sale,refund,sum,sum,isEmpty] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sale,refund,sum,sum,isEmpty] InputAdapter ReusedExchange [cs_item_sk,sum,sum,isEmpty] #6 InputAdapter diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt index 7e9f2f0c6777b..00d9676dc2ec9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt @@ -167,7 +167,7 @@ Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d (22) HashAggregate [codegen id : 7] Input [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#16, i_class#17, i_category#18, 
i_product_name#19] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] Aggregate Attributes [2]: [sum#21, isEmpty#22] Results [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#23, isEmpty#24] @@ -178,9 +178,9 @@ Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, i_product_nam (24) HashAggregate [codegen id : 8] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#23, isEmpty#24] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 as decimal(38,2)) AS sumsales#27] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] +Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 as decimal(38,2)) AS sumsales#27] (25) ReusedExchange [Reuses operator id: 23] Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#28, isEmpty#29] @@ -188,9 +188,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (26) HashAggregate [codegen id : 16] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#28, isEmpty#29] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 
0.00))#26] -Results [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] +Results [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] (27) HashAggregate [codegen id : 16] Input [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sumsales#30] @@ -216,9 +216,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (31) HashAggregate [codegen id : 25] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#39, isEmpty#40] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] +Results [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] (32) HashAggregate [codegen id : 25] Input [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sumsales#30] @@ -244,9 +244,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (36) HashAggregate [codegen id : 34] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#50, isEmpty#51] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as 
decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] +Results [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] (37) HashAggregate [codegen id : 34] Input [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sumsales#30] @@ -272,9 +272,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (41) HashAggregate [codegen id : 43] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#62, isEmpty#63] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] +Results [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] (42) HashAggregate [codegen id : 43] Input [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sumsales#30] @@ -300,9 +300,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (46) HashAggregate [codegen id : 52] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#75, isEmpty#76] Keys [8]: 
[i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [4]: [i_category#18, i_class#17, i_brand#16, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] +Results [4]: [i_category#18, i_class#17, i_brand#16, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] (47) HashAggregate [codegen id : 52] Input [4]: [i_category#18, i_class#17, i_brand#16, sumsales#30] @@ -328,9 +328,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (51) HashAggregate [codegen id : 61] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#89, isEmpty#90] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [3]: [i_category#18, i_class#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] +Results [3]: [i_category#18, i_class#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] (52) HashAggregate [codegen id : 61] Input [3]: [i_category#18, i_class#17, sumsales#30] @@ -356,9 +356,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (56) HashAggregate [codegen id : 70] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#104, isEmpty#105] Keys 
[8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [2]: [i_category#18, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] +Results [2]: [i_category#18, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] (57) HashAggregate [codegen id : 70] Input [2]: [i_category#18, sumsales#30] @@ -384,9 +384,9 @@ Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8 (61) HashAggregate [codegen id : 79] Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#120, isEmpty#121] Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26] -Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#26 AS sumsales#30] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] +Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] (62) HashAggregate [codegen id : 79] Input [1]: [sumsales#30] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/simplified.txt index 2e4627c7d48aa..8b39e27c4ca40 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/simplified.txt @@ -9,7 +9,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category] #1 Union WholeStageCodegen (8) - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] #2 WholeStageCodegen (7) @@ -63,7 +63,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy] #7 WholeStageCodegen (16) HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (26) @@ -72,7 +72,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy] #8 WholeStageCodegen (25) HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (35) @@ -81,7 +81,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name,d_year] #9 WholeStageCodegen (34) HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as 
decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (44) @@ -90,7 +90,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name] #10 WholeStageCodegen (43) HashAggregate [i_category,i_class,i_brand,i_product_name,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (53) @@ -99,7 +99,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand] #11 WholeStageCodegen (52) HashAggregate [i_category,i_class,i_brand,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (62) @@ -108,7 +108,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class] #12 WholeStageCodegen (61) HashAggregate [i_category,i_class,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (71) @@ -117,7 +117,7 @@ TakeOrderedAndProject 
[i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category] #13 WholeStageCodegen (70) HashAggregate [i_category,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (80) @@ -126,6 +126,6 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange #14 WholeStageCodegen (79) HashAggregate [sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt index 4c344cef9d4a8..d0208d6e24e2f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt @@ -152,7 +152,7 @@ Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d (19) HashAggregate [codegen id : 4] Input [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#15, i_class#16, i_category#17, i_product_name#18] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] Aggregate Attributes [2]: [sum#20, isEmpty#21] Results [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#22, isEmpty#23] @@ -163,9 +163,9 @@ Arguments: hashpartitioning(i_category#17, i_class#16, i_brand#15, i_product_nam (21) HashAggregate [codegen id : 5] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#22, isEmpty#23] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, 
d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 as decimal(38,2)) AS sumsales#26] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] +Results [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 as decimal(38,2)) AS sumsales#26] (22) ReusedExchange [Reuses operator id: 20] Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#27, isEmpty#28] @@ -173,9 +173,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (23) HashAggregate [codegen id : 10] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#27, isEmpty#28] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] +Results [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] (24) HashAggregate [codegen id : 10] Input [8]: 
[i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, sumsales#29] @@ -201,9 +201,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (28) HashAggregate [codegen id : 16] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#38, isEmpty#39] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [7]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] +Results [7]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] (29) HashAggregate [codegen id : 16] Input [7]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, sumsales#29] @@ -229,9 +229,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (33) HashAggregate [codegen id : 22] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#49, isEmpty#50] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [6]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] +Results [6]: [i_category#17, i_class#16, i_brand#15, 
i_product_name#18, d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] (34) HashAggregate [codegen id : 22] Input [6]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, sumsales#29] @@ -257,9 +257,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (38) HashAggregate [codegen id : 28] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#61, isEmpty#62] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [5]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] +Results [5]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] (39) HashAggregate [codegen id : 28] Input [5]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, sumsales#29] @@ -285,9 +285,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (43) HashAggregate [codegen id : 34] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#74, isEmpty#75] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [4]: [i_category#17, i_class#16, i_brand#15, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as 
decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] +Results [4]: [i_category#17, i_class#16, i_brand#15, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] (44) HashAggregate [codegen id : 34] Input [4]: [i_category#17, i_class#16, i_brand#15, sumsales#29] @@ -313,9 +313,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (48) HashAggregate [codegen id : 40] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#88, isEmpty#89] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [3]: [i_category#17, i_class#16, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] +Results [3]: [i_category#17, i_class#16, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] (49) HashAggregate [codegen id : 40] Input [3]: [i_category#17, i_class#16, sumsales#29] @@ -341,9 +341,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (53) HashAggregate [codegen id : 46] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#103, isEmpty#104] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [2]: [i_category#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * 
promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] +Results [2]: [i_category#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] (54) HashAggregate [codegen id : 46] Input [2]: [i_category#17, sumsales#29] @@ -369,9 +369,9 @@ Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8 (58) HashAggregate [codegen id : 52] Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#119, isEmpty#120] Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25] -Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2), true), 0.00))#25 AS sumsales#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] +Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] (59) HashAggregate [codegen id : 52] Input [1]: [sumsales#29] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/simplified.txt index d3a866b3ddf29..35d285165618b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/simplified.txt @@ -9,7 +9,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category] #1 Union WholeStageCodegen (5) - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] #2 WholeStageCodegen (4) @@ -54,7 +54,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy] #6 WholeStageCodegen 
(10) HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (17) @@ -63,7 +63,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy] #7 WholeStageCodegen (16) HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (23) @@ -72,7 +72,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name,d_year] #8 WholeStageCodegen (22) HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (29) @@ -81,7 +81,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand,i_product_name] #9 WholeStageCodegen (28) HashAggregate [i_category,i_class,i_brand,i_product_name,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate 
[i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (35) @@ -90,7 +90,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class,i_brand] #10 WholeStageCodegen (34) HashAggregate [i_category,i_class,i_brand,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (41) @@ -99,7 +99,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category,i_class] #11 WholeStageCodegen (40) HashAggregate [i_category,i_class,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (47) @@ -108,7 +108,7 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange [i_category] #12 WholeStageCodegen (46) HashAggregate [i_category,sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 WholeStageCodegen (53) @@ -117,6 +117,6 @@ TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_ Exchange #13 WholeStageCodegen (52) HashAggregate [sumsales] [sum,isEmpty,sum,isEmpty] - HashAggregate 
[i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(ss_quantity as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum,isEmpty] InputAdapter ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/explain.txt index 864593f67a1e1..7ee6ada91dfea 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/explain.txt @@ -428,7 +428,7 @@ Arguments: [customer_id#69 ASC NULLS FIRST], false, 0 (77) SortMergeJoin [codegen id : 35] Left keys [1]: [customer_id#17] Right keys [1]: [customer_id#69] -Join condition: (CASE WHEN (year_total#54 > 0.00) THEN CheckOverflow((promote_precision(year_total#70) / promote_precision(year_total#54)), DecimalType(37,20), true) END > CASE WHEN (year_total#18 > 0.00) THEN CheckOverflow((promote_precision(year_total#37) / promote_precision(year_total#18)), DecimalType(37,20), true) END) +Join condition: (CASE WHEN (year_total#54 > 0.00) THEN CheckOverflow((promote_precision(year_total#70) / promote_precision(year_total#54)), DecimalType(37,20)) END > CASE WHEN (year_total#18 > 0.00) THEN CheckOverflow((promote_precision(year_total#37) / promote_precision(year_total#18)), DecimalType(37,20)) END) (78) Project [codegen id : 35] Output [3]: [customer_id#34, customer_first_name#35, customer_last_name#36] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/explain.txt index 8e7250c4fc4d3..a2c8929c7f285 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/explain.txt @@ -397,7 +397,7 @@ Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id= (69) BroadcastHashJoin [codegen id : 16] Left keys [1]: [customer_id#16] Right keys [1]: [customer_id#67] -Join condition: (CASE WHEN (year_total#52 > 0.00) THEN CheckOverflow((promote_precision(year_total#68) / promote_precision(year_total#52)), DecimalType(37,20), true) END > CASE WHEN (year_total#17 > 0.00) THEN CheckOverflow((promote_precision(year_total#35) / promote_precision(year_total#17)), DecimalType(37,20), true) END) +Join condition: (CASE WHEN (year_total#52 > 0.00) THEN CheckOverflow((promote_precision(year_total#68) / promote_precision(year_total#52)), DecimalType(37,20)) END > CASE WHEN (year_total#17 > 0.00) THEN CheckOverflow((promote_precision(year_total#35) / promote_precision(year_total#17)), DecimalType(37,20)) END) (70) Project [codegen id : 16] Output [3]: [customer_id#32, customer_first_name#33, customer_last_name#34] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt index cd66823f10e8c..27a2b5f734281 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt @@ -226,7 +226,7 @@ Right keys [2]: [cr_order_number#18, cr_item_sk#17] Join condition: None (23) Project [codegen id : 7] -Output [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, (cs_quantity#3 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#23, CheckOverflow((promote_precision(cast(cs_ext_sales_price#4 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#24] +Output [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, (cs_quantity#3 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#23, CheckOverflow((promote_precision(cast(cs_ext_sales_price#4 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#24] Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#15, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] (24) Scan parquet default.store_sales @@ -308,7 +308,7 @@ Right keys [2]: [sr_ticket_number#39, sr_item_sk#38] Join condition: None (42) Project [codegen id : 14] -Output [7]: [d_year#36, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, (ss_quantity#27 - coalesce(sr_return_quantity#40, 0)) AS sales_cnt#44, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#41, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#45] +Output [7]: [d_year#36, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, (ss_quantity#27 - coalesce(sr_return_quantity#40, 0)) AS sales_cnt#44, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#41, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#45] Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, d_year#36, sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41] (43) Scan parquet default.web_sales @@ -390,7 +390,7 @@ Right keys [2]: [wr_order_number#60, wr_item_sk#59] Join condition: None (61) Project [codegen id : 21] -Output [7]: [d_year#57, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, (ws_quantity#48 - coalesce(wr_return_quantity#61, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ws_ext_sales_price#49 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#62, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#66] +Output [7]: [d_year#57, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, (ws_quantity#48 - coalesce(wr_return_quantity#61, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ws_ext_sales_price#49 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#62, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#66] Input [13]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, d_year#57, 
wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62] (62) Union @@ -499,7 +499,7 @@ Right keys [2]: [cr_order_number#93, cr_item_sk#92] Join condition: None (85) Project [codegen id : 32] -Output [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, (cs_quantity#80 - coalesce(cr_return_quantity#94, 0)) AS sales_cnt#23, CheckOverflow((promote_precision(cast(cs_ext_sales_price#81 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#95, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#24] +Output [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, (cs_quantity#80 - coalesce(cr_return_quantity#94, 0)) AS sales_cnt#23, CheckOverflow((promote_precision(cast(cs_ext_sales_price#81 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#95, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#24] Input [13]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90, cr_item_sk#92, cr_order_number#93, cr_return_quantity#94, cr_return_amount#95] (86) Scan parquet default.store_sales @@ -562,7 +562,7 @@ Right keys [2]: [sr_ticket_number#110, sr_item_sk#109] Join condition: None (100) Project [codegen id : 39] -Output [7]: [d_year#107, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, (ss_quantity#98 - coalesce(sr_return_quantity#111, 0)) AS sales_cnt#44, CheckOverflow((promote_precision(cast(ss_ext_sales_price#99 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#112, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#45] +Output [7]: [d_year#107, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, (ss_quantity#98 - coalesce(sr_return_quantity#111, 0)) AS sales_cnt#44, CheckOverflow((promote_precision(cast(ss_ext_sales_price#99 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#112, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#45] Input [13]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, d_year#107, sr_item_sk#109, sr_ticket_number#110, sr_return_quantity#111, sr_return_amt#112] (101) Scan parquet default.web_sales @@ -625,7 +625,7 @@ Right keys [2]: [wr_order_number#127, wr_item_sk#126] Join condition: None (115) Project [codegen id : 46] -Output [7]: [d_year#124, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, (ws_quantity#115 - coalesce(wr_return_quantity#128, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ws_ext_sales_price#116 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#129, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#66] +Output [7]: [d_year#124, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, (ws_quantity#115 - coalesce(wr_return_quantity#128, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ws_ext_sales_price#116 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#129, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#66] Input [13]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, d_year#124, wr_item_sk#126, wr_order_number#127, wr_return_quantity#128, wr_return_amt#129] (116) Union @@ -677,10 +677,10 @@ Arguments: [i_brand_id#85 ASC NULLS FIRST, i_class_id#86 
ASC NULLS FIRST, i_cate (125) SortMergeJoin [codegen id : 51] Left keys [4]: [i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] Right keys [4]: [i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] -Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#75 as decimal(17,2))) / promote_precision(cast(sales_cnt#134 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#75 as decimal(17,2))) / promote_precision(cast(sales_cnt#134 as decimal(17,2)))), DecimalType(37,20)) < 0.90000000000000000000) (126) Project [codegen id : 51] -Output [10]: [d_year#90 AS prev_year#137, d_year#15 AS year#138, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#134 AS prev_yr_cnt#139, sales_cnt#75 AS curr_yr_cnt#140, (sales_cnt#75 - sales_cnt#134) AS sales_cnt_diff#141, CheckOverflow((promote_precision(cast(sales_amt#76 as decimal(19,2))) - promote_precision(cast(sales_amt#135 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#142] +Output [10]: [d_year#90 AS prev_year#137, d_year#15 AS year#138, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#134 AS prev_yr_cnt#139, sales_cnt#75 AS curr_yr_cnt#140, (sales_cnt#75 - sales_cnt#134) AS sales_cnt_diff#141, CheckOverflow((promote_precision(cast(sales_amt#76 as decimal(19,2))) - promote_precision(cast(sales_amt#135 as decimal(19,2)))), DecimalType(19,2)) AS sales_amt_diff#142] Input [14]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#75, sales_amt#76, d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#134, sales_amt#135] (127) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt index cd66823f10e8c..27a2b5f734281 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt @@ -226,7 +226,7 @@ Right keys [2]: [cr_order_number#18, cr_item_sk#17] Join condition: None (23) Project [codegen id : 7] -Output [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, (cs_quantity#3 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#23, CheckOverflow((promote_precision(cast(cs_ext_sales_price#4 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#24] +Output [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, (cs_quantity#3 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#23, CheckOverflow((promote_precision(cast(cs_ext_sales_price#4 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#24] Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#15, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] (24) Scan parquet default.store_sales @@ -308,7 +308,7 @@ Right keys [2]: [sr_ticket_number#39, sr_item_sk#38] Join condition: None (42) Project [codegen id : 14] -Output [7]: [d_year#36, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, (ss_quantity#27 - coalesce(sr_return_quantity#40, 0)) AS 
sales_cnt#44, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#41, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#45] +Output [7]: [d_year#36, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, (ss_quantity#27 - coalesce(sr_return_quantity#40, 0)) AS sales_cnt#44, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#41, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#45] Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, d_year#36, sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41] (43) Scan parquet default.web_sales @@ -390,7 +390,7 @@ Right keys [2]: [wr_order_number#60, wr_item_sk#59] Join condition: None (61) Project [codegen id : 21] -Output [7]: [d_year#57, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, (ws_quantity#48 - coalesce(wr_return_quantity#61, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ws_ext_sales_price#49 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#62, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#66] +Output [7]: [d_year#57, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, (ws_quantity#48 - coalesce(wr_return_quantity#61, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ws_ext_sales_price#49 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#62, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#66] Input [13]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, d_year#57, wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62] (62) Union @@ -499,7 +499,7 @@ Right keys [2]: [cr_order_number#93, cr_item_sk#92] Join condition: None (85) Project [codegen id : 32] -Output [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, (cs_quantity#80 - coalesce(cr_return_quantity#94, 0)) AS sales_cnt#23, CheckOverflow((promote_precision(cast(cs_ext_sales_price#81 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#95, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#24] +Output [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, (cs_quantity#80 - coalesce(cr_return_quantity#94, 0)) AS sales_cnt#23, CheckOverflow((promote_precision(cast(cs_ext_sales_price#81 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#95, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#24] Input [13]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90, cr_item_sk#92, cr_order_number#93, cr_return_quantity#94, cr_return_amount#95] (86) Scan parquet default.store_sales @@ -562,7 +562,7 @@ Right keys [2]: [sr_ticket_number#110, sr_item_sk#109] Join condition: None (100) Project [codegen id : 39] -Output [7]: [d_year#107, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, (ss_quantity#98 - coalesce(sr_return_quantity#111, 0)) AS sales_cnt#44, CheckOverflow((promote_precision(cast(ss_ext_sales_price#99 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#112, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#45] +Output 
[7]: [d_year#107, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, (ss_quantity#98 - coalesce(sr_return_quantity#111, 0)) AS sales_cnt#44, CheckOverflow((promote_precision(cast(ss_ext_sales_price#99 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#112, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#45] Input [13]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, d_year#107, sr_item_sk#109, sr_ticket_number#110, sr_return_quantity#111, sr_return_amt#112] (101) Scan parquet default.web_sales @@ -625,7 +625,7 @@ Right keys [2]: [wr_order_number#127, wr_item_sk#126] Join condition: None (115) Project [codegen id : 46] -Output [7]: [d_year#124, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, (ws_quantity#115 - coalesce(wr_return_quantity#128, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ws_ext_sales_price#116 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#129, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#66] +Output [7]: [d_year#124, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, (ws_quantity#115 - coalesce(wr_return_quantity#128, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ws_ext_sales_price#116 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#129, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#66] Input [13]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, d_year#124, wr_item_sk#126, wr_order_number#127, wr_return_quantity#128, wr_return_amt#129] (116) Union @@ -677,10 +677,10 @@ Arguments: [i_brand_id#85 ASC NULLS FIRST, i_class_id#86 ASC NULLS FIRST, i_cate (125) SortMergeJoin [codegen id : 51] Left keys [4]: [i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] Right keys [4]: [i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] -Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#75 as decimal(17,2))) / promote_precision(cast(sales_cnt#134 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#75 as decimal(17,2))) / promote_precision(cast(sales_cnt#134 as decimal(17,2)))), DecimalType(37,20)) < 0.90000000000000000000) (126) Project [codegen id : 51] -Output [10]: [d_year#90 AS prev_year#137, d_year#15 AS year#138, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#134 AS prev_yr_cnt#139, sales_cnt#75 AS curr_yr_cnt#140, (sales_cnt#75 - sales_cnt#134) AS sales_cnt_diff#141, CheckOverflow((promote_precision(cast(sales_amt#76 as decimal(19,2))) - promote_precision(cast(sales_amt#135 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#142] +Output [10]: [d_year#90 AS prev_year#137, d_year#15 AS year#138, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#134 AS prev_yr_cnt#139, sales_cnt#75 AS curr_yr_cnt#140, (sales_cnt#75 - sales_cnt#134) AS sales_cnt_diff#141, CheckOverflow((promote_precision(cast(sales_amt#76 as decimal(19,2))) - promote_precision(cast(sales_amt#135 as decimal(19,2)))), DecimalType(19,2)) AS sales_amt_diff#142] Input [14]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#75, sales_amt#76, d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#134, 
sales_amt#135] (127) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt index 4d27141fd8465..335e1aee4e5ca 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt @@ -238,7 +238,7 @@ Right keys [1]: [s_store_sk#23] Join condition: None (30) Project [codegen id : 8] -Output [5]: [store channel AS channel#34, s_store_sk#7 AS id#35, sales#16, coalesce(returns#31, 0.00) AS returns#36, CheckOverflow((promote_precision(cast(profit#17 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#37] +Output [5]: [store channel AS channel#34, s_store_sk#7 AS id#35, sales#16, coalesce(returns#31, 0.00) AS returns#36, CheckOverflow((promote_precision(cast(profit#17 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#37] Input [6]: [s_store_sk#7, sales#16, profit#17, s_store_sk#23, returns#31, profit_loss#32] (31) Scan parquet default.catalog_sales @@ -329,7 +329,7 @@ Arguments: IdentityBroadcastMode, [id=#65] Join condition: None (49) Project [codegen id : 14] -Output [5]: [catalog channel AS channel#66, cs_call_center_sk#38 AS id#67, sales#50, returns#63, CheckOverflow((promote_precision(cast(profit#51 as decimal(18,2))) - promote_precision(cast(profit_loss#64 as decimal(18,2)))), DecimalType(18,2), true) AS profit#68] +Output [5]: [catalog channel AS channel#66, cs_call_center_sk#38 AS id#67, sales#50, returns#63, CheckOverflow((promote_precision(cast(profit#51 as decimal(18,2))) - promote_precision(cast(profit_loss#64 as decimal(18,2)))), DecimalType(18,2)) AS profit#68] Input [5]: [cs_call_center_sk#38, sales#50, profit#51, returns#63, profit_loss#64] (50) Scan parquet default.web_sales @@ -471,7 +471,7 @@ Right keys [1]: [wp_web_page_sk#90] Join condition: None (79) Project [codegen id : 22] -Output [5]: [web channel AS channel#101, wp_web_page_sk#74 AS id#102, sales#83, coalesce(returns#98, 0.00) AS returns#103, CheckOverflow((promote_precision(cast(profit#84 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#99, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#104] +Output [5]: [web channel AS channel#101, wp_web_page_sk#74 AS id#102, sales#83, coalesce(returns#98, 0.00) AS returns#103, CheckOverflow((promote_precision(cast(profit#84 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#99, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#104] Input [6]: [wp_web_page_sk#74, sales#83, profit#84, wp_web_page_sk#90, returns#98, profit_loss#99] (80) Union diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt index a1d99b72c8147..815eabe2fe0e8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt @@ -238,7 +238,7 @@ Right keys [1]: [s_store_sk#23] Join condition: None (30) Project [codegen id : 8] -Output [5]: [store channel AS channel#34, s_store_sk#7 AS id#35, sales#16, coalesce(returns#31, 0.00) AS returns#36, 
CheckOverflow((promote_precision(cast(profit#17 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#37] +Output [5]: [store channel AS channel#34, s_store_sk#7 AS id#35, sales#16, coalesce(returns#31, 0.00) AS returns#36, CheckOverflow((promote_precision(cast(profit#17 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#37] Input [6]: [s_store_sk#7, sales#16, profit#17, s_store_sk#23, returns#31, profit_loss#32] (31) Scan parquet default.catalog_sales @@ -329,7 +329,7 @@ Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#53))#62,17,2) AS re Join condition: None (49) Project [codegen id : 14] -Output [5]: [catalog channel AS channel#66, cs_call_center_sk#38 AS id#67, sales#50, returns#64, CheckOverflow((promote_precision(cast(profit#51 as decimal(18,2))) - promote_precision(cast(profit_loss#65 as decimal(18,2)))), DecimalType(18,2), true) AS profit#68] +Output [5]: [catalog channel AS channel#66, cs_call_center_sk#38 AS id#67, sales#50, returns#64, CheckOverflow((promote_precision(cast(profit#51 as decimal(18,2))) - promote_precision(cast(profit_loss#65 as decimal(18,2)))), DecimalType(18,2)) AS profit#68] Input [5]: [cs_call_center_sk#38, sales#50, profit#51, returns#64, profit_loss#65] (50) Scan parquet default.web_sales @@ -471,7 +471,7 @@ Right keys [1]: [wp_web_page_sk#90] Join condition: None (79) Project [codegen id : 22] -Output [5]: [web channel AS channel#101, wp_web_page_sk#74 AS id#102, sales#83, coalesce(returns#98, 0.00) AS returns#103, CheckOverflow((promote_precision(cast(profit#84 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#99, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#104] +Output [5]: [web channel AS channel#101, wp_web_page_sk#74 AS id#102, sales#83, coalesce(returns#98, 0.00) AS returns#103, CheckOverflow((promote_precision(cast(profit#84 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#99, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#104] Input [6]: [wp_web_page_sk#74, sales#83, profit#84, wp_web_page_sk#90, returns#98, profit_loss#99] (80) Union diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/explain.txt index b54f3fa20c63f..133d5272ec111 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/explain.txt @@ -382,7 +382,7 @@ Right keys [3]: [cs_sold_year#83, cs_item_sk#60, cs_customer_sk#84] Join condition: None (69) Project [codegen id : 23] -Output [13]: [round((cast(ss_qty#27 as double) / cast(coalesce((ws_qty#56 + cs_qty#85), 1) as double)), 2) AS ratio#88, ss_qty#27 AS store_qty#89, ss_wc#28 AS store_wholesale_cost#90, ss_sp#29 AS store_sales_price#91, (coalesce(ws_qty#56, 0) + coalesce(cs_qty#85, 0)) AS other_chan_qty#92, CheckOverflow((promote_precision(cast(coalesce(ws_wc#57, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#86, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_wholesale_cost#93, CheckOverflow((promote_precision(cast(coalesce(ws_sp#58, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#87, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_sales_price#94, ss_sold_year#26, ss_item_sk#1, 
ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29] +Output [13]: [round((cast(ss_qty#27 as double) / cast(coalesce((ws_qty#56 + cs_qty#85), 1) as double)), 2) AS ratio#88, ss_qty#27 AS store_qty#89, ss_wc#28 AS store_wholesale_cost#90, ss_sp#29 AS store_sales_price#91, (coalesce(ws_qty#56, 0) + coalesce(cs_qty#85, 0)) AS other_chan_qty#92, CheckOverflow((promote_precision(cast(coalesce(ws_wc#57, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#86, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#93, CheckOverflow((promote_precision(cast(coalesce(ws_sp#58, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#87, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#94, ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29] Input [15]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29, ws_qty#56, ws_wc#57, ws_sp#58, cs_sold_year#83, cs_item_sk#60, cs_customer_sk#84, cs_qty#85, cs_wc#86, cs_sp#87] (70) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/explain.txt index b54f3fa20c63f..133d5272ec111 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/explain.txt @@ -382,7 +382,7 @@ Right keys [3]: [cs_sold_year#83, cs_item_sk#60, cs_customer_sk#84] Join condition: None (69) Project [codegen id : 23] -Output [13]: [round((cast(ss_qty#27 as double) / cast(coalesce((ws_qty#56 + cs_qty#85), 1) as double)), 2) AS ratio#88, ss_qty#27 AS store_qty#89, ss_wc#28 AS store_wholesale_cost#90, ss_sp#29 AS store_sales_price#91, (coalesce(ws_qty#56, 0) + coalesce(cs_qty#85, 0)) AS other_chan_qty#92, CheckOverflow((promote_precision(cast(coalesce(ws_wc#57, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#86, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_wholesale_cost#93, CheckOverflow((promote_precision(cast(coalesce(ws_sp#58, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#87, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_sales_price#94, ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29] +Output [13]: [round((cast(ss_qty#27 as double) / cast(coalesce((ws_qty#56 + cs_qty#85), 1) as double)), 2) AS ratio#88, ss_qty#27 AS store_qty#89, ss_wc#28 AS store_wholesale_cost#90, ss_sp#29 AS store_sales_price#91, (coalesce(ws_qty#56, 0) + coalesce(cs_qty#85, 0)) AS other_chan_qty#92, CheckOverflow((promote_precision(cast(coalesce(ws_wc#57, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#86, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#93, CheckOverflow((promote_precision(cast(coalesce(ws_sp#58, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#87, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#94, ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29] Input [15]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29, ws_qty#56, ws_wc#57, ws_sp#58, cs_sold_year#83, cs_item_sk#60, cs_customer_sk#84, cs_qty#85, cs_wc#86, cs_sp#87] (70) TakeOrderedAndProject diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt index 34777c108a268..a9ea4905b9fb7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt @@ -283,7 +283,7 @@ Input [7]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt# (37) HashAggregate [codegen id : 9] Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#24] Keys [1]: [s_store_id#24] -Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] Aggregate Attributes [5]: [sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] Results [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] @@ -294,9 +294,9 @@ Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [id=#36] (39) HashAggregate [codegen id : 10] Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] Keys [1]: [s_store_id#24] -Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#37, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#39] -Results [5]: [store channel AS channel#40, concat(store, s_store_id#24) AS id#41, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#37,17,2) AS sales#42, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38 AS returns#43, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#39 AS profit#44] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#37, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39] +Results [5]: [store channel AS channel#40, concat(store, s_store_id#24) AS id#41, 
MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#37,17,2) AS sales#42, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38 AS returns#43, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39 AS profit#44] (40) Scan parquet default.catalog_sales Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] @@ -422,7 +422,7 @@ Input [7]: [cs_catalog_page_sk#45, cs_ext_sales_price#49, cs_net_profit#50, cr_r (68) HashAggregate [codegen id : 19] Input [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#63] Keys [1]: [cp_catalog_page_id#63] -Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] Aggregate Attributes [5]: [sum#65, sum#66, isEmpty#67, sum#68, isEmpty#69] Results [6]: [cp_catalog_page_id#63, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] @@ -433,9 +433,9 @@ Arguments: hashpartitioning(cp_catalog_page_id#63, 5), ENSURE_REQUIREMENTS, [id= (70) HashAggregate [codegen id : 20] Input [6]: [cp_catalog_page_id#63, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] Keys [1]: [cp_catalog_page_id#63] -Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#76, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#78] -Results [5]: [catalog channel AS channel#79, concat(catalog_page, cp_catalog_page_id#63) AS id#80, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#76,17,2) AS sales#81, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77 AS returns#82, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#78 AS profit#83] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#76, sum(coalesce(cast(cr_return_amount#55 
as decimal(12,2)), 0.00))#77, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78] +Results [5]: [catalog channel AS channel#79, concat(catalog_page, cp_catalog_page_id#63) AS id#80, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#76,17,2) AS sales#81, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77 AS returns#82, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78 AS profit#83] (71) Scan parquet default.web_sales Output [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] @@ -561,7 +561,7 @@ Input [7]: [ws_web_site_sk#85, ws_ext_sales_price#88, ws_net_profit#89, wr_retur (99) HashAggregate [codegen id : 29] Input [5]: [ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#102] Keys [1]: [web_site_id#102] -Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#88)), partial_sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#88)), partial_sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] Aggregate Attributes [5]: [sum#104, sum#105, isEmpty#106, sum#107, isEmpty#108] Results [6]: [web_site_id#102, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] @@ -572,9 +572,9 @@ Arguments: hashpartitioning(web_site_id#102, 5), ENSURE_REQUIREMENTS, [id=#114] (101) HashAggregate [codegen id : 30] Input [6]: [web_site_id#102, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] Keys [1]: [web_site_id#102] -Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#88)), sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#88))#115, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#117] -Results [5]: [web channel AS channel#118, concat(web_site, web_site_id#102) AS id#119, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#88))#115,17,2) AS sales#120, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116 AS returns#121, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#117 AS profit#122] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#88)), sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), 
sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#88))#115, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117] +Results [5]: [web channel AS channel#118, concat(web_site, web_site_id#102) AS id#119, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#88))#115,17,2) AS sales#120, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116 AS returns#121, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117 AS profit#122] (102) Union diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt index ef6c39d87b482..af80e8a825183 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt @@ -16,7 +16,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Union WholeStageCodegen (10) - HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [s_store_id] #3 WholeStageCodegen (9) @@ -86,7 +86,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.store [s_store_sk,s_store_id] WholeStageCodegen (20) - HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [cp_catalog_page_id] #10 WholeStageCodegen (19) @@ -137,7 
+137,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] WholeStageCodegen (30) - HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [web_site_id] #14 WholeStageCodegen (29) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt index 3e68f3fe694fc..03e744ac87b53 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt @@ -283,7 +283,7 @@ Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt# (37) HashAggregate [codegen id : 9] Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18] Keys [1]: [s_store_id#18] -Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] Aggregate Attributes [5]: [sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] Results [6]: [s_store_id#18, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] @@ -294,9 +294,9 @@ Arguments: hashpartitioning(s_store_id#18, 5), ENSURE_REQUIREMENTS, [id=#36] (39) HashAggregate [codegen id : 10] Input [6]: [s_store_id#18, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] Keys [1]: [s_store_id#18] -Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#37, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#39] -Results [5]: [store channel AS channel#40, concat(store, 
s_store_id#18) AS id#41, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#37,17,2) AS sales#42, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38 AS returns#43, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#39 AS profit#44] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#37, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39] +Results [5]: [store channel AS channel#40, concat(store, s_store_id#18) AS id#41, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#37,17,2) AS sales#42, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38 AS returns#43, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39 AS profit#44] (40) Scan parquet default.catalog_sales Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] @@ -422,7 +422,7 @@ Input [7]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_a (68) HashAggregate [codegen id : 19] Input [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61] Keys [1]: [cp_catalog_page_id#61] -Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] Aggregate Attributes [5]: [sum#65, sum#66, isEmpty#67, sum#68, isEmpty#69] Results [6]: [cp_catalog_page_id#61, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] @@ -433,9 +433,9 @@ Arguments: hashpartitioning(cp_catalog_page_id#61, 5), ENSURE_REQUIREMENTS, [id= (70) HashAggregate [codegen id : 20] Input [6]: [cp_catalog_page_id#61, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] Keys [1]: [cp_catalog_page_id#61] -Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#76, sum(coalesce(cast(cr_return_amount#55 as 
decimal(12,2)), 0.00))#77, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#78] -Results [5]: [catalog channel AS channel#79, concat(catalog_page, cp_catalog_page_id#61) AS id#80, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#76,17,2) AS sales#81, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77 AS returns#82, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#78 AS profit#83] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#76, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78] +Results [5]: [catalog channel AS channel#79, concat(catalog_page, cp_catalog_page_id#61) AS id#80, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#76,17,2) AS sales#81, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77 AS returns#82, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78 AS profit#83] (71) Scan parquet default.web_sales Output [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] @@ -561,7 +561,7 @@ Input [7]: [ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_a (99) HashAggregate [codegen id : 29] Input [5]: [ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100] Keys [1]: [web_site_id#100] -Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#88)), partial_sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#88)), partial_sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] Aggregate Attributes [5]: [sum#104, sum#105, isEmpty#106, sum#107, isEmpty#108] Results [6]: [web_site_id#100, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] @@ -572,9 +572,9 @@ Arguments: hashpartitioning(web_site_id#100, 5), ENSURE_REQUIREMENTS, [id=#114] (101) HashAggregate [codegen id : 30] Input [6]: [web_site_id#100, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] Keys [1]: [web_site_id#100] -Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#88)), sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), 
sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#88))#115, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#117] -Results [5]: [web channel AS channel#118, concat(web_site, web_site_id#100) AS id#119, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#88))#115,17,2) AS sales#120, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116 AS returns#121, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#117 AS profit#122] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#88)), sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#88))#115, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117] +Results [5]: [web channel AS channel#118, concat(web_site, web_site_id#100) AS id#119, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#88))#115,17,2) AS sales#120, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116 AS returns#121, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117 AS profit#122] (102) Union diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt index d3fc38799fe0e..169957c1c164e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt @@ -16,7 +16,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Union WholeStageCodegen (10) - HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange 
[s_store_id] #3 WholeStageCodegen (9) @@ -86,7 +86,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.promotion [p_promo_sk,p_channel_tv] WholeStageCodegen (20) - HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [cp_catalog_page_id] #10 WholeStageCodegen (19) @@ -137,7 +137,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter ReusedExchange [p_promo_sk] #9 WholeStageCodegen (30) - HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [web_site_id] #14 WholeStageCodegen (29) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/explain.txt index 2c31ce69e5e5a..fd1c4b503eaa8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/explain.txt @@ -122,7 +122,7 @@ Input [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_pri Arguments: [sum(_w1#20) windowspecdefinition(i_class#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#10] (22) Project [codegen id : 9] -Output [7]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23] +Output [7]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23] Input [9]: [i_item_id#7, i_item_desc#8, i_category#11, 
i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, _we0#22] (23) Exchange diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/explain.txt index 259338b39c245..68e7dba19dbab 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/explain.txt @@ -107,7 +107,7 @@ Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_pric Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17), true) AS revenueratio#22] +Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22] Input [9]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, _we0#21] (20) Exchange diff --git a/sql/core/src/test/resources/tpch-plan-stability/q1/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q1/explain.txt index cc0d21d19409b..c2fdfb24d2d85 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q1/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q1/explain.txt @@ -31,7 +31,7 @@ Input [7]: [l_quantity#1, l_extendedprice#2, l_discount#3, l_tax#4, l_returnflag (5) HashAggregate [codegen id : 1] Input [6]: [l_quantity#1, l_extendedprice#2, l_discount#3, l_tax#4, l_returnflag#5, l_linestatus#6] Keys [2]: [l_returnflag#5, l_linestatus#6] -Functions [8]: [partial_sum(l_quantity#1), partial_sum(l_extendedprice#2), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)), partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax#4 as decimal(11,0)))), DecimalType(11,0), true) as decimal(22,0)))), DecimalType(34,0), true)), partial_avg(UnscaledValue(l_quantity#1)), partial_avg(UnscaledValue(l_extendedprice#2)), partial_avg(UnscaledValue(l_discount#3)), partial_count(1)] +Functions [8]: [partial_sum(l_quantity#1), partial_sum(l_extendedprice#2), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))), partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as 
decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax#4 as decimal(11,0)))), DecimalType(11,0)) as decimal(22,0)))), DecimalType(34,0))), partial_avg(UnscaledValue(l_quantity#1)), partial_avg(UnscaledValue(l_extendedprice#2)), partial_avg(UnscaledValue(l_discount#3)), partial_count(1)] Aggregate Attributes [15]: [sum#8, isEmpty#9, sum#10, isEmpty#11, sum#12, isEmpty#13, sum#14, isEmpty#15, sum#16, count#17, sum#18, count#19, sum#20, count#21, count#22] Results [17]: [l_returnflag#5, l_linestatus#6, sum#23, isEmpty#24, sum#25, isEmpty#26, sum#27, isEmpty#28, sum#29, isEmpty#30, sum#31, count#32, sum#33, count#34, sum#35, count#36, count#37] @@ -42,9 +42,9 @@ Arguments: hashpartitioning(l_returnflag#5, l_linestatus#6, 5), ENSURE_REQUIREME (7) HashAggregate [codegen id : 2] Input [17]: [l_returnflag#5, l_linestatus#6, sum#23, isEmpty#24, sum#25, isEmpty#26, sum#27, isEmpty#28, sum#29, isEmpty#30, sum#31, count#32, sum#33, count#34, sum#35, count#36, count#37] Keys [2]: [l_returnflag#5, l_linestatus#6] -Functions [8]: [sum(l_quantity#1), sum(l_extendedprice#2), sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)), sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax#4 as decimal(11,0)))), DecimalType(11,0), true) as decimal(22,0)))), DecimalType(34,0), true)), avg(UnscaledValue(l_quantity#1)), avg(UnscaledValue(l_extendedprice#2)), avg(UnscaledValue(l_discount#3)), count(1)] -Aggregate Attributes [8]: [sum(l_quantity#1)#39, sum(l_extendedprice#2)#40, sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#41, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax#4 as decimal(11,0)))), DecimalType(11,0), true) as decimal(22,0)))), DecimalType(34,0), true))#42, avg(UnscaledValue(l_quantity#1))#43, avg(UnscaledValue(l_extendedprice#2))#44, avg(UnscaledValue(l_discount#3))#45, count(1)#46] -Results [10]: [l_returnflag#5, l_linestatus#6, sum(l_quantity#1)#39 AS sum_qty#47, sum(l_extendedprice#2)#40 AS sum_base_price#48, sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#41 AS sum_disc_price#49, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax#4 as decimal(11,0)))), 
DecimalType(11,0), true) as decimal(22,0)))), DecimalType(34,0), true))#42 AS sum_charge#50, cast((avg(UnscaledValue(l_quantity#1))#43 / 1.0) as decimal(14,4)) AS avg_qty#51, cast((avg(UnscaledValue(l_extendedprice#2))#44 / 1.0) as decimal(14,4)) AS avg_price#52, cast((avg(UnscaledValue(l_discount#3))#45 / 1.0) as decimal(14,4)) AS avg_disc#53, count(1)#46 AS count_order#54] +Functions [8]: [sum(l_quantity#1), sum(l_extendedprice#2), sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))), sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax#4 as decimal(11,0)))), DecimalType(11,0)) as decimal(22,0)))), DecimalType(34,0))), avg(UnscaledValue(l_quantity#1)), avg(UnscaledValue(l_extendedprice#2)), avg(UnscaledValue(l_discount#3)), count(1)] +Aggregate Attributes [8]: [sum(l_quantity#1)#39, sum(l_extendedprice#2)#40, sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#41, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax#4 as decimal(11,0)))), DecimalType(11,0)) as decimal(22,0)))), DecimalType(34,0)))#42, avg(UnscaledValue(l_quantity#1))#43, avg(UnscaledValue(l_extendedprice#2))#44, avg(UnscaledValue(l_discount#3))#45, count(1)#46] +Results [10]: [l_returnflag#5, l_linestatus#6, sum(l_quantity#1)#39 AS sum_qty#47, sum(l_extendedprice#2)#40 AS sum_base_price#48, sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#41 AS sum_disc_price#49, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax#4 as decimal(11,0)))), DecimalType(11,0)) as decimal(22,0)))), DecimalType(34,0)))#42 AS sum_charge#50, cast((avg(UnscaledValue(l_quantity#1))#43 / 1.0) as decimal(14,4)) AS avg_qty#51, cast((avg(UnscaledValue(l_extendedprice#2))#44 / 1.0) as decimal(14,4)) AS avg_price#52, cast((avg(UnscaledValue(l_discount#3))#45 / 1.0) as decimal(14,4)) AS avg_disc#53, count(1)#46 AS count_order#54] (8) Exchange Input [10]: [l_returnflag#5, l_linestatus#6, sum_qty#47, sum_base_price#48, sum_disc_price#49, sum_charge#50, avg_qty#51, avg_price#52, avg_disc#53, count_order#54] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q1/simplified.txt b/sql/core/src/test/resources/tpch-plan-stability/q1/simplified.txt index f94c3d6b5b4d8..68e8e39486e48 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q1/simplified.txt +++ 
b/sql/core/src/test/resources/tpch-plan-stability/q1/simplified.txt @@ -3,7 +3,7 @@ WholeStageCodegen (3) InputAdapter Exchange [l_returnflag,l_linestatus] #1 WholeStageCodegen (2) - HashAggregate [l_returnflag,l_linestatus,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,count,sum,count,sum,count,count] [sum(l_quantity),sum(l_extendedprice),sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)),sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax as decimal(11,0)))), DecimalType(11,0), true) as decimal(22,0)))), DecimalType(34,0), true)),avg(UnscaledValue(l_quantity)),avg(UnscaledValue(l_extendedprice)),avg(UnscaledValue(l_discount)),count(1),sum_qty,sum_base_price,sum_disc_price,sum_charge,avg_qty,avg_price,avg_disc,count_order,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,count,sum,count,sum,count,count] + HashAggregate [l_returnflag,l_linestatus,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,count,sum,count,sum,count,count] [sum(l_quantity),sum(l_extendedprice),sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))),sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax as decimal(11,0)))), DecimalType(11,0)) as decimal(22,0)))), DecimalType(34,0))),avg(UnscaledValue(l_quantity)),avg(UnscaledValue(l_extendedprice)),avg(UnscaledValue(l_discount)),count(1),sum_qty,sum_base_price,sum_disc_price,sum_charge,avg_qty,avg_price,avg_disc,count_order,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,count,sum,count,sum,count,count] InputAdapter Exchange [l_returnflag,l_linestatus] #2 WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpch-plan-stability/q10/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q10/explain.txt index 4cd56105a252b..08be511944f36 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q10/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q10/explain.txt @@ -134,7 +134,7 @@ Input [11]: [c_custkey#1, c_name#2, c_address#3, c_nationkey#4, c_phone#5, c_acc (24) HashAggregate [codegen id : 4] Input [9]: [c_custkey#1, c_name#2, c_address#3, c_phone#5, c_acctbal#6, c_comment#7, l_extendedprice#13, l_discount#14, n_name#18] Keys [7]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#18, c_address#3, c_comment#7] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#13 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#14 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#13 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#14 as 
decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] Aggregate Attributes [2]: [sum#20, isEmpty#21] Results [9]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#18, c_address#3, c_comment#7, sum#22, isEmpty#23] @@ -145,9 +145,9 @@ Arguments: hashpartitioning(c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_nam (26) HashAggregate [codegen id : 5] Input [9]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#18, c_address#3, c_comment#7, sum#22, isEmpty#23] Keys [7]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#18, c_address#3, c_comment#7] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#13 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#14 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#13 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#14 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#25] -Results [8]: [c_custkey#1, c_name#2, sum(CheckOverflow((promote_precision(cast(l_extendedprice#13 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#14 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#25 AS revenue#26, c_acctbal#6, n_name#18, c_address#3, c_phone#5, c_comment#7] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#13 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#14 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#13 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#14 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#25] +Results [8]: [c_custkey#1, c_name#2, sum(CheckOverflow((promote_precision(cast(l_extendedprice#13 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#14 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#25 AS revenue#26, c_acctbal#6, n_name#18, c_address#3, c_phone#5, c_comment#7] (27) TakeOrderedAndProject Input [8]: [c_custkey#1, c_name#2, revenue#26, c_acctbal#6, n_name#18, c_address#3, c_phone#5, c_comment#7] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q10/simplified.txt b/sql/core/src/test/resources/tpch-plan-stability/q10/simplified.txt index eb09255ad799f..86cee35abda3d 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q10/simplified.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q10/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [revenue,c_custkey,c_name,c_acctbal,n_name,c_address,c_phone,c_comment] WholeStageCodegen (5) - HashAggregate [c_custkey,c_name,c_acctbal,c_phone,n_name,c_address,c_comment,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)),revenue,sum,isEmpty] + HashAggregate [c_custkey,c_name,c_acctbal,c_phone,n_name,c_address,c_comment,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))),revenue,sum,isEmpty] InputAdapter Exchange 
[c_custkey,c_name,c_acctbal,c_phone,n_name,c_address,c_comment] #1 WholeStageCodegen (4) diff --git a/sql/core/src/test/resources/tpch-plan-stability/q11/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q11/explain.txt index c210d30019ad8..bc7e629fd7dd8 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q11/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q11/explain.txt @@ -98,7 +98,7 @@ Input [5]: [ps_partkey#1, ps_availqty#3, ps_supplycost#4, s_nationkey#6, n_natio (17) HashAggregate [codegen id : 3] Input [3]: [ps_partkey#1, ps_availqty#3, ps_supplycost#4] Keys [1]: [ps_partkey#1] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0)))] Aggregate Attributes [2]: [sum#11, isEmpty#12] Results [3]: [ps_partkey#1, sum#13, isEmpty#14] @@ -109,9 +109,9 @@ Arguments: hashpartitioning(ps_partkey#1, 5), ENSURE_REQUIREMENTS, [id=#15] (19) HashAggregate [codegen id : 4] Input [3]: [ps_partkey#1, sum#13, isEmpty#14] Keys [1]: [ps_partkey#1] -Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0), true))#16] -Results [2]: [ps_partkey#1, sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0), true))#16 AS value#17] +Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0)))#16] +Results [2]: [ps_partkey#1, sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0)))#16 AS value#17] (20) Filter [codegen id : 4] Input [2]: [ps_partkey#1, value#17] @@ -183,7 +183,7 @@ Input [4]: [ps_availqty#22, ps_supplycost#23, s_nationkey#25, n_nationkey#26] (32) HashAggregate [codegen id : 3] Input [2]: [ps_availqty#22, ps_supplycost#23] Keys: [] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#23) * promote_precision(cast(ps_availqty#22 as decimal(10,0)))), DecimalType(21,0), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#23) * promote_precision(cast(ps_availqty#22 as decimal(10,0)))), DecimalType(21,0)))] Aggregate Attributes [2]: [sum#27, isEmpty#28] Results [2]: [sum#29, isEmpty#30] @@ -194,8 +194,8 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#31] (34) HashAggregate [codegen id : 4] Input [2]: [sum#29, isEmpty#30] Keys: [] -Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#23) * promote_precision(cast(ps_availqty#22 as decimal(10,0)))), DecimalType(21,0), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#23) * promote_precision(cast(ps_availqty#22 as decimal(10,0)))), DecimalType(21,0), true))#32] -Results [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#23) * 
promote_precision(cast(ps_availqty#22 as decimal(10,0)))), DecimalType(21,0), true))#32 as decimal(38,10))) * 0.0001000000), DecimalType(38,6), true) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#33] +Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#23) * promote_precision(cast(ps_availqty#22 as decimal(10,0)))), DecimalType(21,0)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#23) * promote_precision(cast(ps_availqty#22 as decimal(10,0)))), DecimalType(21,0)))#32] +Results [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#23) * promote_precision(cast(ps_availqty#22 as decimal(10,0)))), DecimalType(21,0)))#32 as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#33] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q11/simplified.txt b/sql/core/src/test/resources/tpch-plan-stability/q11/simplified.txt index f94cf82874cf3..bdafa6c8b43c1 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q11/simplified.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q11/simplified.txt @@ -6,7 +6,7 @@ WholeStageCodegen (5) Filter [value] Subquery #1 WholeStageCodegen (4) - HashAggregate [sum,isEmpty] [sum(CheckOverflow((promote_precision(ps_supplycost) * promote_precision(cast(ps_availqty as decimal(10,0)))), DecimalType(21,0), true)),(sum((ps_supplycost * ps_availqty)) * 0.0001000000),sum,isEmpty] + HashAggregate [sum,isEmpty] [sum(CheckOverflow((promote_precision(ps_supplycost) * promote_precision(cast(ps_availqty as decimal(10,0)))), DecimalType(21,0))),(sum((ps_supplycost * ps_availqty)) * 0.0001000000),sum,isEmpty] InputAdapter Exchange #5 WholeStageCodegen (3) @@ -23,7 +23,7 @@ WholeStageCodegen (5) ReusedExchange [s_suppkey,s_nationkey] #3 InputAdapter ReusedExchange [n_nationkey] #4 - HashAggregate [ps_partkey,sum,isEmpty] [sum(CheckOverflow((promote_precision(ps_supplycost) * promote_precision(cast(ps_availqty as decimal(10,0)))), DecimalType(21,0), true)),value,sum,isEmpty] + HashAggregate [ps_partkey,sum,isEmpty] [sum(CheckOverflow((promote_precision(ps_supplycost) * promote_precision(cast(ps_availqty as decimal(10,0)))), DecimalType(21,0))),value,sum,isEmpty] InputAdapter Exchange [ps_partkey] #2 WholeStageCodegen (3) diff --git a/sql/core/src/test/resources/tpch-plan-stability/q14/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q14/explain.txt index 98e3b4a5e8fac..0e923aebe1e11 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q14/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q14/explain.txt @@ -62,7 +62,7 @@ Input [5]: [l_partkey#1, l_extendedprice#2, l_discount#3, p_partkey#5, p_type#6] (11) HashAggregate [codegen id : 2] Input [3]: [l_extendedprice#2, l_discount#3, p_type#6] Keys: [] -Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true) ELSE 0 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] +Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as 
decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) ELSE 0 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] Aggregate Attributes [4]: [sum#8, isEmpty#9, sum#10, isEmpty#11] Results [4]: [sum#12, isEmpty#13, sum#14, isEmpty#15] @@ -73,7 +73,7 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#16] (13) HashAggregate [codegen id : 3] Input [4]: [sum#12, isEmpty#13, sum#14, isEmpty#15] Keys: [] -Functions [2]: [sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true) ELSE 0 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] -Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true) ELSE 0 END)#17, sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#18] -Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.00 * promote_precision(cast(sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true) ELSE 0 END)#17 as decimal(34,2)))), DecimalType(38,2), true)) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#18 as decimal(38,2)))), DecimalType(38,6), true) AS promo_revenue#19] +Functions [2]: [sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) ELSE 0 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] +Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) ELSE 0 END)#17, sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#18] +Results 
[1]: [CheckOverflow((promote_precision(CheckOverflow((100.00 * promote_precision(cast(sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) ELSE 0 END)#17 as decimal(34,2)))), DecimalType(38,2))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#18 as decimal(38,2)))), DecimalType(38,6)) AS promo_revenue#19] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q14/simplified.txt b/sql/core/src/test/resources/tpch-plan-stability/q14/simplified.txt index 8f46e5fff4efa..ca3c30110de04 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q14/simplified.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q14/simplified.txt @@ -1,5 +1,5 @@ WholeStageCodegen (3) - HashAggregate [sum,isEmpty,sum,isEmpty] [sum(CASE WHEN StartsWith(p_type, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true) ELSE 0 END),sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)),promo_revenue,sum,isEmpty,sum,isEmpty] + HashAggregate [sum,isEmpty,sum,isEmpty] [sum(CASE WHEN StartsWith(p_type, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) ELSE 0 END),sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))),promo_revenue,sum,isEmpty,sum,isEmpty] InputAdapter Exchange #1 WholeStageCodegen (2) diff --git a/sql/core/src/test/resources/tpch-plan-stability/q15/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q15/explain.txt index a64943b45fefd..a615b73893782 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q15/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q15/explain.txt @@ -52,7 +52,7 @@ Input [4]: [l_suppkey#5, l_extendedprice#6, l_discount#7, l_shipdate#8] (8) HashAggregate [codegen id : 1] Input [3]: [l_suppkey#5, l_extendedprice#6, l_discount#7] Keys [1]: [l_suppkey#5] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] Aggregate Attributes [2]: [sum#9, isEmpty#10] Results [3]: [l_suppkey#5, sum#11, isEmpty#12] @@ -63,9 +63,9 @@ Arguments: hashpartitioning(l_suppkey#5, 5), ENSURE_REQUIREMENTS, [id=#13] (10) HashAggregate [codegen id : 2] Input [3]: [l_suppkey#5, 
sum#11, isEmpty#12] Keys [1]: [l_suppkey#5] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#14] -Results [2]: [l_suppkey#5 AS supplier_no#15, sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#14 AS total_revenue#16] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#14] +Results [2]: [l_suppkey#5 AS supplier_no#15, sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#14 AS total_revenue#16] (11) Filter [codegen id : 2] Input [2]: [supplier_no#15, total_revenue#16] @@ -128,7 +128,7 @@ Input [4]: [l_suppkey#5, l_extendedprice#6, l_discount#7, l_shipdate#8] (21) HashAggregate [codegen id : 1] Input [3]: [l_suppkey#5, l_extendedprice#6, l_discount#7] Keys [1]: [l_suppkey#5] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] Aggregate Attributes [2]: [sum#21, isEmpty#22] Results [3]: [l_suppkey#5, sum#23, isEmpty#24] @@ -139,9 +139,9 @@ Arguments: hashpartitioning(l_suppkey#5, 5), ENSURE_REQUIREMENTS, [id=#25] (23) HashAggregate [codegen id : 2] Input [3]: [l_suppkey#5, sum#23, isEmpty#24] Keys [1]: [l_suppkey#5] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#14] -Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#14 AS total_revenue#16] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as 
decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#14] +Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#14 AS total_revenue#16] (24) HashAggregate [codegen id : 2] Input [1]: [total_revenue#16] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q15/simplified.txt b/sql/core/src/test/resources/tpch-plan-stability/q15/simplified.txt index a492b9e8b5249..ae1de64f65a92 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q15/simplified.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q15/simplified.txt @@ -20,7 +20,7 @@ WholeStageCodegen (4) Exchange #4 WholeStageCodegen (2) HashAggregate [total_revenue] [max,max] - HashAggregate [l_suppkey,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)),total_revenue,sum,isEmpty] + HashAggregate [l_suppkey,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))),total_revenue,sum,isEmpty] InputAdapter Exchange [l_suppkey] #5 WholeStageCodegen (1) @@ -30,7 +30,7 @@ WholeStageCodegen (4) ColumnarToRow InputAdapter Scan parquet default.lineitem [l_suppkey,l_extendedprice,l_discount,l_shipdate] - HashAggregate [l_suppkey,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)),supplier_no,total_revenue,sum,isEmpty] + HashAggregate [l_suppkey,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))),supplier_no,total_revenue,sum,isEmpty] InputAdapter Exchange [l_suppkey] #3 WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpch-plan-stability/q17/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q17/explain.txt index 416b5345d6a82..652bf04238ca2 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q17/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q17/explain.txt @@ -99,7 +99,7 @@ Input [3]: [l_partkey#9, sum#13, count#14] Keys [1]: [l_partkey#9] Functions [1]: [avg(UnscaledValue(l_quantity#10))] Aggregate Attributes [1]: [avg(UnscaledValue(l_quantity#10))#16] -Results [2]: [CheckOverflow((0.2000 * promote_precision(cast((avg(UnscaledValue(l_quantity#10))#16 / 1.0) as decimal(14,4)))), DecimalType(16,5), true) AS (0.2 * avg(l_quantity))#17, l_partkey#9] +Results [2]: [CheckOverflow((0.2000 * promote_precision(cast((avg(UnscaledValue(l_quantity#10))#16 / 1.0) as decimal(14,4)))), DecimalType(16,5)) AS (0.2 * avg(l_quantity))#17, l_partkey#9] (17) Filter [codegen id : 
3] Input [2]: [(0.2 * avg(l_quantity))#17, l_partkey#9] @@ -134,5 +134,5 @@ Input [2]: [sum#21, isEmpty#22] Keys: [] Functions [1]: [sum(l_extendedprice#3)] Aggregate Attributes [1]: [sum(l_extendedprice#3)#24] -Results [1]: [CheckOverflow((promote_precision(cast(sum(l_extendedprice#3)#24 as decimal(21,1))) / 7.0), DecimalType(27,6), true) AS avg_yearly#25] +Results [1]: [CheckOverflow((promote_precision(cast(sum(l_extendedprice#3)#24 as decimal(21,1))) / 7.0), DecimalType(27,6)) AS avg_yearly#25] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q19/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q19/explain.txt index 41bff0f6756ce..b5d84e54efc7e 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q19/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q19/explain.txt @@ -62,7 +62,7 @@ Input [8]: [l_partkey#1, l_quantity#2, l_extendedprice#3, l_discount#4, p_partke (11) HashAggregate [codegen id : 2] Input [2]: [l_extendedprice#3, l_discount#4] Keys: [] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] Aggregate Attributes [2]: [sum#15, isEmpty#16] Results [2]: [sum#17, isEmpty#18] @@ -73,7 +73,7 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#19] (13) HashAggregate [codegen id : 3] Input [2]: [sum#17, isEmpty#18] Keys: [] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#20] -Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#20 AS revenue#21] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#20] +Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#20 AS revenue#21] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q19/simplified.txt b/sql/core/src/test/resources/tpch-plan-stability/q19/simplified.txt index fc2ac1096938e..24838e5c93109 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q19/simplified.txt +++ 
b/sql/core/src/test/resources/tpch-plan-stability/q19/simplified.txt @@ -1,5 +1,5 @@ WholeStageCodegen (3) - HashAggregate [sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)),revenue,sum,isEmpty] + HashAggregate [sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))),revenue,sum,isEmpty] InputAdapter Exchange #1 WholeStageCodegen (2) diff --git a/sql/core/src/test/resources/tpch-plan-stability/q20/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q20/explain.txt index 32fab608371f3..43d5431a70f2e 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q20/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q20/explain.txt @@ -135,7 +135,7 @@ Input [4]: [l_partkey#11, l_suppkey#12, sum#17, isEmpty#18] Keys [2]: [l_partkey#11, l_suppkey#12] Functions [1]: [sum(l_quantity#13)] Aggregate Attributes [1]: [sum(l_quantity#13)#20] -Results [3]: [CheckOverflow((0.5 * promote_precision(cast(sum(l_quantity#13)#20 as decimal(21,1)))), DecimalType(22,1), true) AS (0.5 * sum(l_quantity))#21, l_partkey#11, l_suppkey#12] +Results [3]: [CheckOverflow((0.5 * promote_precision(cast(sum(l_quantity#13)#20 as decimal(21,1)))), DecimalType(22,1)) AS (0.5 * sum(l_quantity))#21, l_partkey#11, l_suppkey#12] (22) Filter [codegen id : 4] Input [3]: [(0.5 * sum(l_quantity))#21, l_partkey#11, l_suppkey#12] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q3/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q3/explain.txt index ee09633bda706..e0243ce3bbd52 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q3/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q3/explain.txt @@ -101,7 +101,7 @@ Input [6]: [o_orderkey#3, o_orderdate#5, o_shippriority#6, l_orderkey#8, l_exten (18) HashAggregate [codegen id : 3] Input [5]: [o_orderdate#5, o_shippriority#6, l_orderkey#8, l_extendedprice#9, l_discount#10] Keys [3]: [l_orderkey#8, o_orderdate#5, o_shippriority#6] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] Aggregate Attributes [2]: [sum#13, isEmpty#14] Results [5]: [l_orderkey#8, o_orderdate#5, o_shippriority#6, sum#15, isEmpty#16] @@ -112,9 +112,9 @@ Arguments: hashpartitioning(l_orderkey#8, o_orderdate#5, o_shippriority#6, 5), E (20) HashAggregate [codegen id : 4] Input [5]: [l_orderkey#8, o_orderdate#5, o_shippriority#6, sum#15, isEmpty#16] Keys [3]: [l_orderkey#8, o_orderdate#5, o_shippriority#6] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) 
* promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#18] -Results [4]: [l_orderkey#8, sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#18 AS revenue#19, o_orderdate#5, o_shippriority#6] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#18] +Results [4]: [l_orderkey#8, sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#18 AS revenue#19, o_orderdate#5, o_shippriority#6] (21) TakeOrderedAndProject Input [4]: [l_orderkey#8, revenue#19, o_orderdate#5, o_shippriority#6] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q3/simplified.txt b/sql/core/src/test/resources/tpch-plan-stability/q3/simplified.txt index 9e234b2ff6d3d..26c18d19d7e20 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q3/simplified.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q3/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [revenue,o_orderdate,l_orderkey,o_shippriority] WholeStageCodegen (4) - HashAggregate [l_orderkey,o_orderdate,o_shippriority,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)),revenue,sum,isEmpty] + HashAggregate [l_orderkey,o_orderdate,o_shippriority,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))),revenue,sum,isEmpty] InputAdapter Exchange [l_orderkey,o_orderdate,o_shippriority] #1 WholeStageCodegen (3) diff --git a/sql/core/src/test/resources/tpch-plan-stability/q5/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q5/explain.txt index fba8d0ea9629d..c3dbd88338317 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q5/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q5/explain.txt @@ -201,7 +201,7 @@ Input [5]: [l_extendedprice#9, l_discount#10, n_name#16, n_regionkey#17, r_regio (36) HashAggregate [codegen id : 6] Input [3]: [l_extendedprice#9, l_discount#10, n_name#16] Keys [1]: [n_name#16] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] Aggregate Attributes 
[2]: [sum#22, isEmpty#23] Results [3]: [n_name#16, sum#24, isEmpty#25] @@ -212,9 +212,9 @@ Arguments: hashpartitioning(n_name#16, 5), ENSURE_REQUIREMENTS, [id=#26] (38) HashAggregate [codegen id : 7] Input [3]: [n_name#16, sum#24, isEmpty#25] Keys [1]: [n_name#16] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#27] -Results [2]: [n_name#16, sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true))#27 AS revenue#28] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#27] +Results [2]: [n_name#16, sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#27 AS revenue#28] (39) Exchange Input [2]: [n_name#16, revenue#28] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q5/simplified.txt b/sql/core/src/test/resources/tpch-plan-stability/q5/simplified.txt index aa5c8b0b0b844..a9d8480dc8b98 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q5/simplified.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q5/simplified.txt @@ -3,7 +3,7 @@ WholeStageCodegen (8) InputAdapter Exchange [revenue] #1 WholeStageCodegen (7) - HashAggregate [n_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true)),revenue,sum,isEmpty] + HashAggregate [n_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(l_extendedprice as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))),revenue,sum,isEmpty] InputAdapter Exchange [n_name] #2 WholeStageCodegen (6) diff --git a/sql/core/src/test/resources/tpch-plan-stability/q6/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q6/explain.txt index 3b203b22cc70f..a092574d73c57 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q6/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q6/explain.txt @@ -12,7 +12,7 @@ Arguments: , [l_extendedprice#1, l_discount#2] (2) HashAggregate [codegen id : 1] Input [2]: [l_extendedprice#1, l_discount#2] Keys: [] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(l_extendedprice#1) * promote_precision(l_discount#2)), DecimalType(21,0), true))] +Functions [1]: 
[partial_sum(CheckOverflow((promote_precision(l_extendedprice#1) * promote_precision(l_discount#2)), DecimalType(21,0)))] Aggregate Attributes [2]: [sum#3, isEmpty#4] Results [2]: [sum#5, isEmpty#6] @@ -23,7 +23,7 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#7] (4) HashAggregate [codegen id : 2] Input [2]: [sum#5, isEmpty#6] Keys: [] -Functions [1]: [sum(CheckOverflow((promote_precision(l_extendedprice#1) * promote_precision(l_discount#2)), DecimalType(21,0), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(l_extendedprice#1) * promote_precision(l_discount#2)), DecimalType(21,0), true))#8] -Results [1]: [sum(CheckOverflow((promote_precision(l_extendedprice#1) * promote_precision(l_discount#2)), DecimalType(21,0), true))#8 AS revenue#9] +Functions [1]: [sum(CheckOverflow((promote_precision(l_extendedprice#1) * promote_precision(l_discount#2)), DecimalType(21,0)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(l_extendedprice#1) * promote_precision(l_discount#2)), DecimalType(21,0)))#8] +Results [1]: [sum(CheckOverflow((promote_precision(l_extendedprice#1) * promote_precision(l_discount#2)), DecimalType(21,0)))#8 AS revenue#9] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q6/simplified.txt b/sql/core/src/test/resources/tpch-plan-stability/q6/simplified.txt index 3170df2269ac4..3d026241e9ccd 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q6/simplified.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q6/simplified.txt @@ -1,5 +1,5 @@ WholeStageCodegen (2) - HashAggregate [sum,isEmpty] [sum(CheckOverflow((promote_precision(l_extendedprice) * promote_precision(l_discount)), DecimalType(21,0), true)),revenue,sum,isEmpty] + HashAggregate [sum,isEmpty] [sum(CheckOverflow((promote_precision(l_extendedprice) * promote_precision(l_discount)), DecimalType(21,0))),revenue,sum,isEmpty] InputAdapter Exchange #1 WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpch-plan-stability/q7/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q7/explain.txt index 7b20174aa50ce..9994d01a28e5c 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q7/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q7/explain.txt @@ -167,7 +167,7 @@ Right keys [1]: [n_nationkey#18] Join condition: (((n_name#16 = FRANCE) AND (n_name#19 = GERMANY)) OR ((n_name#16 = GERMANY) AND (n_name#19 = FRANCE))) (30) Project [codegen id : 6] -Output [4]: [n_name#16 AS supp_nation#20, n_name#19 AS cust_nation#21, year(l_shipdate#7) AS l_year#22, CheckOverflow((promote_precision(cast(l_extendedprice#5 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#6 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true) AS volume#23] +Output [4]: [n_name#16 AS supp_nation#20, n_name#19 AS cust_nation#21, year(l_shipdate#7) AS l_year#22, CheckOverflow((promote_precision(cast(l_extendedprice#5 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#6 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) AS volume#23] Input [7]: [l_extendedprice#5, l_discount#6, l_shipdate#7, c_nationkey#13, n_name#16, n_nationkey#18, n_name#19] (31) HashAggregate [codegen id : 6] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q8/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q8/explain.txt index eb8ea81ef33a1..4eb4f811035d8 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q8/explain.txt +++ 
b/sql/core/src/test/resources/tpch-plan-stability/q8/explain.txt @@ -261,7 +261,7 @@ Right keys [1]: [r_regionkey#25] Join condition: None (47) Project [codegen id : 8] -Output [3]: [year(o_orderdate#14) AS o_year#28, CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true) AS volume#29, n_name#23 AS nation#30] +Output [3]: [year(o_orderdate#14) AS o_year#28, CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) AS volume#29, n_name#23 AS nation#30] Input [6]: [l_extendedprice#6, l_discount#7, o_orderdate#14, n_regionkey#20, n_name#23, r_regionkey#25] (48) HashAggregate [codegen id : 8] @@ -280,7 +280,7 @@ Input [5]: [o_year#28, sum#35, isEmpty#36, sum#37, isEmpty#38] Keys [1]: [o_year#28] Functions [2]: [sum(CASE WHEN (nation#30 = BRAZIL) THEN volume#29 ELSE 0 END), sum(volume#29)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#30 = BRAZIL) THEN volume#29 ELSE 0 END)#40, sum(volume#29)#41] -Results [2]: [o_year#28, CheckOverflow((promote_precision(sum(CASE WHEN (nation#30 = BRAZIL) THEN volume#29 ELSE 0 END)#40) / promote_precision(sum(volume#29)#41)), DecimalType(38,6), true) AS mkt_share#42] +Results [2]: [o_year#28, CheckOverflow((promote_precision(sum(CASE WHEN (nation#30 = BRAZIL) THEN volume#29 ELSE 0 END)#40) / promote_precision(sum(volume#29)#41)), DecimalType(38,6)) AS mkt_share#42] (51) Exchange Input [2]: [o_year#28, mkt_share#42] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q9/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q9/explain.txt index 511c6b80f8cf0..9ed3700e668e0 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q9/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q9/explain.txt @@ -190,7 +190,7 @@ Right keys [1]: [n_nationkey#20] Join condition: None (34) Project [codegen id : 6] -Output [3]: [n_name#21 AS nation#23, year(o_orderdate#18) AS o_year#24, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#7 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#8 as decimal(11,0)))), DecimalType(11,0), true))), DecimalType(22,0), true) as decimal(23,0))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#15) * promote_precision(l_quantity#6)), DecimalType(21,0), true) as decimal(23,0)))), DecimalType(23,0), true) AS amount#25] +Output [3]: [n_name#21 AS nation#23, year(o_orderdate#18) AS o_year#24, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#7 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#8 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) as decimal(23,0))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#15) * promote_precision(l_quantity#6)), DecimalType(21,0)) as decimal(23,0)))), DecimalType(23,0)) AS amount#25] Input [8]: [l_quantity#6, l_extendedprice#7, l_discount#8, s_nationkey#11, ps_supplycost#15, o_orderdate#18, n_nationkey#20, n_name#21] (35) HashAggregate [codegen id : 6] From 3a750ca4686e85e715c48c688f48acf7851144ed Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 21 Feb 2022 18:56:44 +0900 Subject: [PATCH 289/513] [SPARK-38256][BUILD] Upgrade
`scalatestplus-mockito` to 3.2.11.0 ### What changes were proposed in this pull request? This pr aims to upgrade `scalatestplus-mockito` from 3.2.10.0 to 3.2.11.0, and the actual `mockito` is upgraded from 3.12.4 to 4.2.0 ### Why are the changes needed? Upgrade `scalatestplus-mockito`; the changes in the actually used `mockito` are as follows: - https://github.com/mockito/mockito/releases/tag/v4.0.0 - https://github.com/mockito/mockito/releases/tag/v4.1.0 - https://github.com/mockito/mockito/releases/tag/v4.2.0 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35579 from LuciferYang/upgrade-mockito-42. Authored-by: yangjie01 Signed-off-by: Hyukjin Kwon --- pom.xml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index 18b75f4d1717e..109448866ca37 100644 --- a/pom.xml +++ b/pom.xml @@ -393,7 +393,7 @@ org.scalatestplus - mockito-3-12_${scala.binary.version} + mockito-4-2_${scala.binary.version} test @@ -1091,8 +1091,8 @@ org.scalatestplus - mockito-3-12_${scala.binary.version} - 3.2.10.0 + mockito-4-2_${scala.binary.version} + 3.2.11.0 test @@ -1104,13 +1104,13 @@ org.mockito mockito-core - 3.12.4 + 4.2.0 test org.mockito mockito-inline - 3.12.4 + 4.2.0 test From c538d262dea1632deffffdf6adf9d9b31716818f Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 21 Feb 2022 11:21:27 +0100 Subject: [PATCH 290/513] [SPARK-37427][PYTHON][MLLIB] Inline typehints for pyspark.mllib.tree ### What changes were proposed in this pull request? This PR migrates `pyspark.mllib.tree` type annotations from a stub file to inline type hints. ### Why are the changes needed? Part of the ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. Closes #35545 from zero323/SPARK-37427. Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/mllib/tree.py | 222 +++++++++++++++++++--------------- python/pyspark/mllib/tree.pyi | 124 ------------------- 2 files changed, 122 insertions(+), 224 deletions(-) delete mode 100644 python/pyspark/mllib/tree.pyi diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index 9b477ffecfd23..e1d87e99c8a5e 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -23,6 +23,12 @@ from pyspark.mllib.linalg import _convert_to_vector from pyspark.mllib.regression import LabeledPoint from pyspark.mllib.util import JavaLoader, JavaSaveable +from typing import Dict, Optional, Tuple, Union, overload, TYPE_CHECKING +from pyspark.rdd import RDD + +if TYPE_CHECKING: + from pyspark.mllib._typing import VectorLike + __all__ = [ "DecisionTreeModel", @@ -40,7 +46,15 @@ class TreeEnsembleModel(JavaModelWrapper, JavaSaveable): .. versionadded:: 1.3.0 """ - def predict(self, x): + @overload + def predict(self, x: "VectorLike") -> float: + ... + + @overload + def predict(self, x: RDD["VectorLike"]) -> RDD[float]: + ... + + def predict(self, x: Union["VectorLike", RDD["VectorLike"]]) -> Union[float, RDD[float]]: """ Predict values for a single data point or an RDD of points using the model trained. @@ -60,37 +74,45 @@ def predict(self, x): return self.call("predict", _convert_to_vector(x)) @since("1.3.0") - def numTrees(self): + def numTrees(self) -> int: """ Get number of trees in ensemble. """ return self.call("numTrees") @since("1.3.0") - def totalNumNodes(self): + def totalNumNodes(self) -> int: """ Get total number of nodes, summed over all trees in the ensemble.
""" return self.call("totalNumNodes") - def __repr__(self): + def __repr__(self) -> str: """Summary of model""" return self._java_model.toString() @since("1.3.0") - def toDebugString(self): + def toDebugString(self) -> str: """Full model""" return self._java_model.toDebugString() -class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader): +class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader["DecisionTreeModel"]): """ A decision tree model for classification or regression. .. versionadded:: 1.1.0 """ - def predict(self, x): + @overload + def predict(self, x: "VectorLike") -> float: + ... + + @overload + def predict(self, x: RDD["VectorLike"]) -> RDD[float]: + ... + + def predict(self, x: Union["VectorLike", RDD["VectorLike"]]) -> Union[float, RDD[float]]: """ Predict the label of one or more examples. @@ -115,29 +137,29 @@ def predict(self, x): return self.call("predict", _convert_to_vector(x)) @since("1.1.0") - def numNodes(self): + def numNodes(self) -> int: """Get number of nodes in tree, including leaf nodes.""" return self._java_model.numNodes() @since("1.1.0") - def depth(self): + def depth(self) -> int: """ Get depth of tree (e.g. depth 0 means 1 leaf node, depth 1 means 1 internal node + 2 leaf nodes). """ return self._java_model.depth() - def __repr__(self): + def __repr__(self) -> str: """summary of model.""" return self._java_model.toString() @since("1.2.0") - def toDebugString(self): + def toDebugString(self) -> str: """full model.""" return self._java_model.toDebugString() @classmethod - def _java_loader_class(cls): + def _java_loader_class(cls) -> str: return "org.apache.spark.mllib.tree.model.DecisionTreeModel" @@ -152,16 +174,16 @@ class DecisionTree: @classmethod def _train( cls, - data, - type, - numClasses, - features, - impurity="gini", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - ): + data: RDD[LabeledPoint], + type: str, + numClasses: int, + features: Dict[int, int], + impurity: str = "gini", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + ) -> DecisionTreeModel: first = data.first() assert isinstance(first, LabeledPoint), "the data should be RDD of LabeledPoint" model = callMLlibFunc( @@ -181,15 +203,15 @@ def _train( @classmethod def trainClassifier( cls, - data, - numClasses, - categoricalFeaturesInfo, - impurity="gini", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - ): + data: RDD[LabeledPoint], + numClasses: int, + categoricalFeaturesInfo: Dict[int, int], + impurity: str = "gini", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + ) -> DecisionTreeModel: """ Train a decision tree model for classification. @@ -276,14 +298,14 @@ def trainClassifier( @since("1.1.0") def trainRegressor( cls, - data, - categoricalFeaturesInfo, - impurity="variance", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - ): + data: RDD[LabeledPoint], + categoricalFeaturesInfo: Dict[int, int], + impurity: str = "variance", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + ) -> DecisionTreeModel: """ Train a decision tree model for regression. @@ -354,7 +376,7 @@ def trainRegressor( @inherit_doc -class RandomForestModel(TreeEnsembleModel, JavaLoader): +class RandomForestModel(TreeEnsembleModel, JavaLoader["RandomForestModel"]): """ Represents a random forest model. 
@@ -362,7 +384,7 @@ class RandomForestModel(TreeEnsembleModel, JavaLoader): """ @classmethod - def _java_loader_class(cls): + def _java_loader_class(cls) -> str: return "org.apache.spark.mllib.tree.model.RandomForestModel" @@ -374,22 +396,22 @@ class RandomForest: .. versionadded:: 1.2.0 """ - supportedFeatureSubsetStrategies = ("auto", "all", "sqrt", "log2", "onethird") + supportedFeatureSubsetStrategies: Tuple[str, ...] = ("auto", "all", "sqrt", "log2", "onethird") @classmethod def _train( cls, - data, - algo, - numClasses, - categoricalFeaturesInfo, - numTrees, - featureSubsetStrategy, - impurity, - maxDepth, - maxBins, - seed, - ): + data: RDD[LabeledPoint], + algo: str, + numClasses: int, + categoricalFeaturesInfo: Dict[int, int], + numTrees: int, + featureSubsetStrategy: str, + impurity: str, + maxDepth: int, + maxBins: int, + seed: Optional[int], + ) -> RandomForestModel: first = data.first() assert isinstance(first, LabeledPoint), "the data should be RDD of LabeledPoint" if featureSubsetStrategy not in cls.supportedFeatureSubsetStrategies: @@ -414,16 +436,16 @@ def _train( @classmethod def trainClassifier( cls, - data, - numClasses, - categoricalFeaturesInfo, - numTrees, - featureSubsetStrategy="auto", - impurity="gini", - maxDepth=4, - maxBins=32, - seed=None, - ): + data: RDD[LabeledPoint], + numClasses: int, + categoricalFeaturesInfo: Dict[int, int], + numTrees: int, + featureSubsetStrategy: str = "auto", + impurity: str = "gini", + maxDepth: int = 4, + maxBins: int = 32, + seed: Optional[int] = None, + ) -> RandomForestModel: """ Train a random forest model for binary or multiclass classification. @@ -530,15 +552,15 @@ def trainClassifier( @classmethod def trainRegressor( cls, - data, - categoricalFeaturesInfo, - numTrees, - featureSubsetStrategy="auto", - impurity="variance", - maxDepth=4, - maxBins=32, - seed=None, - ): + data: RDD[LabeledPoint], + categoricalFeaturesInfo: Dict[int, int], + numTrees: int, + featureSubsetStrategy: str = "auto", + impurity: str = "variance", + maxDepth: int = 4, + maxBins: int = 32, + seed: Optional[int] = None, + ) -> RandomForestModel: """ Train a random forest model for regression. @@ -625,7 +647,7 @@ def trainRegressor( @inherit_doc -class GradientBoostedTreesModel(TreeEnsembleModel, JavaLoader): +class GradientBoostedTreesModel(TreeEnsembleModel, JavaLoader["GradientBoostedTreesModel"]): """ Represents a gradient-boosted tree model. 
@@ -633,7 +655,7 @@ class GradientBoostedTreesModel(TreeEnsembleModel, JavaLoader): """ @classmethod - def _java_loader_class(cls): + def _java_loader_class(cls) -> str: return "org.apache.spark.mllib.tree.model.GradientBoostedTreesModel" @@ -648,15 +670,15 @@ class GradientBoostedTrees: @classmethod def _train( cls, - data, - algo, - categoricalFeaturesInfo, - loss, - numIterations, - learningRate, - maxDepth, - maxBins, - ): + data: RDD[LabeledPoint], + algo: str, + categoricalFeaturesInfo: Dict[int, int], + loss: str, + numIterations: int, + learningRate: float, + maxDepth: int, + maxBins: int, + ) -> GradientBoostedTreesModel: first = data.first() assert isinstance(first, LabeledPoint), "the data should be RDD of LabeledPoint" model = callMLlibFunc( @@ -675,14 +697,14 @@ def _train( @classmethod def trainClassifier( cls, - data, - categoricalFeaturesInfo, - loss="logLoss", - numIterations=100, - learningRate=0.1, - maxDepth=3, - maxBins=32, - ): + data: RDD[LabeledPoint], + categoricalFeaturesInfo: Dict[int, int], + loss: str = "logLoss", + numIterations: int = 100, + learningRate: float = 0.1, + maxDepth: int = 3, + maxBins: int = 32, + ) -> GradientBoostedTreesModel: """ Train a gradient-boosted trees model for classification. @@ -765,14 +787,14 @@ def trainClassifier( @classmethod def trainRegressor( cls, - data, - categoricalFeaturesInfo, - loss="leastSquaresError", - numIterations=100, - learningRate=0.1, - maxDepth=3, - maxBins=32, - ): + data: RDD[LabeledPoint], + categoricalFeaturesInfo: Dict[int, int], + loss: str = "leastSquaresError", + numIterations: int = 100, + learningRate: float = 0.1, + maxDepth: int = 3, + maxBins: int = 32, + ) -> GradientBoostedTreesModel: """ Train a gradient-boosted trees model for regression. @@ -851,7 +873,7 @@ def trainRegressor( ) -def _test(): +def _test() -> None: import doctest globs = globals().copy() diff --git a/python/pyspark/mllib/tree.pyi b/python/pyspark/mllib/tree.pyi deleted file mode 100644 index fedb494f19062..0000000000000 --- a/python/pyspark/mllib/tree.pyi +++ /dev/null @@ -1,124 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import overload -from typing import Dict, Optional, Tuple -from pyspark.mllib._typing import VectorLike -from pyspark.rdd import RDD -from pyspark.mllib.common import JavaModelWrapper -from pyspark.mllib.regression import LabeledPoint -from pyspark.mllib.util import JavaLoader, JavaSaveable - -class TreeEnsembleModel(JavaModelWrapper, JavaSaveable): - @overload - def predict(self, x: VectorLike) -> float: ... - @overload - def predict(self, x: RDD[VectorLike]) -> RDD[VectorLike]: ... - def numTrees(self) -> int: ... - def totalNumNodes(self) -> int: ... - def toDebugString(self) -> str: ... 
- -class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader[DecisionTreeModel]): - @overload - def predict(self, x: VectorLike) -> float: ... - @overload - def predict(self, x: RDD[VectorLike]) -> RDD[VectorLike]: ... - def numNodes(self) -> int: ... - def depth(self) -> int: ... - def toDebugString(self) -> str: ... - -class DecisionTree: - @classmethod - def trainClassifier( - cls, - data: RDD[LabeledPoint], - numClasses: int, - categoricalFeaturesInfo: Dict[int, int], - impurity: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - ) -> DecisionTreeModel: ... - @classmethod - def trainRegressor( - cls, - data: RDD[LabeledPoint], - categoricalFeaturesInfo: Dict[int, int], - impurity: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - ) -> DecisionTreeModel: ... - -class RandomForestModel(TreeEnsembleModel, JavaLoader[RandomForestModel]): ... - -class RandomForest: - supportedFeatureSubsetStrategies: Tuple[str, ...] - @classmethod - def trainClassifier( - cls, - data: RDD[LabeledPoint], - numClasses: int, - categoricalFeaturesInfo: Dict[int, int], - numTrees: int, - featureSubsetStrategy: str = ..., - impurity: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - seed: Optional[int] = ..., - ) -> RandomForestModel: ... - @classmethod - def trainRegressor( - cls, - data: RDD[LabeledPoint], - categoricalFeaturesInfo: Dict[int, int], - numTrees: int, - featureSubsetStrategy: str = ..., - impurity: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - seed: Optional[int] = ..., - ) -> RandomForestModel: ... - -class GradientBoostedTreesModel(TreeEnsembleModel, JavaLoader[GradientBoostedTreesModel]): ... - -class GradientBoostedTrees: - @classmethod - def trainClassifier( - cls, - data: RDD[LabeledPoint], - categoricalFeaturesInfo: Dict[int, int], - loss: str = ..., - numIterations: int = ..., - learningRate: float = ..., - maxDepth: int = ..., - maxBins: int = ..., - ) -> GradientBoostedTreesModel: ... - @classmethod - def trainRegressor( - cls, - data: RDD[LabeledPoint], - categoricalFeaturesInfo: Dict[int, int], - loss: str = ..., - numIterations: int = ..., - learningRate: float = ..., - maxDepth: int = ..., - maxBins: int = ..., - ) -> GradientBoostedTreesModel: ... From a5caf0c3e0d42d5e711647e39bf74fb08212c884 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 21 Feb 2022 21:28:47 +0800 Subject: [PATCH 291/513] [SPARK-38276][SQL] Add approved TPCDS plans under ANSI mode ### What changes were proposed in this pull request? q83 has a different plan output under ANSI mode. Because of the ANSI type coercion, it can actually push down an `IN` predicate into the Parquet data source. The following screenshot contains all the differences between the default plan (left) and the ANSI plan (right): [screenshot: Screen Shot 2022-02-21 at 7 05 46 PM] This PR is to add approved TPCDS plans under ANSI mode so that we can set up a new job to run tests with ANSI mode on (https://issues.apache.org/jira/browse/SPARK-38154) ### Why are the changes needed? For passing TPCDS plan stability tests under ANSI mode. We are going to set up a new job to run tests with ANSI mode on (https://issues.apache.org/jira/browse/SPARK-38154) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually turn on ANSI mode, run the tests, and check whether all the plan stability tests pass. Closes #35598 from gengliangwang/fixMoreStability.
Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../approved-plans-v1_4/q83.ansi/explain.txt | 362 ++++++++++++++++++ .../q83.ansi/simplified.txt | 95 +++++ .../q83.sf100.ansi/explain.txt | 362 ++++++++++++++++++ .../q83.sf100.ansi/simplified.txt | 95 +++++ .../apache/spark/sql/PlanStabilitySuite.scala | 13 +- 5 files changed, 926 insertions(+), 1 deletion(-) create mode 100644 sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/explain.txt create mode 100644 sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/simplified.txt create mode 100644 sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/explain.txt create mode 100644 sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/simplified.txt diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/explain.txt new file mode 100644 index 0000000000000..c46fce21c25a2 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/explain.txt @@ -0,0 +1,362 @@ +== Physical Plan == +TakeOrderedAndProject (46) ++- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildRight (29) + : :- * HashAggregate (15) + : : +- Exchange (14) + : : +- * HashAggregate (13) + : : +- * Project (12) + : : +- * BroadcastHashJoin Inner BuildRight (11) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_returns (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- ReusedExchange (10) + : +- BroadcastExchange (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet default.catalog_returns (16) + : : +- ReusedExchange (19) + : +- ReusedExchange (22) + +- BroadcastExchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.web_returns (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) + + +(1) Scan parquet default.store_returns +Output [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#3), dynamicpruningexpression(sr_returned_date_sk#3 IN dynamicpruning#4)] +PushedFilters: [IsNotNull(sr_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] + +(3) Filter [codegen id : 5] +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Condition : isnotnull(sr_item_sk#1) + +(4) Scan parquet default.item +Output [2]: [i_item_sk#5, i_item_id#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 
1] +Input [2]: [i_item_sk#5, i_item_id#6] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#5, i_item_id#6] +Condition : (isnotnull(i_item_sk#5) AND isnotnull(i_item_id#6)) + +(7) BroadcastExchange +Input [2]: [i_item_sk#5, i_item_id#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_item_sk#1] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(9) Project [codegen id : 5] +Output [3]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#6] +Input [5]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3, i_item_sk#5, i_item_id#6] + +(10) ReusedExchange [Reuses operator id: 62] +Output [1]: [d_date_sk#8] + +(11) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_returned_date_sk#3] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(12) Project [codegen id : 5] +Output [2]: [sr_return_quantity#2, i_item_id#6] +Input [4]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#6, d_date_sk#8] + +(13) HashAggregate [codegen id : 5] +Input [2]: [sr_return_quantity#2, i_item_id#6] +Keys [1]: [i_item_id#6] +Functions [1]: [partial_sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum#9] +Results [2]: [i_item_id#6, sum#10] + +(14) Exchange +Input [2]: [i_item_id#6, sum#10] +Arguments: hashpartitioning(i_item_id#6, 5), ENSURE_REQUIREMENTS, [id=#11] + +(15) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#6, sum#10] +Keys [1]: [i_item_id#6] +Functions [1]: [sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum(sr_return_quantity#2)#12] +Results [2]: [i_item_id#6 AS item_id#13, sum(sr_return_quantity#2)#12 AS sr_item_qty#14] + +(16) Scan parquet default.catalog_returns +Output [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#17), dynamicpruningexpression(cr_returned_date_sk#17 IN dynamicpruning#4)] +PushedFilters: [IsNotNull(cr_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 10] +Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] + +(18) Filter [codegen id : 10] +Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Condition : isnotnull(cr_item_sk#15) + +(19) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#18, i_item_id#19] + +(20) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_item_sk#15] +Right keys [1]: [i_item_sk#18] +Join condition: None + +(21) Project [codegen id : 10] +Output [3]: [cr_return_quantity#16, cr_returned_date_sk#17, i_item_id#19] +Input [5]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17, i_item_sk#18, i_item_id#19] + +(22) ReusedExchange [Reuses operator id: 62] +Output [1]: [d_date_sk#20] + +(23) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_returned_date_sk#17] +Right keys [1]: [d_date_sk#20] +Join condition: None + +(24) Project [codegen id : 10] +Output [2]: [cr_return_quantity#16, i_item_id#19] +Input [4]: [cr_return_quantity#16, cr_returned_date_sk#17, i_item_id#19, d_date_sk#20] + +(25) HashAggregate [codegen id : 10] +Input [2]: [cr_return_quantity#16, i_item_id#19] +Keys [1]: [i_item_id#19] +Functions [1]: [partial_sum(cr_return_quantity#16)] +Aggregate Attributes [1]: [sum#21] +Results [2]: [i_item_id#19, sum#22] + +(26) Exchange +Input [2]: [i_item_id#19, sum#22] +Arguments: hashpartitioning(i_item_id#19, 5), ENSURE_REQUIREMENTS, [id=#23] + +(27) HashAggregate [codegen id : 11] +Input [2]: 
[i_item_id#19, sum#22] +Keys [1]: [i_item_id#19] +Functions [1]: [sum(cr_return_quantity#16)] +Aggregate Attributes [1]: [sum(cr_return_quantity#16)#24] +Results [2]: [i_item_id#19 AS item_id#25, sum(cr_return_quantity#16)#24 AS cr_item_qty#26] + +(28) BroadcastExchange +Input [2]: [item_id#25, cr_item_qty#26] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] + +(29) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#13] +Right keys [1]: [item_id#25] +Join condition: None + +(30) Project [codegen id : 18] +Output [3]: [item_id#13, sr_item_qty#14, cr_item_qty#26] +Input [4]: [item_id#13, sr_item_qty#14, item_id#25, cr_item_qty#26] + +(31) Scan parquet default.web_returns +Output [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#30), dynamicpruningexpression(wr_returned_date_sk#30 IN dynamicpruning#4)] +PushedFilters: [IsNotNull(wr_item_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 16] +Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] + +(33) Filter [codegen id : 16] +Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Condition : isnotnull(wr_item_sk#28) + +(34) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#31, i_item_id#32] + +(35) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_item_sk#28] +Right keys [1]: [i_item_sk#31] +Join condition: None + +(36) Project [codegen id : 16] +Output [3]: [wr_return_quantity#29, wr_returned_date_sk#30, i_item_id#32] +Input [5]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30, i_item_sk#31, i_item_id#32] + +(37) ReusedExchange [Reuses operator id: 62] +Output [1]: [d_date_sk#33] + +(38) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_returned_date_sk#30] +Right keys [1]: [d_date_sk#33] +Join condition: None + +(39) Project [codegen id : 16] +Output [2]: [wr_return_quantity#29, i_item_id#32] +Input [4]: [wr_return_quantity#29, wr_returned_date_sk#30, i_item_id#32, d_date_sk#33] + +(40) HashAggregate [codegen id : 16] +Input [2]: [wr_return_quantity#29, i_item_id#32] +Keys [1]: [i_item_id#32] +Functions [1]: [partial_sum(wr_return_quantity#29)] +Aggregate Attributes [1]: [sum#34] +Results [2]: [i_item_id#32, sum#35] + +(41) Exchange +Input [2]: [i_item_id#32, sum#35] +Arguments: hashpartitioning(i_item_id#32, 5), ENSURE_REQUIREMENTS, [id=#36] + +(42) HashAggregate [codegen id : 17] +Input [2]: [i_item_id#32, sum#35] +Keys [1]: [i_item_id#32] +Functions [1]: [sum(wr_return_quantity#29)] +Aggregate Attributes [1]: [sum(wr_return_quantity#29)#37] +Results [2]: [i_item_id#32 AS item_id#38, sum(wr_return_quantity#29)#37 AS wr_item_qty#39] + +(43) BroadcastExchange +Input [2]: [item_id#38, wr_item_qty#39] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] + +(44) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#13] +Right keys [1]: [item_id#38] +Join condition: None + +(45) Project [codegen id : 18] +Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 
100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#44] +Input [5]: [item_id#13, sr_item_qty#14, cr_item_qty#26, item_id#38, wr_item_qty#39] + +(46) TakeOrderedAndProject +Input [8]: [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] +Arguments: 100, [item_id#13 ASC NULLS FIRST, sr_item_qty#14 ASC NULLS FIRST], [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 1 Hosting Expression = sr_returned_date_sk#3 IN dynamicpruning#4 +BroadcastExchange (62) ++- * Project (61) + +- * BroadcastHashJoin LeftSemi BuildRight (60) + :- * Filter (49) + : +- * ColumnarToRow (48) + : +- Scan parquet default.date_dim (47) + +- BroadcastExchange (59) + +- * Project (58) + +- * BroadcastHashJoin LeftSemi BuildRight (57) + :- * ColumnarToRow (51) + : +- Scan parquet default.date_dim (50) + +- BroadcastExchange (56) + +- * Project (55) + +- * Filter (54) + +- * ColumnarToRow (53) + +- Scan parquet default.date_dim (52) + + +(47) Scan parquet default.date_dim +Output [2]: [d_date_sk#8, d_date#45] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#8, d_date#45] + +(49) Filter [codegen id : 3] +Input [2]: [d_date_sk#8, d_date#45] +Condition : isnotnull(d_date_sk#8) + +(50) Scan parquet default.date_dim +Output [2]: [d_date#46, d_week_seq#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +ReadSchema: struct + +(51) ColumnarToRow [codegen id : 2] +Input [2]: [d_date#46, d_week_seq#47] + +(52) Scan parquet default.date_dim +Output [2]: [d_date#48, d_week_seq#49] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_date, [2000-06-30,2000-09-27,2000-11-17])] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 1] +Input [2]: [d_date#48, d_week_seq#49] + +(54) Filter [codegen id : 1] +Input [2]: [d_date#48, d_week_seq#49] +Condition : d_date#48 IN (2000-06-30,2000-09-27,2000-11-17) + +(55) Project [codegen id : 1] +Output [1]: [d_week_seq#49] +Input [2]: [d_date#48, d_week_seq#49] + +(56) BroadcastExchange +Input [1]: [d_week_seq#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] + +(57) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [d_week_seq#47] +Right keys [1]: [d_week_seq#49] +Join condition: None + +(58) Project [codegen id : 2] +Output [1]: [d_date#46] +Input [2]: [d_date#46, d_week_seq#47] + +(59) BroadcastExchange +Input [1]: [d_date#46] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#51] + +(60) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date#45] +Right keys [1]: [d_date#46] +Join condition: None + +(61) Project [codegen id : 3] +Output [1]: [d_date_sk#8] +Input [2]: [d_date_sk#8, d_date#45] + +(62) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] + +Subquery:2 Hosting operator id = 16 Hosting Expression = cr_returned_date_sk#17 IN dynamicpruning#4 + +Subquery:3 Hosting operator id = 31 Hosting Expression = wr_returned_date_sk#30 IN dynamicpruning#4 + + diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/simplified.txt new file mode 100644 index 0000000000000..29ff19d7450c8 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/simplified.txt @@ -0,0 +1,95 @@ +TakeOrderedAndProject [item_id,sr_item_qty,sr_dev,cr_item_qty,cr_dev,wr_item_qty,wr_dev,average] + WholeStageCodegen (18) + Project [item_id,sr_item_qty,cr_item_qty,wr_item_qty] + BroadcastHashJoin [item_id,item_id] + Project [item_id,sr_item_qty,cr_item_qty] + BroadcastHashJoin [item_id,item_id] + HashAggregate [i_item_id,sum] [sum(sr_return_quantity),item_id,sr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,sr_return_quantity] [sum,sum] + Project [sr_return_quantity,i_item_id] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_return_quantity,sr_returned_date_sk,i_item_id] + BroadcastHashJoin [sr_item_sk,i_item_sk] + Filter [sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #2 + WholeStageCodegen (3) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date] + BroadcastHashJoin [d_week_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Filter [i_item_sk,i_item_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (11) + HashAggregate [i_item_id,sum] [sum(cr_return_quantity),item_id,cr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (10) + HashAggregate [i_item_id,cr_return_quantity] [sum,sum] + Project [cr_return_quantity,i_item_id] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cr_return_quantity,cr_returned_date_sk,i_item_id] + BroadcastHashJoin [cr_item_sk,i_item_sk] + Filter [cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_return_quantity,cr_returned_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,sum] [sum(wr_return_quantity),item_id,wr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #9 + WholeStageCodegen (16) + HashAggregate [i_item_id,wr_return_quantity] [sum,sum] + Project [wr_return_quantity,i_item_id] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_return_quantity,wr_returned_date_sk,i_item_id] + BroadcastHashJoin [wr_item_sk,i_item_sk] + Filter [wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_return_quantity,wr_returned_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 + InputAdapter + ReusedExchange 
[d_date_sk] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/explain.txt new file mode 100644 index 0000000000000..bda63681ef500 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/explain.txt @@ -0,0 +1,362 @@ +== Physical Plan == +TakeOrderedAndProject (46) ++- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildRight (29) + : :- * HashAggregate (15) + : : +- Exchange (14) + : : +- * HashAggregate (13) + : : +- * Project (12) + : : +- * BroadcastHashJoin Inner BuildRight (11) + : : :- * Project (6) + : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_returns (1) + : : : +- ReusedExchange (4) + : : +- BroadcastExchange (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet default.item (7) + : +- BroadcastExchange (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet default.catalog_returns (16) + : : +- ReusedExchange (19) + : +- ReusedExchange (22) + +- BroadcastExchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.web_returns (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) + + +(1) Scan parquet default.store_returns +Output [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#3), dynamicpruningexpression(sr_returned_date_sk#3 IN dynamicpruning#4)] +PushedFilters: [IsNotNull(sr_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] + +(3) Filter [codegen id : 5] +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Condition : isnotnull(sr_item_sk#1) + +(4) ReusedExchange [Reuses operator id: 62] +Output [1]: [d_date_sk#5] + +(5) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_returned_date_sk#3] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(6) Project [codegen id : 5] +Output [2]: [sr_item_sk#1, sr_return_quantity#2] +Input [4]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3, d_date_sk#5] + +(7) Scan parquet default.item +Output [2]: [i_item_sk#6, i_item_id#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#6, i_item_id#7] + +(9) Filter [codegen id : 4] +Input [2]: [i_item_sk#6, i_item_id#7] +Condition : (isnotnull(i_item_sk#6) AND isnotnull(i_item_id#7)) + +(10) BroadcastExchange +Input [2]: [i_item_sk#6, i_item_id#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] + +(11) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_item_sk#1] +Right keys 
[1]: [i_item_sk#6] +Join condition: None + +(12) Project [codegen id : 5] +Output [2]: [sr_return_quantity#2, i_item_id#7] +Input [4]: [sr_item_sk#1, sr_return_quantity#2, i_item_sk#6, i_item_id#7] + +(13) HashAggregate [codegen id : 5] +Input [2]: [sr_return_quantity#2, i_item_id#7] +Keys [1]: [i_item_id#7] +Functions [1]: [partial_sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum#9] +Results [2]: [i_item_id#7, sum#10] + +(14) Exchange +Input [2]: [i_item_id#7, sum#10] +Arguments: hashpartitioning(i_item_id#7, 5), ENSURE_REQUIREMENTS, [id=#11] + +(15) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#7, sum#10] +Keys [1]: [i_item_id#7] +Functions [1]: [sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum(sr_return_quantity#2)#12] +Results [2]: [i_item_id#7 AS item_id#13, sum(sr_return_quantity#2)#12 AS sr_item_qty#14] + +(16) Scan parquet default.catalog_returns +Output [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#17), dynamicpruningexpression(cr_returned_date_sk#17 IN dynamicpruning#4)] +PushedFilters: [IsNotNull(cr_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 10] +Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] + +(18) Filter [codegen id : 10] +Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Condition : isnotnull(cr_item_sk#15) + +(19) ReusedExchange [Reuses operator id: 62] +Output [1]: [d_date_sk#18] + +(20) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_returned_date_sk#17] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(21) Project [codegen id : 10] +Output [2]: [cr_item_sk#15, cr_return_quantity#16] +Input [4]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17, d_date_sk#18] + +(22) ReusedExchange [Reuses operator id: 10] +Output [2]: [i_item_sk#19, i_item_id#20] + +(23) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_item_sk#15] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(24) Project [codegen id : 10] +Output [2]: [cr_return_quantity#16, i_item_id#20] +Input [4]: [cr_item_sk#15, cr_return_quantity#16, i_item_sk#19, i_item_id#20] + +(25) HashAggregate [codegen id : 10] +Input [2]: [cr_return_quantity#16, i_item_id#20] +Keys [1]: [i_item_id#20] +Functions [1]: [partial_sum(cr_return_quantity#16)] +Aggregate Attributes [1]: [sum#21] +Results [2]: [i_item_id#20, sum#22] + +(26) Exchange +Input [2]: [i_item_id#20, sum#22] +Arguments: hashpartitioning(i_item_id#20, 5), ENSURE_REQUIREMENTS, [id=#23] + +(27) HashAggregate [codegen id : 11] +Input [2]: [i_item_id#20, sum#22] +Keys [1]: [i_item_id#20] +Functions [1]: [sum(cr_return_quantity#16)] +Aggregate Attributes [1]: [sum(cr_return_quantity#16)#24] +Results [2]: [i_item_id#20 AS item_id#25, sum(cr_return_quantity#16)#24 AS cr_item_qty#26] + +(28) BroadcastExchange +Input [2]: [item_id#25, cr_item_qty#26] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] + +(29) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#13] +Right keys [1]: [item_id#25] +Join condition: None + +(30) Project [codegen id : 18] +Output [3]: [item_id#13, sr_item_qty#14, cr_item_qty#26] +Input [4]: [item_id#13, sr_item_qty#14, item_id#25, cr_item_qty#26] + +(31) Scan parquet default.web_returns +Output [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: 
[isnotnull(wr_returned_date_sk#30), dynamicpruningexpression(wr_returned_date_sk#30 IN dynamicpruning#4)] +PushedFilters: [IsNotNull(wr_item_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 16] +Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] + +(33) Filter [codegen id : 16] +Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Condition : isnotnull(wr_item_sk#28) + +(34) ReusedExchange [Reuses operator id: 62] +Output [1]: [d_date_sk#31] + +(35) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_returned_date_sk#30] +Right keys [1]: [d_date_sk#31] +Join condition: None + +(36) Project [codegen id : 16] +Output [2]: [wr_item_sk#28, wr_return_quantity#29] +Input [4]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30, d_date_sk#31] + +(37) ReusedExchange [Reuses operator id: 10] +Output [2]: [i_item_sk#32, i_item_id#33] + +(38) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_item_sk#28] +Right keys [1]: [i_item_sk#32] +Join condition: None + +(39) Project [codegen id : 16] +Output [2]: [wr_return_quantity#29, i_item_id#33] +Input [4]: [wr_item_sk#28, wr_return_quantity#29, i_item_sk#32, i_item_id#33] + +(40) HashAggregate [codegen id : 16] +Input [2]: [wr_return_quantity#29, i_item_id#33] +Keys [1]: [i_item_id#33] +Functions [1]: [partial_sum(wr_return_quantity#29)] +Aggregate Attributes [1]: [sum#34] +Results [2]: [i_item_id#33, sum#35] + +(41) Exchange +Input [2]: [i_item_id#33, sum#35] +Arguments: hashpartitioning(i_item_id#33, 5), ENSURE_REQUIREMENTS, [id=#36] + +(42) HashAggregate [codegen id : 17] +Input [2]: [i_item_id#33, sum#35] +Keys [1]: [i_item_id#33] +Functions [1]: [sum(wr_return_quantity#29)] +Aggregate Attributes [1]: [sum(wr_return_quantity#29)#37] +Results [2]: [i_item_id#33 AS item_id#38, sum(wr_return_quantity#29)#37 AS wr_item_qty#39] + +(43) BroadcastExchange +Input [2]: [item_id#38, wr_item_qty#39] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] + +(44) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#13] +Right keys [1]: [item_id#38] +Join condition: None + +(45) Project [codegen id : 18] +Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#44] +Input [5]: [item_id#13, sr_item_qty#14, cr_item_qty#26, item_id#38, wr_item_qty#39] + +(46) TakeOrderedAndProject +Input [8]: [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] +Arguments: 100, [item_id#13 ASC NULLS FIRST, sr_item_qty#14 ASC NULLS FIRST], [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 1 Hosting Expression = sr_returned_date_sk#3 IN dynamicpruning#4 +BroadcastExchange (62) ++- * Project (61) + +- * BroadcastHashJoin LeftSemi BuildRight (60) + :- * Filter (49) + : +- * ColumnarToRow (48) + : +- Scan parquet default.date_dim (47) + +- 
BroadcastExchange (59) + +- * Project (58) + +- * BroadcastHashJoin LeftSemi BuildRight (57) + :- * ColumnarToRow (51) + : +- Scan parquet default.date_dim (50) + +- BroadcastExchange (56) + +- * Project (55) + +- * Filter (54) + +- * ColumnarToRow (53) + +- Scan parquet default.date_dim (52) + + +(47) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_date#45] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#5, d_date#45] + +(49) Filter [codegen id : 3] +Input [2]: [d_date_sk#5, d_date#45] +Condition : isnotnull(d_date_sk#5) + +(50) Scan parquet default.date_dim +Output [2]: [d_date#46, d_week_seq#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +ReadSchema: struct + +(51) ColumnarToRow [codegen id : 2] +Input [2]: [d_date#46, d_week_seq#47] + +(52) Scan parquet default.date_dim +Output [2]: [d_date#48, d_week_seq#49] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_date, [2000-06-30,2000-09-27,2000-11-17])] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 1] +Input [2]: [d_date#48, d_week_seq#49] + +(54) Filter [codegen id : 1] +Input [2]: [d_date#48, d_week_seq#49] +Condition : d_date#48 IN (2000-06-30,2000-09-27,2000-11-17) + +(55) Project [codegen id : 1] +Output [1]: [d_week_seq#49] +Input [2]: [d_date#48, d_week_seq#49] + +(56) BroadcastExchange +Input [1]: [d_week_seq#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] + +(57) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [d_week_seq#47] +Right keys [1]: [d_week_seq#49] +Join condition: None + +(58) Project [codegen id : 2] +Output [1]: [d_date#46] +Input [2]: [d_date#46, d_week_seq#47] + +(59) BroadcastExchange +Input [1]: [d_date#46] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#51] + +(60) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date#45] +Right keys [1]: [d_date#46] +Join condition: None + +(61) Project [codegen id : 3] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_date#45] + +(62) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] + +Subquery:2 Hosting operator id = 16 Hosting Expression = cr_returned_date_sk#17 IN dynamicpruning#4 + +Subquery:3 Hosting operator id = 31 Hosting Expression = wr_returned_date_sk#30 IN dynamicpruning#4 + + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/simplified.txt new file mode 100644 index 0000000000000..7f38503363767 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/simplified.txt @@ -0,0 +1,95 @@ +TakeOrderedAndProject [item_id,sr_item_qty,sr_dev,cr_item_qty,cr_dev,wr_item_qty,wr_dev,average] + WholeStageCodegen (18) + Project [item_id,sr_item_qty,cr_item_qty,wr_item_qty] + BroadcastHashJoin [item_id,item_id] + Project [item_id,sr_item_qty,cr_item_qty] + BroadcastHashJoin [item_id,item_id] + HashAggregate [i_item_id,sum] [sum(sr_return_quantity),item_id,sr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,sr_return_quantity] [sum,sum] + Project [sr_return_quantity,i_item_id] + 
BroadcastHashJoin [sr_item_sk,i_item_sk] + Project [sr_item_sk,sr_return_quantity] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #2 + WholeStageCodegen (3) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date] + BroadcastHashJoin [d_week_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk,i_item_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (11) + HashAggregate [i_item_id,sum] [sum(cr_return_quantity),item_id,cr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (10) + HashAggregate [i_item_id,cr_return_quantity] [sum,sum] + Project [cr_return_quantity,i_item_id] + BroadcastHashJoin [cr_item_sk,i_item_sk] + Project [cr_item_sk,cr_return_quantity] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_return_quantity,cr_returned_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,sum] [sum(wr_return_quantity),item_id,wr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #9 + WholeStageCodegen (16) + HashAggregate [i_item_id,wr_return_quantity] [sum,sum] + Project [wr_return_quantity,i_item_id] + BroadcastHashJoin [wr_item_sk,i_item_sk] + Project [wr_item_sk,wr_return_quantity] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_return_quantity,wr_returned_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala index 262a6920d29c6..a0207e9b01920 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec, ValidateRequirements} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.tags.ExtendedSQLTest // scalastyle:off line.size.limit @@ -82,8 +83,18 @@ trait PlanStabilitySuite extends DisableAdaptiveExecutionSuite { def goldenFilePath: String + private val approvedAnsiPlans: Seq[String] = Seq( + "q83", + "q83.sf100" + ) + private def 
getDirForTest(name: String): File = { - new File(goldenFilePath, name) + val goldenFileName = if (SQLConf.get.ansiEnabled && approvedAnsiPlans.contains(name)) { + name + ".ansi" + } else { + name + } + new File(goldenFilePath, goldenFileName) } private def isApproved( From 871bbf9dcb3136c6e1b640204247f8461dcf1a98 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 21 Feb 2022 14:45:28 -0800 Subject: [PATCH 292/513] [SPARK-38274][BUILD] Upgrade `JUnit4` to `4.13.2` and upgrade corresponding `junit-interface` to `0.13.3` ### What changes were proposed in this pull request? The main changes of this PR are as follows: - Upgrade `JUnit4` to `4.13.2`: https://github.com/junit-team/junit4/blob/HEAD/doc/ReleaseNotes4.13.2.md - Change the `junit-interface` groupId from `com.novocode` to `com.github.sbt`, because the organization name changed to "com.github.sbt" in 0.12, and upgrade the `junit-interface` version to `0.13.3`, as described in [https://github.com/sbt/junit-interface/#setup](https://github.com/sbt/junit-interface/#setup) ### Why are the changes needed? Upgrade the test framework. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35597 from LuciferYang/junit-and-junit-interface. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- pom.xml | 8 ++++---- project/SparkBuild.scala | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index 109448866ca37..02b0aa8594abf 100644 --- a/pom.xml +++ b/pom.xml @@ -407,7 +407,7 @@ test - com.novocode + com.github.sbt junit-interface test @@ -1128,7 +1128,7 @@ junit junit - 4.13.1 + 4.13.2 test @@ -1144,9 +1144,9 @@ test - com.novocode + com.github.sbt junit-interface - 0.11 + 0.13.3 test diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 1b8b258af2776..e9ef514c4e331 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1227,7 +1227,7 @@ object TestSettings { (Test / testOptions) += Tests.Argument(TestFrameworks.ScalaTest, "-W", "120", "300"), (Test / testOptions) += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"), // Enable Junit testing. - libraryDependencies += "com.novocode" % "junit-interface" % "0.11" % "test", + libraryDependencies += "com.github.sbt" % "junit-interface" % "0.13.3" % "test", // `parallelExecutionInTest` controls whether test suites belonging to the same SBT project // can run in parallel with one another. It does NOT control whether tests execute in parallel // within the same JVM (which is controlled by `testForkedParallel`) or whether test cases From 1112240ffc5456778124e43ec75aae5d8cb7121f Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 22 Feb 2022 09:58:07 +0900 Subject: [PATCH 293/513] [SPARK-38259][BUILD] Upgrade Netty to 4.1.74 ### What changes were proposed in this pull request? This PR aims to upgrade Netty to 4.1.74; the release notes for this version are here: - https://netty.io/news/2022/02/08/4-1-74-Final.html ### Why are the changes needed? Upgrade the dependency to avoid some potential bugs. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35581 from LuciferYang/netty-4174. 
Lead-authored-by: yangjie01 Co-authored-by: YangJie Signed-off-by: Hyukjin Kwon --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 30 +++++++++++++-------------- dev/deps/spark-deps-hadoop-3-hive-2.3 | 30 +++++++++++++-------------- pom.xml | 2 +- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 9ab65b48ea2c0..efd380c89d456 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -200,21 +200,21 @@ metrics-jmx/4.2.7//metrics-jmx-4.2.7.jar metrics-json/4.2.7//metrics-json-4.2.7.jar metrics-jvm/4.2.7//metrics-jvm-4.2.7.jar minlog/1.3.0//minlog-1.3.0.jar -netty-all/4.1.73.Final//netty-all-4.1.73.Final.jar -netty-buffer/4.1.73.Final//netty-buffer-4.1.73.Final.jar -netty-codec/4.1.73.Final//netty-codec-4.1.73.Final.jar -netty-common/4.1.73.Final//netty-common-4.1.73.Final.jar -netty-handler/4.1.73.Final//netty-handler-4.1.73.Final.jar -netty-resolver/4.1.73.Final//netty-resolver-4.1.73.Final.jar -netty-tcnative-classes/2.0.46.Final//netty-tcnative-classes-2.0.46.Final.jar -netty-transport-classes-epoll/4.1.73.Final//netty-transport-classes-epoll-4.1.73.Final.jar -netty-transport-classes-kqueue/4.1.73.Final//netty-transport-classes-kqueue-4.1.73.Final.jar -netty-transport-native-epoll/4.1.73.Final/linux-aarch_64/netty-transport-native-epoll-4.1.73.Final-linux-aarch_64.jar -netty-transport-native-epoll/4.1.73.Final/linux-x86_64/netty-transport-native-epoll-4.1.73.Final-linux-x86_64.jar -netty-transport-native-kqueue/4.1.73.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.73.Final-osx-aarch_64.jar -netty-transport-native-kqueue/4.1.73.Final/osx-x86_64/netty-transport-native-kqueue-4.1.73.Final-osx-x86_64.jar -netty-transport-native-unix-common/4.1.73.Final//netty-transport-native-unix-common-4.1.73.Final.jar -netty-transport/4.1.73.Final//netty-transport-4.1.73.Final.jar +netty-all/4.1.74.Final//netty-all-4.1.74.Final.jar +netty-buffer/4.1.74.Final//netty-buffer-4.1.74.Final.jar +netty-codec/4.1.74.Final//netty-codec-4.1.74.Final.jar +netty-common/4.1.74.Final//netty-common-4.1.74.Final.jar +netty-handler/4.1.74.Final//netty-handler-4.1.74.Final.jar +netty-resolver/4.1.74.Final//netty-resolver-4.1.74.Final.jar +netty-tcnative-classes/2.0.48.Final//netty-tcnative-classes-2.0.48.Final.jar +netty-transport-classes-epoll/4.1.74.Final//netty-transport-classes-epoll-4.1.74.Final.jar +netty-transport-classes-kqueue/4.1.74.Final//netty-transport-classes-kqueue-4.1.74.Final.jar +netty-transport-native-epoll/4.1.74.Final/linux-aarch_64/netty-transport-native-epoll-4.1.74.Final-linux-aarch_64.jar +netty-transport-native-epoll/4.1.74.Final/linux-x86_64/netty-transport-native-epoll-4.1.74.Final-linux-x86_64.jar +netty-transport-native-kqueue/4.1.74.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.74.Final-osx-aarch_64.jar +netty-transport-native-kqueue/4.1.74.Final/osx-x86_64/netty-transport-native-kqueue-4.1.74.Final-osx-x86_64.jar +netty-transport-native-unix-common/4.1.74.Final//netty-transport-native-unix-common-4.1.74.Final.jar +netty-transport/4.1.74.Final//netty-transport-4.1.74.Final.jar objenesis/3.2//objenesis-3.2.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 7d8729193cb28..73644ee4ed75c 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -186,21 +186,21 @@ metrics-jmx/4.2.7//metrics-jmx-4.2.7.jar 
metrics-json/4.2.7//metrics-json-4.2.7.jar metrics-jvm/4.2.7//metrics-jvm-4.2.7.jar minlog/1.3.0//minlog-1.3.0.jar -netty-all/4.1.73.Final//netty-all-4.1.73.Final.jar -netty-buffer/4.1.73.Final//netty-buffer-4.1.73.Final.jar -netty-codec/4.1.73.Final//netty-codec-4.1.73.Final.jar -netty-common/4.1.73.Final//netty-common-4.1.73.Final.jar -netty-handler/4.1.73.Final//netty-handler-4.1.73.Final.jar -netty-resolver/4.1.73.Final//netty-resolver-4.1.73.Final.jar -netty-tcnative-classes/2.0.46.Final//netty-tcnative-classes-2.0.46.Final.jar -netty-transport-classes-epoll/4.1.73.Final//netty-transport-classes-epoll-4.1.73.Final.jar -netty-transport-classes-kqueue/4.1.73.Final//netty-transport-classes-kqueue-4.1.73.Final.jar -netty-transport-native-epoll/4.1.73.Final/linux-aarch_64/netty-transport-native-epoll-4.1.73.Final-linux-aarch_64.jar -netty-transport-native-epoll/4.1.73.Final/linux-x86_64/netty-transport-native-epoll-4.1.73.Final-linux-x86_64.jar -netty-transport-native-kqueue/4.1.73.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.73.Final-osx-aarch_64.jar -netty-transport-native-kqueue/4.1.73.Final/osx-x86_64/netty-transport-native-kqueue-4.1.73.Final-osx-x86_64.jar -netty-transport-native-unix-common/4.1.73.Final//netty-transport-native-unix-common-4.1.73.Final.jar -netty-transport/4.1.73.Final//netty-transport-4.1.73.Final.jar +netty-all/4.1.74.Final//netty-all-4.1.74.Final.jar +netty-buffer/4.1.74.Final//netty-buffer-4.1.74.Final.jar +netty-codec/4.1.74.Final//netty-codec-4.1.74.Final.jar +netty-common/4.1.74.Final//netty-common-4.1.74.Final.jar +netty-handler/4.1.74.Final//netty-handler-4.1.74.Final.jar +netty-resolver/4.1.74.Final//netty-resolver-4.1.74.Final.jar +netty-tcnative-classes/2.0.48.Final//netty-tcnative-classes-2.0.48.Final.jar +netty-transport-classes-epoll/4.1.74.Final//netty-transport-classes-epoll-4.1.74.Final.jar +netty-transport-classes-kqueue/4.1.74.Final//netty-transport-classes-kqueue-4.1.74.Final.jar +netty-transport-native-epoll/4.1.74.Final/linux-aarch_64/netty-transport-native-epoll-4.1.74.Final-linux-aarch_64.jar +netty-transport-native-epoll/4.1.74.Final/linux-x86_64/netty-transport-native-epoll-4.1.74.Final-linux-x86_64.jar +netty-transport-native-kqueue/4.1.74.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.74.Final-osx-aarch_64.jar +netty-transport-native-kqueue/4.1.74.Final/osx-x86_64/netty-transport-native-kqueue-4.1.74.Final-osx-x86_64.jar +netty-transport-native-unix-common/4.1.74.Final//netty-transport-native-unix-common-4.1.74.Final.jar +netty-transport/4.1.74.Final//netty-transport-4.1.74.Final.jar objenesis/3.2//objenesis-3.2.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar diff --git a/pom.xml b/pom.xml index 02b0aa8594abf..788cf8c95aaf7 100644 --- a/pom.xml +++ b/pom.xml @@ -811,7 +811,7 @@ io.netty netty-all - 4.1.73.Final + 4.1.74.Final io.netty From 881f562f7b6a2bed76b01f956bc02c4b87ad6b80 Mon Sep 17 00:00:00 2001 From: Franck Thang Date: Tue, 22 Feb 2022 13:49:32 +0800 Subject: [PATCH 294/513] [SPARK-37290][SQL] - Exponential planning time in case of non-deterministic function ### What changes were proposed in this pull request? 
When using a non-deterministic function, the method getAllValidConstraints can throw an OOM: ``` protected def getAllValidConstraints(projectList: Seq[NamedExpression]): ExpressionSet = { var allConstraints = child.constraints projectList.foreach { case a @ Alias(l: Literal, _) => allConstraints += EqualNullSafe(a.toAttribute, l) case a @ Alias(e, _) => // For every alias in `projectList`, replace the reference in constraints by its attribute. allConstraints ++= allConstraints.map(_ transform { case expr: Expression if expr.semanticEquals(e) => a.toAttribute }) allConstraints += EqualNullSafe(e, a.toAttribute) case _ => // Don't change. } allConstraints } ``` In particular, the line `allConstraints ++= allConstraints.map(...)` can generate an exponential number of expressions. This is because non-deterministic functions are considered unique in an ExpressionSet, so the number of non-deterministic expressions doubles every time we go through this line. We can filter and keep only deterministic expressions because 1 - `semanticEquals` automatically discards non-deterministic expressions, and 2 - this method is only used in one code path, which keeps only deterministic expressions anyway: ``` lazy val constraints: ExpressionSet = { if (conf.constraintPropagationEnabled) { validConstraints .union(inferAdditionalConstraints(validConstraints)) .union(constructIsNotNullConstraints(validConstraints, output)) .filter { c => c.references.nonEmpty && c.references.subsetOf(outputSet) && c.deterministic } } else { ExpressionSet() } } ``` ### Why are the changes needed? It can lead to an exponential number of expressions and/or an OOM. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Local test Closes #35233 from Stelyus/SPARK-37290. Authored-by: Franck Thang Signed-off-by: Wenchen Fan --- .../apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 49634a2a0eb89..5ae2a7da826a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -183,7 +183,7 @@ trait UnaryNode extends LogicalPlan with UnaryLike[LogicalPlan] { projectList.foreach { case a @ Alias(l: Literal, _) => allConstraints += EqualNullSafe(a.toAttribute, l) - case a @ Alias(e, _) => + case a @ Alias(e, _) if e.deterministic => // For every alias in `projectList`, replace the reference in constraints by its attribute. allConstraints ++= allConstraints.map(_ transform { case expr: Expression if expr.semanticEquals(e) => From 5ebf7938b6882d343a6aa9e125f24bee394bb25f Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Tue, 22 Feb 2022 15:58:07 +0900 Subject: [PATCH 295/513] [SPARK-38206][SS] Ignore nullability on comparing the data type of join keys on stream-stream join ### What changes were proposed in this pull request? This PR proposes to change the data type assertion on the join keys of a stream-stream join so that it ignores nullability. ### Why are the changes needed? The existing requirement on checking the data types of join keys is too restrictive, as it also requires the same nullability. In batch queries (I checked with HashJoinExec), nullability is ignored when checking the data types of join keys.
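Roughly, this is the difference between strict equality on Catalyst data types and the `sameType` helper that the new assertion relies on (a sketch using Spark-internal APIs; nullability lives in fields such as `containsNull` inside the type):

```scala
import org.apache.spark.sql.types._

// Two array types that differ only in element nullability.
val left  = ArrayType(IntegerType, containsNull = false)
val right = ArrayType(IntegerType, containsNull = true)

left == right         // false: case-class equality also compares containsNull
left.sameType(right)  // true: considered the same type once nullability is ignored
```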
### Does this PR introduce _any_ user-facing change? Yes, end users will no longer encounter the assertion error on join keys with different nullability in both keys. ### How was this patch tested? New test added. Closes #35599 from HeartSaVioR/SPARK-38206. Authored-by: Jungtaek Lim Signed-off-by: Jungtaek Lim --- .../StreamingSymmetricHashJoinExec.scala | 8 +- .../sql/streaming/StreamingJoinSuite.scala | 158 ++++++++++++++++++ 2 files changed, 165 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala index adb84a3b7d3fc..81888e0f7e189 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala @@ -174,7 +174,13 @@ case class StreamingSymmetricHashJoinExec( joinType == Inner || joinType == LeftOuter || joinType == RightOuter || joinType == FullOuter || joinType == LeftSemi, errorMessageForJoinType) - require(leftKeys.map(_.dataType) == rightKeys.map(_.dataType)) + + // The assertion against join keys is same as hash join for batch query. + require(leftKeys.length == rightKeys.length && + leftKeys.map(_.dataType) + .zip(rightKeys.map(_.dataType)) + .forall(types => types._1.sameType(types._2)), + "Join keys from two sides should have same length and types") private val storeConf = new StateStoreConf(conf) private val hadoopConfBcast = sparkContext.broadcast( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index e0926ef0a82ff..2fbe6c4fed392 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.streaming import java.io.File +import java.lang.{Integer => JInteger} import java.sql.Timestamp import java.util.{Locale, UUID} @@ -702,6 +703,53 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { total = Seq(2), updated = Seq(1), droppedByWatermark = Seq(0), removed = Some(Seq(0))) ) } + + test("joining non-nullable left join key with nullable right join key") { + val input1 = MemoryStream[Int] + val input2 = MemoryStream[JInteger] + + val joined = testForJoinKeyNullability(input1.toDF(), input2.toDF()) + testStream(joined)( + AddData(input1, 1, 5), + AddData(input2, JInteger.valueOf(1), JInteger.valueOf(5), JInteger.valueOf(10), null), + CheckNewAnswer(Row(1, 1, 2, 3), Row(5, 5, 10, 15)) + ) + } + + test("joining nullable left join key with non-nullable right join key") { + val input1 = MemoryStream[JInteger] + val input2 = MemoryStream[Int] + + val joined = testForJoinKeyNullability(input1.toDF(), input2.toDF()) + testStream(joined)( + AddData(input1, JInteger.valueOf(1), JInteger.valueOf(5), JInteger.valueOf(10), null), + AddData(input2, 1, 5), + CheckNewAnswer(Row(1, 1, 2, 3), Row(5, 5, 10, 15)) + ) + } + + test("joining nullable left join key with nullable right join key") { + val input1 = MemoryStream[JInteger] + val input2 = MemoryStream[JInteger] + + val joined = testForJoinKeyNullability(input1.toDF(), input2.toDF()) + testStream(joined)( + AddData(input1, JInteger.valueOf(1), JInteger.valueOf(5), JInteger.valueOf(10), null), + 
AddData(input2, JInteger.valueOf(1), JInteger.valueOf(5), null), + CheckNewAnswer( + Row(JInteger.valueOf(1), JInteger.valueOf(1), JInteger.valueOf(2), JInteger.valueOf(3)), + Row(JInteger.valueOf(5), JInteger.valueOf(5), JInteger.valueOf(10), JInteger.valueOf(15)), + Row(null, null, null, null)) + ) + } + + private def testForJoinKeyNullability(left: DataFrame, right: DataFrame): DataFrame = { + val df1 = left.selectExpr("value as leftKey", "value * 2 as leftValue") + val df2 = right.selectExpr("value as rightKey", "value * 3 as rightValue") + + df1.join(df2, expr("leftKey <=> rightKey")) + .select("leftKey", "rightKey", "leftValue", "rightValue") + } } @@ -1168,6 +1216,116 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { CheckNewAnswer(expectedOutput.head, expectedOutput.tail: _*) ) } + + test("left-outer: joining non-nullable left join key with nullable right join key") { + val input1 = MemoryStream[(Int, Int)] + val input2 = MemoryStream[(JInteger, Int)] + + val joined = testForLeftOuterJoinKeyNullability(input1.toDF(), input2.toDF()) + + testStream(joined)( + AddData(input1, (1, 1), (1, 2), (1, 3), (1, 4), (1, 5)), + AddData(input2, + (JInteger.valueOf(1), 3), + (JInteger.valueOf(1), 4), + (JInteger.valueOf(1), 5), + (JInteger.valueOf(1), 6) + ), + CheckNewAnswer( + Row(1, 1, 3, 3, 10, 6, 9), + Row(1, 1, 4, 4, 10, 8, 12), + Row(1, 1, 5, 5, 10, 10, 15)), + AddData(input1, (1, 21)), + // right-null join + AddData(input2, (JInteger.valueOf(1), 22)), // watermark = 11, no-data-batch computes nulls + CheckNewAnswer( + Row(1, null, 1, null, 10, 2, null), + Row(1, null, 2, null, 10, 4, null) + ) + ) + } + + test("left-outer: joining nullable left join key with non-nullable right join key") { + val input1 = MemoryStream[(JInteger, Int)] + val input2 = MemoryStream[(Int, Int)] + + val joined = testForLeftOuterJoinKeyNullability(input1.toDF(), input2.toDF()) + + testStream(joined)( + AddData(input1, + (JInteger.valueOf(1), 1), + (null, 2), + (JInteger.valueOf(1), 3), + (JInteger.valueOf(1), 4), + (JInteger.valueOf(1), 5)), + AddData(input2, (1, 3), (1, 4), (1, 5), (1, 6)), + CheckNewAnswer( + Row(1, 1, 3, 3, 10, 6, 9), + Row(1, 1, 4, 4, 10, 8, 12), + Row(1, 1, 5, 5, 10, 10, 15)), + // right-null join + AddData(input1, (JInteger.valueOf(1), 21)), + AddData(input2, (1, 22)), // watermark = 11, no-data-batch computes nulls + CheckNewAnswer( + Row(1, null, 1, null, 10, 2, null), + Row(null, null, 2, null, 10, 4, null) + ) + ) + } + + test("left-outer: joining nullable left join key with nullable right join key") { + val input1 = MemoryStream[(JInteger, Int)] + val input2 = MemoryStream[(JInteger, Int)] + + val joined = testForLeftOuterJoinKeyNullability(input1.toDF(), input2.toDF()) + + testStream(joined)( + AddData(input1, + (JInteger.valueOf(1), 1), + (null, 2), + (JInteger.valueOf(1), 3), + (null, 4), + (JInteger.valueOf(1), 5)), + AddData(input2, + (JInteger.valueOf(1), 3), + (null, 4), + (JInteger.valueOf(1), 5), + (JInteger.valueOf(1), 6)), + CheckNewAnswer( + Row(1, 1, 3, 3, 10, 6, 9), + Row(null, null, 4, 4, 10, 8, 12), + Row(1, 1, 5, 5, 10, 10, 15)), + // right-null join + AddData(input1, (JInteger.valueOf(1), 21)), + AddData(input2, (JInteger.valueOf(1), 22)), // watermark = 11, no-data-batch computes nulls + CheckNewAnswer( + Row(1, null, 1, null, 10, 2, null), + Row(null, null, 2, null, 10, 4, null) + ) + ) + } + + private def testForLeftOuterJoinKeyNullability(left: DataFrame, right: DataFrame): DataFrame = { + val df1 = left + .selectExpr("_1 as leftKey1", "_2 as 
leftKey2", "timestamp_seconds(_2) as leftTime", + "_2 * 2 as leftValue") + .withWatermark("leftTime", "10 seconds") + val df2 = right + .selectExpr( + "_1 as rightKey1", "_2 as rightKey2", "timestamp_seconds(_2) as rightTime", + "_2 * 3 as rightValue") + .withWatermark("rightTime", "10 seconds") + + val windowed1 = df1.select('leftKey1, 'leftKey2, + window('leftTime, "10 second").as('leftWindow), 'leftValue) + val windowed2 = df2.select('rightKey1, 'rightKey2, + window('rightTime, "10 second").as('rightWindow), 'rightValue) + windowed1.join(windowed2, + expr("leftKey1 <=> rightKey1 AND leftKey2 = rightKey2 AND leftWindow = rightWindow"), + "left_outer" + ).select('leftKey1, 'rightKey1, 'leftKey2, 'rightKey2, $"leftWindow.end".cast("long"), + 'leftValue, 'rightValue) + } } class StreamingFullOuterJoinSuite extends StreamingJoinSuite { From e6c5687ac7364f1dfb43807d372172abab0f9818 Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Tue, 22 Feb 2022 15:40:29 +0800 Subject: [PATCH 296/513] [SPARK-38155][SQL] Disallow distinct aggregate in lateral subqueries with unsupported predicates ### What changes were proposed in this pull request? This PR blocks lateral subqueries that contain DISTINCT aggregate and correlated non-equality predicates in CheckAnalysis. ### Why are the changes needed? To avoid incorrect results. `DISTINCT` will be rewritten into Aggregate during the optimization phase and only correlated equality predicates are supported with Aggregate. Note, we only need to block this pattern for lateral subqueries because 1) Scalar subqueries are either aggregated or can only contain one row, so they will not encounter this issue 2) IN/EXISTS subqueries can potentially have the same issue but it won't impact the results of these subqueries, for example, `SELECT * FROM t1 WHERE c1 IN (SELECT DISTINCT c1 FROM t2 WHERE t1.c1 > t2.c1)`. ### Does this PR introduce _any_ user-facing change? Yes. Queries with lateral joins that contain DISTINCT aggregate will be blocked: ``` SELECT * FROM t1 JOIN LATERAL (SELECT DISTINCT c2 FROM t2 WHERE c1 > t1.c1) AnalysisException: Correlated column is not allowed in predicate ``` ### How was this patch tested? Unit tests. Closes #35469 from allisonwang-db/spark-38155-distinct. Authored-by: allisonwang-db Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/CheckAnalysis.scala | 23 ++++++++++++++----- .../optimizer/DecorrelateInnerQuery.scala | 5 ++++ .../DecorrelateInnerQuerySuite.scala | 14 +++++++++++ .../org/apache/spark/sql/SubquerySuite.scala | 10 ++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 2da2686bdc847..eacb5b266d660 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -755,7 +755,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { } // Validate to make sure the correlations appearing in the query are valid and // allowed by spark. 
- checkCorrelationsInSubquery(expr.plan, isScalarOrLateral = true) + checkCorrelationsInSubquery(expr.plan, isScalar = true) case _: LateralSubquery => assert(plan.isInstanceOf[LateralJoin]) @@ -774,7 +774,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { } // Validate to make sure the correlations appearing in the query are valid and // allowed by spark. - checkCorrelationsInSubquery(expr.plan, isScalarOrLateral = true) + checkCorrelationsInSubquery(expr.plan, isLateral = true) case inSubqueryOrExistsSubquery => plan match { @@ -827,7 +827,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { */ private def checkCorrelationsInSubquery( sub: LogicalPlan, - isScalarOrLateral: Boolean = false): Unit = { + isScalar: Boolean = false, + isLateral: Boolean = false): Unit = { // Validate that correlated aggregate expression do not contain a mixture // of outer and local references. def checkMixedReferencesInsideAggregateExpr(expr: Expression): Unit = { @@ -852,7 +853,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { // DecorrelateInnerQuery is enabled. Otherwise, only Filter can only outer references. def canHostOuter(plan: LogicalPlan): Boolean = plan match { case _: Filter => true - case _: Project => isScalarOrLateral && SQLConf.get.decorrelateInnerQueryEnabled + case _: Project => (isScalar || isLateral) && SQLConf.get.decorrelateInnerQueryEnabled case _ => false } @@ -980,8 +981,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { // up to the operator producing the correlated values. // Category 1: - // ResolvedHint, Distinct, LeafNode, Repartition, and SubqueryAlias - case _: ResolvedHint | _: Distinct | _: LeafNode | _: Repartition | _: SubqueryAlias => + // ResolvedHint, LeafNode, Repartition, and SubqueryAlias + case _: ResolvedHint | _: LeafNode | _: Repartition | _: SubqueryAlias => // Category 2: // These operators can be anywhere in a correlated subquery. @@ -1015,6 +1016,16 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { failOnInvalidOuterReference(a) failOnUnsupportedCorrelatedPredicate(unsupportedPredicates.toSeq, a) + // Distinct does not host any correlated expressions, but during the optimization phase + // it will be rewritten as Aggregate, which can only be on a correlation path if the + // correlation contains only the supported correlated equality predicates. + // Only block it for lateral subqueries because scalar subqueries must be aggregated + // and it does not impact the results for IN/EXISTS subqueries. + case d: Distinct => + if (isLateral) { + failOnUnsupportedCorrelatedPredicate(unsupportedPredicates.toSeq, d) + } + // Join can host correlated expressions. 
case j @ Join(left, right, joinType, _, _) => joinType match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala index 71f3897ccf50b..49a159496d2c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala @@ -599,6 +599,11 @@ object DecorrelateInnerQuery extends PredicateHelper { (newAggregate, joinCond, outerReferenceMap) } + case d: Distinct => + val (newChild, joinCond, outerReferenceMap) = + decorrelate(d.child, parentOuterReferences, aggregated = true) + (d.copy(child = newChild), joinCond, outerReferenceMap) + case j @ Join(left, right, joinType, condition, _) => val outerReferences = collectOuterReferences(j.expressions) // Join condition containing outer references is not supported. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuerySuite.scala index b8886a5c0b2fe..dc50039da200b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuerySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuerySuite.scala @@ -282,4 +282,18 @@ class DecorrelateInnerQuerySuite extends PlanTest { ).analyze check(innerPlan, outerPlan, correctAnswer, Seq(y <=> y, x === a, y === z)) } + + test("SPARK-38155: distinct with non-equality correlated predicates") { + val outerPlan = testRelation2 + val innerPlan = + Distinct( + Project(Seq(b), + Filter(OuterReference(x) > a, testRelation))) + val correctAnswer = + Distinct( + Project(Seq(b, x), + Filter(x > a, + DomainJoin(Seq(x), testRelation)))) + check(innerPlan, outerPlan, correctAnswer, Seq(x <=> x)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 89157be3097a6..b14404b0c3a3a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -1982,4 +1982,14 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark Row(4, null) :: Nil) } } + + test("SPARK-38155: disallow distinct aggregate in lateral subqueries") { + withTempView("t1", "t2") { + Seq((0, 1)).toDF("c1", "c2").createOrReplaceTempView("t1") + Seq((1, 2), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t2") + assert(intercept[AnalysisException] { + sql("SELECT * FROM t1 JOIN LATERAL (SELECT DISTINCT c2 FROM t2 WHERE c1 > t1.c1)") + }.getMessage.contains("Correlated column is not allowed in predicate")) + } + } } From a103a49fa88e4bb4bba6a842f1d78a188fafeacf Mon Sep 17 00:00:00 2001 From: itholic Date: Tue, 22 Feb 2022 16:41:16 +0900 Subject: [PATCH 297/513] [SPARK-38279][TESTS][3.2] Pin MarkupSafe to 2.0.1 fix linter failure This PR proposes to pin the Python package `markupsafe` to 2.0.1 to fix the CI failure as below. ``` ImportError: cannot import name 'soft_unicode' from 'markupsafe' (/home/runner/work/_temp/setup-sam-43osIE/.venv/lib/python3.10/site-packages/markupsafe/__init__.py) ``` Since `markupsafe==2.1.0` has removed `soft_unicode`, `from markupsafe import soft_unicode` no longer working properly. 
See https://github.com/aws/aws-sam-cli/issues/3661 for more detail. To fix the CI failure on branch-3.2. No. The existing tests should pass. Closes #35602 from itholic/SPARK-38279. Authored-by: itholic Signed-off-by: Hyukjin Kwon (cherry picked from commit 79099cf7baf6e094884b5f77e82a4915272f15c5) Signed-off-by: Hyukjin Kwon --- .github/workflows/build_and_test.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 266d4ab9e575c..67f57218d316b 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -529,8 +529,10 @@ jobs: # See also https://github.com/sphinx-doc/sphinx/issues/7551. # Jinja2 3.0.0+ causes error when building with Sphinx. # See also https://issues.apache.org/jira/browse/SPARK-35375. - python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0' - python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' + # Pin the MarkupSafe to 2.0.1 to resolve the CI error. + # See also https://issues.apache.org/jira/browse/SPARK-38279. + python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' + python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' apt-get update -y apt-get install -y ruby ruby-dev Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'e1071', 'roxygen2'), repos='https://cloud.r-project.org/')" From 48b56c0ff6f1fc40c410fd8f7b78eeaa041cb664 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 22 Feb 2022 18:51:36 +0900 Subject: [PATCH 298/513] [SPARK-38278][PYTHON] Add SparkContext.addArchive in PySpark ### What changes were proposed in this pull request? This PR proposes to add `SparkContext.addArchive` to the PySpark side, matching the Scala-side method added in https://github.com/apache/spark/pull/30486. ### Why are the changes needed? To have API parity with the Scala side. ### Does this PR introduce _any_ user-facing change? Yes, this PR exposes an API (`SparkContext.addArchive`) that exists on the Scala side. ### How was this patch tested? Doctest was added. Closes #35603 from HyukjinKwon/python-addArchive. Authored-by: Hyukjin Kwon Signed-off-by: Kousuke Saruta --- python/docs/source/reference/pyspark.rst | 1 + python/pyspark/context.py | 44 ++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/python/docs/source/reference/pyspark.rst b/python/docs/source/reference/pyspark.rst index 6d4d0b55477c1..bf4e66ee3353e 100644 --- a/python/docs/source/reference/pyspark.rst +++ b/python/docs/source/reference/pyspark.rst @@ -53,6 +53,7 @@ Spark Context APIs SparkContext.PACKAGE_EXTENSIONS SparkContext.accumulator + SparkContext.addArchive SparkContext.addFile SparkContext.addPyFile SparkContext.applicationId diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 68f748e68faad..2f1746b0a4346 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -1278,6 +1278,50 @@ def addPyFile(self, path: str) -> None: importlib.invalidate_caches() + def addArchive(self, path: str) -> None: + """ + Add an archive to be downloaded with this Spark job on every node. + The `path` passed can be either a local file, a file in HDFS + (or other Hadoop-supported filesystems), or an HTTP, HTTPS or + FTP URI.
+ + To access the file in Spark jobs, use :meth:`SparkFiles.get` with the + filename to find its download/unpacked location. The given path should + be one of .zip, .tar, .tar.gz, .tgz and .jar. + + .. versionadded:: 3.3.0 + + Notes + ----- + A path can be added only once. Subsequent additions of the same path are ignored. + This API is experimental. + + Examples + -------- + Creates a zipped file that contains a text file written '100'. + + >>> import zipfile + >>> from pyspark import SparkFiles + >>> path = os.path.join(tempdir, "test.txt") + >>> zip_path = os.path.join(tempdir, "test.zip") + >>> with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipped: + ... with open(path, "w") as f: + ... _ = f.write("100") + ... zipped.write(path, os.path.basename(path)) + >>> sc.addArchive(zip_path) + + Reads the '100' as an integer in the zipped file, and processes + it with the data in the RDD. + + >>> def func(iterator): + ... with open("%s/test.txt" % SparkFiles.get("test.zip")) as f: + ... v = int(f.readline()) + ... return [x * int(v) for x in iterator] + >>> sc.parallelize([1, 2, 3, 4]).mapPartitions(func).collect() + [100, 200, 300, 400] + """ + self._jsc.sc().addArchive(path) + def setCheckpointDir(self, dirName: str) -> None: """ Set the directory under which RDDs are going to be checkpointed. The From ef818ed86ce41be55bd962a5c809974f957f8734 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 22 Feb 2022 19:12:02 +0800 Subject: [PATCH 299/513] [SPARK-38283][SQL] Test invalid datetime parsing under ANSI mode ### What changes were proposed in this pull request? Run datetime-parsing-invalid.sql under ANSI mode in SQLQueryTestSuite for improving test coverage. Also, we can simply set ANSI mode as off in DateFunctionsSuite, so that the test suite can pass after we set up a new test job with ANSI on. ### Why are the changes needed? Improve test coverage and fix DateFunctionsSuite under ANSI mode. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #35606 from gengliangwang/fixDateFuncSuite. 
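The DateFunctionsSuite side of this is just a conf pin; roughly, the suite keeps ANSI mode off (a sketch mirroring the DateFunctionsSuite change in the diff below, with the imports spelled out):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession

class DateFunctionsSuite extends QueryTest with SharedSparkSession {
  // Pin ANSI mode off for this suite; the error cases under ANSI are exercised by the
  // golden-file test ansi/datetime-parsing-invalid.sql added in this patch.
  override def sparkConf: SparkConf =
    super.sparkConf.set(SQLConf.ANSI_ENABLED.key, "false")

  // ... existing tests ...
}
```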
Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../inputs/ansi/datetime-parsing-invalid.sql | 2 + .../ansi/datetime-parsing-invalid.sql.out | 263 ++++++++++++++++++ .../apache/spark/sql/DateFunctionsSuite.scala | 6 +- 3 files changed, 270 insertions(+), 1 deletion(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/ansi/datetime-parsing-invalid.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/datetime-parsing-invalid.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/datetime-parsing-invalid.sql new file mode 100644 index 0000000000000..70022f33337d4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/datetime-parsing-invalid.sql @@ -0,0 +1,2 @@ +--IMPORT datetime-parsing-invalid.sql + diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out new file mode 100644 index 0000000000000..e6dd07b5658f6 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out @@ -0,0 +1,263 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 29 + + +-- !query +select to_timestamp('294248', 'y') +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow + + +-- !query +select to_timestamp('1', 'yy') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '1' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + +-- !query +select to_timestamp('-12', 'yy') +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '-12' could not be parsed at index 0. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select to_timestamp('123', 'yy') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '123' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + +-- !query +select to_timestamp('1', 'yyy') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '1' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + +-- !query +select to_timestamp('1234567', 'yyyyyyy') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select to_timestamp('366', 'D') +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Invalid date 'DayOfYear 366' as '1970' is not a leap year. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select to_timestamp('9', 'DD') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + +-- !query +select to_timestamp('366', 'DD') +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '366' could not be parsed, unparsed text found at index 2. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select to_timestamp('9', 'DDD') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + +-- !query +select to_timestamp('99', 'DDD') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '99' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + +-- !query +select to_timestamp('30-365', 'dd-DDD') +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Conflict found: Field DayOfMonth 30 differs from DayOfMonth 31 derived from 1970-12-31. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select to_timestamp('11-365', 'MM-DDD') +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Conflict found: Field MonthOfYear 11 differs from MonthOfYear 12 derived from 1970-12-31. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select to_timestamp('2019-366', 'yyyy-DDD') +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2019-366' could not be parsed: Invalid date 'DayOfYear 366' as '2019' is not a leap year. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select to_timestamp('12-30-365', 'MM-dd-DDD') +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Conflict found: Field DayOfMonth 30 differs from DayOfMonth 31 derived from 1970-12-31. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select to_timestamp('2020-01-365', 'yyyy-dd-DDD') +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-01-365' could not be parsed: Conflict found: Field DayOfMonth 30 differs from DayOfMonth 1 derived from 2020-12-30. If necessary set spark.sql.ansi.enabled to false to bypass this error. 
+ + +-- !query +select to_timestamp('2020-10-350', 'yyyy-MM-DDD') +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-10-350' could not be parsed: Conflict found: Field MonthOfYear 12 differs from MonthOfYear 10 derived from 2020-12-15. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select to_timestamp('2020-11-31-366', 'yyyy-MM-dd-DDD') +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-11-31-366' could not be parsed: Invalid date 'NOVEMBER 31'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select from_csv('2018-366', 'date Date', map('dateFormat', 'yyyy-DDD')) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '2018-366' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + +-- !query +select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-01-27T20:06:11.847' could not be parsed at index 10. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text 'Unparseable' could not be parsed at index 0. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-01-27T20:06:11.847' could not be parsed at index 10. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text 'Unparseable' could not be parsed at index 0. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-01-27T20:06:11.847' could not be parsed at index 10. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text 'Unparseable' could not be parsed at index 0. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-01-27T20:06:11.847' could not be parsed at index 10. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text 'Unparseable' could not be parsed at index 0. If necessary set spark.sql.ansi.enabled to false to bypass this error. 
+ + +-- !query +select cast("Unparseable" as timestamp) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast Unparseable to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select cast("Unparseable" as date) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast Unparseable to DateType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index 543f845aff735..762bc15b4791e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -23,7 +23,7 @@ import java.time.{Instant, LocalDateTime, ZoneId} import java.util.{Locale, TimeZone} import java.util.concurrent.TimeUnit -import org.apache.spark.{SparkException, SparkUpgradeException} +import org.apache.spark.{SparkConf, SparkException, SparkUpgradeException} import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{CEST, LA} import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.functions._ @@ -35,6 +35,10 @@ import org.apache.spark.unsafe.types.CalendarInterval class DateFunctionsSuite extends QueryTest with SharedSparkSession { import testImplicits._ + // The test cases which throw exceptions under ANSI mode are covered by date.sql and + // datetime-parsing-invalid.sql in org.apache.spark.sql.SQLQueryTestSuite. + override def sparkConf: SparkConf = super.sparkConf.set(SQLConf.ANSI_ENABLED.key, "false") + test("function current_date") { val df1 = Seq((1, 2), (3, 1)).toDF("a", "b") val d0 = DateTimeUtils.currentDate(ZoneId.systemDefault()) From c82e0fe8de67cf4eef33184f6cb0a7eb9461c9f4 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 22 Feb 2022 12:29:05 +0100 Subject: [PATCH 300/513] [SPARK-37422][PYTHON][MLLIB] Inline typehints for pyspark.mllib.feature ### What changes were proposed in this pull request? This PR migrates type `pyspark.mllib.feature` annotations from stub file to inline type hints. ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. Closes #35546 from zero323/SPARK-37422. 
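The `@overload` stubs moved inline in the diff below encode the long-standing runtime contract of `transform`: passing a single vector returns a `Vector`, while passing an `RDD` of vectors returns an `RDD[Vector]`. A minimal usage sketch of that contract, assuming an active `SparkContext` named `sc` (the printed values are approximate and not copied from the doctests):

```python
from pyspark.mllib.feature import Normalizer
from pyspark.mllib.linalg import Vectors

v = Vectors.dense(range(3))              # DenseVector([0.0, 1.0, 2.0])
nor = Normalizer(p=1)                    # normalize with the L1 norm

# First overload: a single vector in, a Vector out.
nor.transform(v)                         # roughly DenseVector([0.0, 0.3333, 0.6667])

# Second overload: an RDD of vectors in, an RDD[Vector] out.
nor.transform(sc.parallelize([v])).collect()
```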
Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/mllib/feature.py | 218 ++++++++++++++++++++++--------- python/pyspark/mllib/feature.pyi | 169 ------------------------ 2 files changed, 155 insertions(+), 232 deletions(-) delete mode 100644 python/pyspark/mllib/feature.pyi diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index 320ba0029a0c8..c17bb8c15723f 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -20,6 +20,8 @@ """ import sys import warnings +from typing import Dict, Hashable, Iterable, List, Optional, Tuple, Union, overload, TYPE_CHECKING + from py4j.protocol import Py4JJavaError from pyspark import since @@ -28,6 +30,15 @@ from pyspark.mllib.linalg import Vectors, _convert_to_vector from pyspark.mllib.util import JavaLoader, JavaSaveable +from pyspark.context import SparkContext +from pyspark.mllib.linalg import Vector +from pyspark.mllib.regression import LabeledPoint +from py4j.java_collections import JavaMap + +if TYPE_CHECKING: + from pyspark.mllib._typing import VectorLike + from py4j.java_collections import JavaMap # type: ignore[import] + __all__ = [ "Normalizer", "StandardScalerModel", @@ -48,7 +59,17 @@ class VectorTransformer: Base class for transformation of a vector or RDD of vector """ - def transform(self, vector): + @overload + def transform(self, vector: "VectorLike") -> Vector: + ... + + @overload + def transform(self, vector: RDD["VectorLike"]) -> RDD[Vector]: + ... + + def transform( + self, vector: Union["VectorLike", RDD["VectorLike"]] + ) -> Union[Vector, RDD[Vector]]: """ Applies transformation on a vector. @@ -94,11 +115,21 @@ class Normalizer(VectorTransformer): DenseVector([0.0, 0.5, 1.0]) """ - def __init__(self, p=2.0): + def __init__(self, p: float = 2.0): assert p >= 1.0, "p should be greater than 1.0" self.p = float(p) - def transform(self, vector): + @overload + def transform(self, vector: "VectorLike") -> Vector: + ... + + @overload + def transform(self, vector: RDD["VectorLike"]) -> RDD[Vector]: + ... + + def transform( + self, vector: Union["VectorLike", RDD["VectorLike"]] + ) -> Union[Vector, RDD[Vector]]: """ Applies unit length normalization on a vector. @@ -127,7 +158,17 @@ class JavaVectorTransformer(JavaModelWrapper, VectorTransformer): Wrapper for the model in JVM """ - def transform(self, vector): + @overload + def transform(self, vector: "VectorLike") -> Vector: + ... + + @overload + def transform(self, vector: RDD["VectorLike"]) -> RDD[Vector]: + ... + + def transform( + self, vector: Union["VectorLike", RDD["VectorLike"]] + ) -> Union[Vector, RDD[Vector]]: """ Applies transformation on a vector or an RDD[Vector]. @@ -156,7 +197,17 @@ class StandardScalerModel(JavaVectorTransformer): .. versionadded:: 1.2.0 """ - def transform(self, vector): + @overload + def transform(self, vector: "VectorLike") -> Vector: + ... + + @overload + def transform(self, vector: RDD["VectorLike"]) -> RDD[Vector]: + ... + + def transform( + self, vector: Union["VectorLike", RDD["VectorLike"]] + ) -> Union[Vector, RDD[Vector]]: """ Applies standardization transformation on a vector. 
@@ -183,7 +234,7 @@ def transform(self, vector): return JavaVectorTransformer.transform(self, vector) @since("1.4.0") - def setWithMean(self, withMean): + def setWithMean(self, withMean: bool) -> "StandardScalerModel": """ Setter of the boolean which decides whether it uses mean or not @@ -192,7 +243,7 @@ def setWithMean(self, withMean): return self @since("1.4.0") - def setWithStd(self, withStd): + def setWithStd(self, withStd: bool) -> "StandardScalerModel": """ Setter of the boolean which decides whether it uses std or not @@ -200,33 +251,33 @@ def setWithStd(self, withStd): self.call("setWithStd", withStd) return self - @property + @property # type: ignore[misc] @since("2.0.0") - def withStd(self): + def withStd(self) -> bool: """ Returns if the model scales the data to unit standard deviation. """ return self.call("withStd") - @property + @property # type: ignore[misc] @since("2.0.0") - def withMean(self): + def withMean(self) -> bool: """ Returns if the model centers the data before scaling. """ return self.call("withMean") - @property + @property # type: ignore[misc] @since("2.0.0") - def std(self): + def std(self) -> Vector: """ Return the column standard deviation values. """ return self.call("std") - @property + @property # type: ignore[misc] @since("2.0.0") - def mean(self): + def mean(self) -> Vector: """ Return the column mean values. """ @@ -271,13 +322,13 @@ class StandardScaler: True """ - def __init__(self, withMean=False, withStd=True): + def __init__(self, withMean: bool = False, withStd: bool = True): if not (withMean or withStd): warnings.warn("Both withMean and withStd are false. The model does nothing.") self.withMean = withMean self.withStd = withStd - def fit(self, dataset): + def fit(self, dataset: RDD["VectorLike"]) -> "StandardScalerModel": """ Computes the mean and variance and stores as a model to be used for later scaling. @@ -306,7 +357,17 @@ class ChiSqSelectorModel(JavaVectorTransformer): .. versionadded:: 1.4.0 """ - def transform(self, vector): + @overload + def transform(self, vector: "VectorLike") -> Vector: + ... + + @overload + def transform(self, vector: RDD["VectorLike"]) -> RDD[Vector]: + ... + + def transform( + self, vector: Union["VectorLike", RDD["VectorLike"]] + ) -> Union[Vector, RDD[Vector]]: """ Applies transformation on a vector. @@ -379,12 +440,12 @@ class ChiSqSelector: def __init__( self, - numTopFeatures=50, - selectorType="numTopFeatures", - percentile=0.1, - fpr=0.05, - fdr=0.05, - fwe=0.05, + numTopFeatures: int = 50, + selectorType: str = "numTopFeatures", + percentile: float = 0.1, + fpr: float = 0.05, + fdr: float = 0.05, + fwe: float = 0.05, ): self.numTopFeatures = numTopFeatures self.selectorType = selectorType @@ -394,7 +455,7 @@ def __init__( self.fwe = fwe @since("2.1.0") - def setNumTopFeatures(self, numTopFeatures): + def setNumTopFeatures(self, numTopFeatures: int) -> "ChiSqSelector": """ set numTopFeature for feature selection by number of top features. Only applicable when selectorType = "numTopFeatures". @@ -403,7 +464,7 @@ def setNumTopFeatures(self, numTopFeatures): return self @since("2.1.0") - def setPercentile(self, percentile): + def setPercentile(self, percentile: float) -> "ChiSqSelector": """ set percentile [0.0, 1.0] for feature selection by percentile. Only applicable when selectorType = "percentile". 
@@ -412,7 +473,7 @@ def setPercentile(self, percentile): return self @since("2.1.0") - def setFpr(self, fpr): + def setFpr(self, fpr: float) -> "ChiSqSelector": """ set FPR [0.0, 1.0] for feature selection by FPR. Only applicable when selectorType = "fpr". @@ -421,7 +482,7 @@ def setFpr(self, fpr): return self @since("2.2.0") - def setFdr(self, fdr): + def setFdr(self, fdr: float) -> "ChiSqSelector": """ set FDR [0.0, 1.0] for feature selection by FDR. Only applicable when selectorType = "fdr". @@ -430,7 +491,7 @@ def setFdr(self, fdr): return self @since("2.2.0") - def setFwe(self, fwe): + def setFwe(self, fwe: float) -> "ChiSqSelector": """ set FWE [0.0, 1.0] for feature selection by FWE. Only applicable when selectorType = "fwe". @@ -439,7 +500,7 @@ def setFwe(self, fwe): return self @since("2.1.0") - def setSelectorType(self, selectorType): + def setSelectorType(self, selectorType: str) -> "ChiSqSelector": """ set the selector type of the ChisqSelector. Supported options: "numTopFeatures" (default), "percentile", "fpr", "fdr", "fwe". @@ -447,7 +508,7 @@ def setSelectorType(self, selectorType): self.selectorType = str(selectorType) return self - def fit(self, data): + def fit(self, data: RDD[LabeledPoint]) -> "ChiSqSelectorModel": """ Returns a ChiSquared feature selector. @@ -500,7 +561,7 @@ class PCA: -4.013... """ - def __init__(self, k): + def __init__(self, k: int): """ Parameters ---------- @@ -509,7 +570,7 @@ def __init__(self, k): """ self.k = int(k) - def fit(self, data): + def fit(self, data: RDD["VectorLike"]) -> PCAModel: """ Computes a [[PCAModel]] that contains the principal components of the input vectors. @@ -548,12 +609,12 @@ class HashingTF: SparseVector(100, {...}) """ - def __init__(self, numFeatures=1 << 20): + def __init__(self, numFeatures: int = 1 << 20): self.numFeatures = numFeatures self.binary = False @since("2.0.0") - def setBinary(self, value): + def setBinary(self, value: bool) -> "HashingTF": """ If True, term frequency vector will be binary such that non-zero term counts will be set to 1 @@ -563,12 +624,22 @@ def setBinary(self, value): return self @since("1.2.0") - def indexOf(self, term): + def indexOf(self, term: Hashable) -> int: """Returns the index of the input term.""" return hash(term) % self.numFeatures + @overload + def transform(self, document: Iterable[Hashable]) -> Vector: + ... + + @overload + def transform(self, document: RDD[Iterable[Hashable]]) -> RDD[Vector]: + ... + @since("1.2.0") - def transform(self, document): + def transform( + self, document: Union[Iterable[Hashable], RDD[Iterable[Hashable]]] + ) -> Union[Vector, RDD[Vector]]: """ Transforms the input document (list of terms) to term frequency vectors, or transform the RDD of document to RDD of term @@ -577,7 +648,7 @@ def transform(self, document): if isinstance(document, RDD): return document.map(self.transform) - freq = {} + freq: Dict[int, float] = {} for term in document: i = self.indexOf(term) freq[i] = 1.0 if self.binary else freq.get(i, 0) + 1.0 @@ -591,7 +662,15 @@ class IDFModel(JavaVectorTransformer): .. versionadded:: 1.2.0 """ - def transform(self, x): + @overload + def transform(self, x: "VectorLike") -> Vector: + ... + + @overload + def transform(self, x: RDD["VectorLike"]) -> RDD[Vector]: + ... + + def transform(self, x: Union["VectorLike", RDD["VectorLike"]]) -> Union[Vector, RDD[Vector]]: """ Transforms term frequency (TF) vectors to TF-IDF vectors. 
@@ -621,21 +700,21 @@ def transform(self, x): return JavaVectorTransformer.transform(self, x) @since("1.4.0") - def idf(self): + def idf(self) -> Vector: """ Returns the current IDF vector. """ return self.call("idf") @since("3.0.0") - def docFreq(self): + def docFreq(self) -> List[int]: """ Returns the document frequency. """ return self.call("docFreq") @since("3.0.0") - def numDocs(self): + def numDocs(self) -> int: """ Returns number of documents evaluated to compute idf """ @@ -684,10 +763,10 @@ class IDF: SparseVector(4, {1: 0.0, 3: 0.5754}) """ - def __init__(self, minDocFreq=0): + def __init__(self, minDocFreq: int = 0): self.minDocFreq = minDocFreq - def fit(self, dataset): + def fit(self, dataset: RDD["VectorLike"]) -> IDFModel: """ Computes the inverse document frequency. @@ -704,12 +783,12 @@ def fit(self, dataset): return IDFModel(jmodel) -class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader): +class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader["Word2VecModel"]): """ class for Word2Vec model """ - def transform(self, word): + def transform(self, word: str) -> Vector: # type: ignore[override] """ Transforms a word to its vector representation @@ -734,7 +813,7 @@ def transform(self, word): except Py4JJavaError: raise ValueError("%s not found" % word) - def findSynonyms(self, word, num): + def findSynonyms(self, word: Union[str, "VectorLike"], num: int) -> Iterable[Tuple[str, float]]: """ Find synonyms of a word @@ -763,7 +842,7 @@ def findSynonyms(self, word, num): return zip(words, similarity) @since("1.4.0") - def getVectors(self): + def getVectors(self) -> "JavaMap": """ Returns a map of words to their vector representations. """ @@ -771,10 +850,12 @@ def getVectors(self): @classmethod @since("1.5.0") - def load(cls, sc, path): + def load(cls, sc: SparkContext, path: str) -> "Word2VecModel": """ Load a model from the given path. """ + assert sc._jvm is not None + jmodel = sc._jvm.org.apache.spark.mllib.feature.Word2VecModel.load(sc._jsc.sc(), path) model = sc._jvm.org.apache.spark.mllib.api.python.Word2VecModelWrapper(jmodel) return Word2VecModel(model) @@ -837,7 +918,7 @@ class Word2Vec: ... pass """ - def __init__(self): + def __init__(self) -> None: """ Construct Word2Vec instance """ @@ -845,12 +926,12 @@ def __init__(self): self.learningRate = 0.025 self.numPartitions = 1 self.numIterations = 1 - self.seed = None + self.seed: Optional[int] = None self.minCount = 5 self.windowSize = 5 @since("1.2.0") - def setVectorSize(self, vectorSize): + def setVectorSize(self, vectorSize: int) -> "Word2Vec": """ Sets vector size (default: 100). """ @@ -858,7 +939,7 @@ def setVectorSize(self, vectorSize): return self @since("1.2.0") - def setLearningRate(self, learningRate): + def setLearningRate(self, learningRate: float) -> "Word2Vec": """ Sets initial learning rate (default: 0.025). """ @@ -866,7 +947,7 @@ def setLearningRate(self, learningRate): return self @since("1.2.0") - def setNumPartitions(self, numPartitions): + def setNumPartitions(self, numPartitions: int) -> "Word2Vec": """ Sets number of partitions (default: 1). Use a small number for accuracy. @@ -875,7 +956,7 @@ def setNumPartitions(self, numPartitions): return self @since("1.2.0") - def setNumIterations(self, numIterations): + def setNumIterations(self, numIterations: int) -> "Word2Vec": """ Sets number of iterations (default: 1), which should be smaller than or equal to number of partitions. 
@@ -884,7 +965,7 @@ def setNumIterations(self, numIterations): return self @since("1.2.0") - def setSeed(self, seed): + def setSeed(self, seed: int) -> "Word2Vec": """ Sets random seed. """ @@ -892,7 +973,7 @@ def setSeed(self, seed): return self @since("1.4.0") - def setMinCount(self, minCount): + def setMinCount(self, minCount: int) -> "Word2Vec": """ Sets minCount, the minimum number of times a token must appear to be included in the word2vec model's vocabulary (default: 5). @@ -901,14 +982,14 @@ def setMinCount(self, minCount): return self @since("2.0.0") - def setWindowSize(self, windowSize): + def setWindowSize(self, windowSize: int) -> "Word2Vec": """ Sets window size (default: 5). """ self.windowSize = windowSize return self - def fit(self, data): + def fit(self, data: RDD[List[str]]) -> "Word2VecModel": """ Computes the vector representation of each word in vocabulary. @@ -959,13 +1040,24 @@ class ElementwiseProduct(VectorTransformer): [DenseVector([2.0, 2.0, 9.0]), DenseVector([9.0, 6.0, 12.0])] """ - def __init__(self, scalingVector): + def __init__(self, scalingVector: Vector) -> None: self.scalingVector = _convert_to_vector(scalingVector) - @since("1.5.0") - def transform(self, vector): + @overload + def transform(self, vector: "VectorLike") -> Vector: + ... + + @overload + def transform(self, vector: RDD["VectorLike"]) -> RDD[Vector]: + ... + + def transform( + self, vector: Union["VectorLike", RDD["VectorLike"]] + ) -> Union[Vector, RDD[Vector]]: """ Computes the Hadamard product of the vector. + + .. versionadded:: 1.5.0 """ if isinstance(vector, RDD): vector = vector.map(_convert_to_vector) @@ -975,7 +1067,7 @@ def transform(self, vector): return callMLlibFunc("elementwiseProductVector", self.scalingVector, vector) -def _test(): +def _test() -> None: import doctest from pyspark.sql import SparkSession diff --git a/python/pyspark/mllib/feature.pyi b/python/pyspark/mllib/feature.pyi deleted file mode 100644 index e7ab7fc81a8ff..0000000000000 --- a/python/pyspark/mllib/feature.pyi +++ /dev/null @@ -1,169 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import overload -from typing import Iterable, Hashable, List, Tuple, Union - -from pyspark.mllib._typing import VectorLike -from pyspark.context import SparkContext -from pyspark.rdd import RDD -from pyspark.mllib.common import JavaModelWrapper -from pyspark.mllib.linalg import Vector -from pyspark.mllib.regression import LabeledPoint -from pyspark.mllib.util import JavaLoader, JavaSaveable - -from py4j.java_collections import JavaMap # type: ignore[import] - -class VectorTransformer: - @overload - def transform(self, vector: VectorLike) -> Vector: ... - @overload - def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... 
- -class Normalizer(VectorTransformer): - p: float - def __init__(self, p: float = ...) -> None: ... - @overload - def transform(self, vector: VectorLike) -> Vector: ... - @overload - def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... - -class JavaVectorTransformer(JavaModelWrapper, VectorTransformer): - @overload - def transform(self, vector: VectorLike) -> Vector: ... - @overload - def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... - -class StandardScalerModel(JavaVectorTransformer): - @overload - def transform(self, vector: VectorLike) -> Vector: ... - @overload - def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... - def setWithMean(self, withMean: bool) -> StandardScalerModel: ... - def setWithStd(self, withStd: bool) -> StandardScalerModel: ... - @property - def withStd(self) -> bool: ... - @property - def withMean(self) -> bool: ... - @property - def std(self) -> Vector: ... - @property - def mean(self) -> Vector: ... - -class StandardScaler: - withMean: bool - withStd: bool - def __init__(self, withMean: bool = ..., withStd: bool = ...) -> None: ... - def fit(self, dataset: RDD[VectorLike]) -> StandardScalerModel: ... - -class ChiSqSelectorModel(JavaVectorTransformer): - @overload - def transform(self, vector: VectorLike) -> Vector: ... - @overload - def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... - -class ChiSqSelector: - numTopFeatures: int - selectorType: str - percentile: float - fpr: float - fdr: float - fwe: float - def __init__( - self, - numTopFeatures: int = ..., - selectorType: str = ..., - percentile: float = ..., - fpr: float = ..., - fdr: float = ..., - fwe: float = ..., - ) -> None: ... - def setNumTopFeatures(self, numTopFeatures: int) -> ChiSqSelector: ... - def setPercentile(self, percentile: float) -> ChiSqSelector: ... - def setFpr(self, fpr: float) -> ChiSqSelector: ... - def setFdr(self, fdr: float) -> ChiSqSelector: ... - def setFwe(self, fwe: float) -> ChiSqSelector: ... - def setSelectorType(self, selectorType: str) -> ChiSqSelector: ... - def fit(self, data: RDD[LabeledPoint]) -> ChiSqSelectorModel: ... - -class PCAModel(JavaVectorTransformer): ... - -class PCA: - k: int - def __init__(self, k: int) -> None: ... - def fit(self, data: RDD[VectorLike]) -> PCAModel: ... - -class HashingTF: - numFeatures: int - binary: bool - def __init__(self, numFeatures: int = ...) -> None: ... - def setBinary(self, value: bool) -> HashingTF: ... - def indexOf(self, term: Hashable) -> int: ... - @overload - def transform(self, document: Iterable[Hashable]) -> Vector: ... - @overload - def transform(self, document: RDD[Iterable[Hashable]]) -> RDD[Vector]: ... - -class IDFModel(JavaVectorTransformer): - @overload - def transform(self, x: VectorLike) -> Vector: ... - @overload - def transform(self, x: RDD[VectorLike]) -> RDD[Vector]: ... - def idf(self) -> Vector: ... - def docFreq(self) -> List[int]: ... - def numDocs(self) -> int: ... - -class IDF: - minDocFreq: int - def __init__(self, minDocFreq: int = ...) -> None: ... - def fit(self, dataset: RDD[VectorLike]) -> IDFModel: ... - -class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader[Word2VecModel]): - def transform(self, word: str) -> Vector: ... # type: ignore - def findSynonyms( - self, word: Union[str, VectorLike], num: int - ) -> Iterable[Tuple[str, float]]: ... - def getVectors(self) -> JavaMap: ... - @classmethod - def load(cls, sc: SparkContext, path: str) -> Word2VecModel: ... 
- -class Word2Vec: - vectorSize: int - learningRate: float - numPartitions: int - numIterations: int - seed: int - minCount: int - windowSize: int - def __init__(self) -> None: ... - def setVectorSize(self, vectorSize: int) -> Word2Vec: ... - def setLearningRate(self, learningRate: float) -> Word2Vec: ... - def setNumPartitions(self, numPartitions: int) -> Word2Vec: ... - def setNumIterations(self, numIterations: int) -> Word2Vec: ... - def setSeed(self, seed: int) -> Word2Vec: ... - def setMinCount(self, minCount: int) -> Word2Vec: ... - def setWindowSize(self, windowSize: int) -> Word2Vec: ... - def fit(self, data: RDD[List[str]]) -> Word2VecModel: ... - -class ElementwiseProduct(VectorTransformer): - scalingVector: Vector - def __init__(self, scalingVector: Vector) -> None: ... - @overload - def transform(self, vector: VectorLike) -> Vector: ... - @overload - def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... From b68327968a7a5f7ac1afa9cc270204c9eaddcb75 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Tue, 22 Feb 2022 21:04:43 +0800 Subject: [PATCH 301/513] [SPARK-38271] PoissonSampler may output more rows than MaxRows ### What changes were proposed in this pull request? when `replacement=true`, `Sample.maxRows` returns `None` ### Why are the changes needed? the underlying impl of `SampleExec` can not guarantee that its number of output rows <= `Sample.maxRows` ``` scala> val df = spark.range(0, 1000) df: org.apache.spark.sql.Dataset[Long] = [id: bigint] scala> df.count res0: Long = 1000 scala> df.sample(true, 0.999999, 10).count res1: Long = 1004 ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #35593 from zhengruifeng/fix_sample_maxRows. Authored-by: Ruifeng Zheng Signed-off-by: Wenchen Fan --- .../plans/logical/basicLogicalOperators.scala | 6 +++++- .../catalyst/optimizer/CombiningLimitsSuite.scala | 13 +++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 21e150ad8413c..3283a4dee86bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1346,7 +1346,11 @@ case class Sample( s"Sampling fraction ($fraction) must be on interval [0, 1] without replacement") } - override def maxRows: Option[Long] = child.maxRows + override def maxRows: Option[Long] = { + // when withReplacement is true, PoissonSampler is applied in SampleExec, + // which may output more rows than child.maxRows. 
+ if (withReplacement) None else child.maxRows + } override def output: Seq[Attribute] = child.output override protected def withNewChildInternal(newChild: LogicalPlan): Sample = diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index 46e9dea730eb7..d3cbaa8c41e2d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -159,6 +159,19 @@ class CombiningLimitsSuite extends PlanTest { ) } + test("SPARK-38271: PoissonSampler may output more rows than child.maxRows") { + val query = testRelation.select().sample(0, 0.2, true, 1) + assert(query.maxRows.isEmpty) + val optimized = Optimize.execute(query.analyze) + assert(optimized.maxRows.isEmpty) + // can not eliminate Limit since Sample.maxRows is None + checkPlanAndMaxRow( + query.limit(10), + query.limit(10), + 10 + ) + } + test("SPARK-33497: Eliminate Limit if Deduplicate max rows not larger than Limit") { checkPlanAndMaxRow( testRelation.deduplicate("a".attr).limit(10), From 43822cdd228a3ba49c47637c525d731d00772f64 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 22 Feb 2022 08:42:47 -0600 Subject: [PATCH 302/513] [SPARK-38060][SQL] Respect allowNonNumericNumbers when parsing quoted NaN and Infinity values in JSON reader Signed-off-by: Andy Grove ### What changes were proposed in this pull request? When parsing JSON unquoted `NaN` and `Infinity`values for floating-point columns we get the expected behavior as shown below where valid values are returned when the parsing option `allowNonNumericNumbers` is enabled and `null` otherwise. | Value | allowNonNumericNumbers=true | allowNonNumericNumbers=false | | --------- | --------------------------- | ---------------------------- | | NaN | Double.NaN | null | | +INF | Double.PositiveInfinity | null | | +Infinity | Double.PositiveInfinity | null | | Infinity | Double.PositiveInfinity | null | | -INF | Double.NegativeInfinity | null | | -Infinity | Double.NegativeInfinity | null | However, when these values are quoted we get the following unexpected behavior due to a different code path being used that is inconsistent with Jackson's parsing and that ignores the `allowNonNumericNumbers` parser option. | Value | allowNonNumericNumbers=true | allowNonNumericNumbers=false | | ----------- | --------------------------- | ---------------------------- | | "NaN" | Double.NaN | Double.NaN | | "+INF" | null | null | | "+Infinity" | null | null | | "Infinity" | Double.PositiveInfinity | Double.PositiveInfinity | | "-INF" | null | null | | "-Infinity" | Double.NegativeInfinity | Double.NegativeInfinity | This PR updates the code path that handles quoted non-numeric numbers to make it consistent with the path that handles the unquoted values. ### Why are the changes needed? The current behavior does not match the documented behavior in https://spark.apache.org/docs/latest/sql-data-sources-json.html ### Does this PR introduce _any_ user-facing change? Yes, parsing of quoted `NaN` and `Infinity` values will now be consistent with the unquoted versions. ### How was this patch tested? Unit tests are updated. Closes #35573 from andygrove/SPARK-38060. 
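To see the corrected behavior end to end, a small PySpark sketch (assuming an active `SparkSession` named `spark` and a build that includes this patch; the column names are just illustrative):

```python
data = ['{"c0": "NaN", "c1": "+INF", "c2": "-Infinity"}']
schema = "c0 double, c1 double, c2 double"

# allowNonNumericNumbers=true: quoted specials parse like their unquoted forms
# (NaN, positive infinity, negative infinity).
spark.read.schema(schema) \
    .option("allowNonNumericNumbers", True) \
    .json(spark.sparkContext.parallelize(data)) \
    .show()

# allowNonNumericNumbers=false: the same strings are rejected and come back as null.
spark.read.schema(schema) \
    .option("allowNonNumericNumbers", False) \
    .json(spark.sparkContext.parallelize(data)) \
    .show()
```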
Authored-by: Andy Grove Signed-off-by: Sean Owen --- docs/core-migration-guide.md | 2 + .../sql/catalyst/json/JacksonParser.scala | 18 ++++++--- .../json/JsonParsingOptionsSuite.scala | 39 +++++++++++++++++++ .../datasources/json/JsonSuite.scala | 6 +++ 4 files changed, 59 insertions(+), 6 deletions(-) diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 745b80d6eecb2..588433c36444d 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -26,6 +26,8 @@ license: | - Since Spark 3.3, Spark migrates its log4j dependency from 1.x to 2.x because log4j 1.x has reached end of life and is no longer supported by the community. Vulnerabilities reported after August 2015 against log4j 1.x were not checked and will not be fixed. Users should rewrite original log4j properties files using log4j2 syntax (XML, JSON, YAML, or properties format). Spark rewrites the `conf/log4j.properties.template` which is included in Spark distribution, to `conf/log4j2.properties.template` with log4j2 properties format. +- Since Spark 3.3, when reading values from a JSON attribute defined as `FloatType` or `DoubleType`, the strings `"+Infinity"`, `"+INF"`, and `"-INF"` are now parsed to the appropriate values, in addition to the already supported `"Infinity"` and `"-Infinity"` variations. This change was made to improve consistency with Jackson's parsing of the unquoted versions of these values. Also, the `allowNonNumericNumbers` option is now respected so these strings will now be considered invalid if this option is disabled. + ## Upgrading from Core 3.1 to 3.2 - Since Spark 3.2, `spark.scheduler.allocation.file` supports read remote file using hadoop filesystem which means if the path has no scheme Spark will respect hadoop configuration to read it. To restore the behavior before Spark 3.2, you can specify the local scheme for `spark.scheduler.allocation.file` e.g. `file:///path/to/file`. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index a1f9487fe2e08..abcbdb83813b0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -204,9 +204,12 @@ class JacksonParser( case VALUE_STRING if parser.getTextLength >= 1 => // Special case handling for NaN and Infinity. parser.getText match { - case "NaN" => Float.NaN - case "Infinity" => Float.PositiveInfinity - case "-Infinity" => Float.NegativeInfinity + case "NaN" if options.allowNonNumericNumbers => + Float.NaN + case "+INF" | "+Infinity" | "Infinity" if options.allowNonNumericNumbers => + Float.PositiveInfinity + case "-INF" | "-Infinity" if options.allowNonNumericNumbers => + Float.NegativeInfinity case _ => throw QueryExecutionErrors.cannotParseStringAsDataTypeError( parser, VALUE_STRING, FloatType) } @@ -220,9 +223,12 @@ class JacksonParser( case VALUE_STRING if parser.getTextLength >= 1 => // Special case handling for NaN and Infinity. 
parser.getText match { - case "NaN" => Double.NaN - case "Infinity" => Double.PositiveInfinity - case "-Infinity" => Double.NegativeInfinity + case "NaN" if options.allowNonNumericNumbers => + Double.NaN + case "+INF" | "+Infinity" | "Infinity" if options.allowNonNumericNumbers => + Double.PositiveInfinity + case "-INF" | "-Infinity" if options.allowNonNumericNumbers => + Double.NegativeInfinity case _ => throw QueryExecutionErrors.cannotParseStringAsDataTypeError( parser, VALUE_STRING, DoubleType) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala index e9fe79a0641b9..703085dca66f1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala @@ -130,6 +130,45 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSparkSession { Double.NegativeInfinity, Double.NegativeInfinity)) } + test("allowNonNumericNumbers on - quoted") { + val str = + """{"c0":"NaN", "c1":"+INF", "c2":"+Infinity", "c3":"Infinity", "c4":"-INF", + |"c5":"-Infinity"}""".stripMargin + val df = spark.read + .schema(new StructType() + .add("c0", "double") + .add("c1", "double") + .add("c2", "double") + .add("c3", "double") + .add("c4", "double") + .add("c5", "double")) + .option("allowNonNumericNumbers", true).json(Seq(str).toDS()) + checkAnswer( + df, + Row( + Double.NaN, + Double.PositiveInfinity, Double.PositiveInfinity, Double.PositiveInfinity, + Double.NegativeInfinity, Double.NegativeInfinity)) + } + + test("allowNonNumericNumbers off - quoted") { + val str = + """{"c0":"NaN", "c1":"+INF", "c2":"+Infinity", "c3":"Infinity", "c4":"-INF", + |"c5":"-Infinity"}""".stripMargin + val df = spark.read + .schema(new StructType() + .add("c0", "double") + .add("c1", "double") + .add("c2", "double") + .add("c3", "double") + .add("c4", "double") + .add("c5", "double")) + .option("allowNonNumericNumbers", false).json(Seq(str).toDS()) + checkAnswer( + df, + Row(null, null, null, null, null, null)) + } + test("allowBackslashEscapingAnyCharacter off") { val str = """{"name": "Cazen Lee", "price": "\$10"}""" val df = spark.read.option("allowBackslashEscapingAnyCharacter", "false").json(Seq(str).toDS()) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 3daad301c23b1..bd01975ce9608 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -2021,13 +2021,19 @@ abstract class JsonSuite test("SPARK-18772: Parse special floats correctly") { val jsons = Seq( """{"a": "NaN"}""", + """{"a": "+INF"}""", + """{"a": "-INF"}""", """{"a": "Infinity"}""", + """{"a": "+Infinity"}""", """{"a": "-Infinity"}""") // positive cases val checks: Seq[Double => Boolean] = Seq( _.isNaN, _.isPosInfinity, + _.isNegInfinity, + _.isPosInfinity, + _.isPosInfinity, _.isNegInfinity) Seq(FloatType, DoubleType).foreach { dt => From bd4461119c36577a958bdbdd8e280c1b620c7e93 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 23 Feb 2022 00:51:22 +0800 Subject: [PATCH 303/513] [SPARK-38290][SQL] Fix JsonSuite and ParquetIOSuite under ANSI mode ### What 
changes were proposed in this pull request? Fix all the data source test failures with ANSI mode on: - JsonLegacyTimeParserSuite - JsonV1Suite - JsonV2Suite - ParquetIOSuite ### Why are the changes needed? For setting up a new GA test job with ANSI mode on ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually turn on ANSI mode and run tests Closes #35611 from gengliangwang/fixJsonSuite. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../datasources/json/JsonSuite.scala | 39 +++++++++++-------- .../datasources/parquet/ParquetIOSuite.scala | 7 +++- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index bd01975ce9608..132b8f9be9dcc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -59,6 +59,9 @@ abstract class JsonSuite override protected def dataSourceFormat = "json" + override protected def sparkConf: SparkConf = + super.sparkConf.set(SQLConf.ANSI_STRICT_INDEX_OPERATOR.key, "false") + test("Type promotion") { def checkTypePromotion(expected: Any, actual: Any): Unit = { assert(expected.getClass == actual.getClass, @@ -452,12 +455,6 @@ abstract class JsonSuite Row(null, 21474836570L, 1.1, 21474836470L, "92233720368547758070", null) :: Nil ) - // Number and Boolean conflict: resolve the type as number in this query. - checkAnswer( - sql("select num_bool - 10 from jsonTable where num_bool > 11"), - Row(2) - ) - // Widening to LongType checkAnswer( sql("select num_num_1 - 100 from jsonTable where num_num_1 > 11"), @@ -482,17 +479,27 @@ abstract class JsonSuite Row(101.2) :: Row(21474836471.2) :: Nil ) - // Number and String conflict: resolve the type as number in this query. - checkAnswer( - sql("select num_str + 1.2 from jsonTable where num_str > 14d"), - Row(92233720368547758071.2) - ) + // The following tests are about type coercion instead of JSON data source. + // Here we simply forcus on the behavior of non-Ansi. + if(!SQLConf.get.ansiEnabled) { + // Number and Boolean conflict: resolve the type as number in this query. + checkAnswer( + sql("select num_bool - 10 from jsonTable where num_bool > 11"), + Row(2) + ) - // Number and String conflict: resolve the type as number in this query. - checkAnswer( - sql("select num_str + 1.2 from jsonTable where num_str >= 92233720368547758060"), - Row(new java.math.BigDecimal("92233720368547758071.2").doubleValue) - ) + // Number and String conflict: resolve the type as number in this query. + checkAnswer( + sql("select num_str + 1.2 from jsonTable where num_str > 14d"), + Row(92233720368547758071.2) + ) + + // Number and String conflict: resolve the type as number in this query. + checkAnswer( + sql("select num_str + 1.2 from jsonTable where num_str >= 92233720368547758060"), + Row(new java.math.BigDecimal("92233720368547758071.2").doubleValue) + ) + } // String and Boolean conflict: resolve the type as string. 
checkAnswer( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 1e2bb9104cfc0..c70ac8084a841 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -191,7 +191,12 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession .coalesce(1) } - val combinations = Seq((5, 2), (1, 0), (1, 1), (18, 10), (18, 17), (19, 0), (38, 37)) + var combinations = Seq((5, 2), (1, 0), (18, 10), (18, 17), (19, 0), (38, 37)) + // If ANSI mode is on, the combination (1, 1) will cause a runtime error. Otherwise, the + // decimal RDD contains all null values and should be able to read back from Parquet. + if (!SQLConf.get.ansiEnabled) { + combinations = combinations++ Seq((1, 1)) + } for ((precision, scale) <- combinations) { withTempPath { dir => val data = makeDecimalRDD(DecimalType(precision, scale)) From 27dbf6fe67c81887ee656a69fc327f3cb5ae56f2 Mon Sep 17 00:00:00 2001 From: bjornjorgensen Date: Tue, 22 Feb 2022 13:02:14 -0800 Subject: [PATCH 304/513] [SPARK-38291][BUILD][TESTS] Upgrade `postgresql` from 42.3.0 to 42.3.3 ### What changes were proposed in this pull request? Upgrade Postgresql 42.3.0 to 42.3.3 [Postgresql changelog 42.3.3](https://jdbc.postgresql.org/documentation/changelog.html#version_42.3.3) ### Why are the changes needed? [CVE-2022-21724](https://nvd.nist.gov/vuln/detail/CVE-2022-21724) and [Arbitrary File Write Vulnerability](https://github.com/advisories/GHSA-673j-qm5f-xpv8) By upgrading postgresql from 42.3.0 to 42.3.3 we will resolve these issues. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? All test must pass. Closes #35614 from bjornjorgensen/postgresql-from-42.3.0-to-42.3.3. Authored-by: bjornjorgensen Signed-off-by: Dongjoon Hyun --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 788cf8c95aaf7..23e567c5c9ae0 100644 --- a/pom.xml +++ b/pom.xml @@ -1181,7 +1181,7 @@ org.postgresql postgresql - 42.3.0 + 42.3.3 test From a11f7994f56ecf68a9aeda5b424de17d82105d02 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 23 Feb 2022 09:16:56 +0900 Subject: [PATCH 305/513] [SPARK-38121][PYTHON][SQL][FOLLOW-UP] Make df.sparkSession return the session that created DataFrame when SQLContext is used ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/35410 that makes `df.sparkSession` return the session that created DataFrame when `SQLContext` is used. ### Why are the changes needed? See https://github.com/apache/spark/pull/35410#discussion_r810431358 ### Does this PR introduce _any_ user-facing change? No to end users as it's not released yet. See also https://github.com/apache/spark/pull/35410#discussion_r810431358 ### How was this patch tested? ```python >>> from pyspark.sql import DataFrame >>> DataFrame(sqlContext.range(1)._jdf, sqlContext).sparkSession /.../spark/python/pyspark/sql/dataframe.py:127: UserWarning: DataFrame constructor is internal. Do not directly use it. warnings.warn("DataFrame constructor is internal. Do not directly use it.") ``` Closes #35575 from HyukjinKwon/SPARK-38121-followup2. 
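For the usual `SparkSession` path the property simply hands back the session that built the DataFrame; a one-line sanity check (assuming an active session named `spark`):

```python
df = spark.range(1)
assert df.sparkSession is spark  # the creating session, not a freshly resolved one
```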
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/dataframe.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 4b778bdc2005a..76c407624ac7d 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -116,7 +116,6 @@ def __init__( ): from pyspark.sql.context import SQLContext - self._session: Optional["SparkSession"] = None self._sql_ctx: Optional["SQLContext"] = None if isinstance(sql_ctx, SQLContext): @@ -127,8 +126,10 @@ def __init__( # was kept with an warning because it's used intensively by third-party libraries. warnings.warn("DataFrame constructor is internal. Do not directly use it.") self._sql_ctx = sql_ctx + session = sql_ctx.sparkSession else: - self._session = sql_ctx + session = sql_ctx + self._session: "SparkSession" = session self._sc: SparkContext = sql_ctx._sc self._jdf: JavaObject = jdf @@ -152,7 +153,7 @@ def sql_ctx(self) -> "SQLContext": self._sql_ctx = SQLContext._get_or_create(self._sc) return self._sql_ctx - @property # type: ignore[misc] + @property def sparkSession(self) -> "SparkSession": """Returns Spark session that created this :class:`DataFrame`. @@ -164,10 +165,6 @@ def sparkSession(self) -> "SparkSession": >>> type(df.sparkSession) """ - from pyspark.sql.session import SparkSession - - if self._session is None: - self._session = SparkSession._getActiveSessionOrCreate() return self._session @property # type: ignore[misc] From 4d75d47056f2afd5efbeaabad942a1d3d1528bfd Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Wed, 23 Feb 2022 10:13:35 +0800 Subject: [PATCH 306/513] [SPARK-38062][CORE] Avoid resolving placeholder hostname for FallbackStorage in BlockManagerDecommissioner ### What changes were proposed in this pull request? This updates `BlockManagerDecommissioner` to avoid treating "remote", the placeholder hostname used by `FALLBACK_BLOCK_MANAGER_ID`, as a valid hostname and attempting to perform a network transfer to it. If the `peer` it encounters matches the fallback block manager ID, it now goes directly to accessing `fallbackStorage`, instead of first attempting to treat it like a valid block manager ID. In addition, this reverts the changes from SPARK-37318, which should no longer be necessary now that the underlying issue is resolved. ### Why are the changes needed? See SPARK-38062 for a much more detailed explanation. The gist of it is that: - Attempting to resolve "remote" can behave unexpectedly in some DNS environments. This can cause failures of the `FallbackStorageSuite` tests, but also could potentially cause issues in a production deployment. - SPARK-37318 "fixes" the tests by skipping them if such a DNS environment is detected, but this has the obvious drawback of disabling the tests, and doesn't address the problem for production environments. - Even if resolving "remote" does quickly fail, as the current code expects, it is semantically wrong -- we should not treat this placeholder as a valid hostname. ### Does this PR introduce _any_ user-facing change? `FallbackStorage` may be resolved slightly quicker, as it removes an unnecessary lookup step, but it should be negligible in most environments. No other user-facing changes. ### How was this patch tested? The DNS environment in which unit tests are run in an automated fashion at my company means that we experience an issue very similar to what is described in SPARK-37318. 
Without this patch, tests in `FallbackStorageSuite` consistently fail, exceeding their timeouts. With this patch, the tests consistently (and quickly!) succeed. Closes #35358 from xkrogen/xkrogen-SPARK-38062-fallbackstorage-remote-host. Authored-by: Erik Krogen Signed-off-by: Wenchen Fan --- .../storage/BlockManagerDecommissioner.scala | 31 ++++++++++++------- .../spark/storage/FallbackStorageSuite.scala | 14 ++------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index aef5cbf07d681..cb01faf7d401d 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -109,17 +109,21 @@ private[storage] class BlockManagerDecommissioner( s"to $peer ($retryCount / $maxReplicationFailuresForDecommission)") // Migrate the components of the blocks. try { - blocks.foreach { case (blockId, buffer) => - logDebug(s"Migrating sub-block ${blockId}") - bm.blockTransferService.uploadBlockSync( - peer.host, - peer.port, - peer.executorId, - blockId, - buffer, - StorageLevel.DISK_ONLY, - null) // class tag, we don't need for shuffle - logDebug(s"Migrated sub-block $blockId") + if (fallbackStorage.isDefined && peer == FallbackStorage.FALLBACK_BLOCK_MANAGER_ID) { + fallbackStorage.foreach(_.copy(shuffleBlockInfo, bm)) + } else { + blocks.foreach { case (blockId, buffer) => + logDebug(s"Migrating sub-block ${blockId}") + bm.blockTransferService.uploadBlockSync( + peer.host, + peer.port, + peer.executorId, + blockId, + buffer, + StorageLevel.DISK_ONLY, + null) // class tag, we don't need for shuffle + logDebug(s"Migrated sub-block $blockId") + } } logInfo(s"Migrated $shuffleBlockInfo to $peer") } catch { @@ -131,7 +135,10 @@ private[storage] class BlockManagerDecommissioner( // driver a no longer referenced RDD with shuffle files. 
if (bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo).size < blocks.size) { logWarning(s"Skipping block $shuffleBlockInfo, block deleted.") - } else if (fallbackStorage.isDefined) { + } else if (fallbackStorage.isDefined + // Confirm peer is not the fallback BM ID because fallbackStorage would already + // have been used in the try-block above so there's no point trying again + && peer != FallbackStorage.FALLBACK_BLOCK_MANAGER_ID) { fallbackStorage.foreach(_.copy(shuffleBlockInfo, bm)) } else { logError(s"Error occurred during migrating $shuffleBlockInfo", e) diff --git a/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala b/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala index 7d648c979cd60..3828e9d8297a6 100644 --- a/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala @@ -17,14 +17,13 @@ package org.apache.spark.storage import java.io.{DataOutputStream, File, FileOutputStream, IOException} -import java.net.{InetAddress, UnknownHostException} import java.nio.file.Files import scala.concurrent.duration._ import org.apache.hadoop.conf.Configuration import org.mockito.{ArgumentMatchers => mc} -import org.mockito.Mockito.{mock, times, verify, when} +import org.mockito.Mockito.{mock, never, verify, when} import org.scalatest.concurrent.Eventually.{eventually, interval, timeout} import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite, TestUtils} @@ -42,13 +41,6 @@ import org.apache.spark.util.Utils.tryWithResource class FallbackStorageSuite extends SparkFunSuite with LocalSparkContext { def getSparkConf(initialExecutor: Int = 1, minExecutor: Int = 1): SparkConf = { - // Some DNS always replies for all hostnames including unknown host names - try { - InetAddress.getByName(FallbackStorage.FALLBACK_BLOCK_MANAGER_ID.host) - assume(false) - } catch { - case _: UnknownHostException => - } new SparkConf(false) .setAppName(getClass.getName) .set(SPARK_MASTER, s"local-cluster[$initialExecutor,1,1024]") @@ -179,8 +171,8 @@ class FallbackStorageSuite extends SparkFunSuite with LocalSparkContext { decommissioner.start() val fallbackStorage = new FallbackStorage(conf) eventually(timeout(10.second), interval(1.seconds)) { - // uploadBlockSync is not used - verify(blockTransferService, times(1)) + // uploadBlockSync should not be used, verify that it is not called + verify(blockTransferService, never()) .uploadBlockSync(mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.any()) Seq("shuffle_1_1_0.index", "shuffle_1_1_0.data").foreach { filename => From ceb32c9e67b5e9456c4f82366d9695bb59e32762 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Tue, 22 Feb 2022 18:40:01 -0800 Subject: [PATCH 307/513] [SPARK-38272][K8S][TESTS] Use `docker-desktop` instead of `docker-for-desktop` for Docker K8S IT deployMode and context name ### What changes were proposed in this pull request? Change `docker-for-desktop` to `docker-desktop`. ### Why are the changes needed? The context name of the kubernetes on docker for desktop should be `docker-desktop` rather than `docker-for-desktop` ``` $ k config current-context docker-desktop ``` According to the [comments](https://github.com/docker/for-win/issues/5089#issuecomment-582752325), since docker desktop v2.4 (current is v4.5.1), `docker` are using use a alias `docker-for-desktop` to link `docker-desktop` cluster for legacy. 
See also here: https://github.com/apache/spark/pull/35557#issuecomment-1046609601 . ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - CI passed - build/sbt -Dspark.kubernetes.test.deployMode=docker-for-desktop -Pvolcano -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube,r "kubernetes-integration-tests/test" - build/sbt -Dspark.kubernetes.test.deployMode=docker-desktop -Pvolcano -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube,r "kubernetes-integration-tests/test" Closes #35595 from Yikun/SPARK-38272. Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- resource-managers/kubernetes/integration-tests/README.md | 8 ++++---- .../scripts/setup-integration-test-env.sh | 2 +- .../spark/deploy/k8s/integrationtest/TestConstants.scala | 1 + .../integrationtest/backend/IntegrationTestBackend.scala | 2 +- .../backend/docker/DockerForDesktopBackend.scala | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md index eb32e81d8f75a..edd3bf5f7afe8 100644 --- a/resource-managers/kubernetes/integration-tests/README.md +++ b/resource-managers/kubernetes/integration-tests/README.md @@ -51,11 +51,11 @@ Uses the local `minikube` cluster, this requires that `minikube` 1.7.3 or greate at least 4 CPUs and 6GB memory (some users have reported success with as few as 3 CPUs and 4GB memory). The tests will check if `minikube` is started and abort early if it isn't currently running. -### `docker-for-desktop` +### `docker-desktop` Since July 2018 Docker for Desktop provide an optional Kubernetes cluster that can be enabled as described in this [blog post](https://blog.docker.com/2018/07/kubernetes-is-now-available-in-docker-desktop-stable-channel/). Assuming -this is enabled using this backend will auto-configure itself from the `docker-for-desktop` context that Docker creates +this is enabled using this backend will auto-configure itself from the `docker-desktop` context that Docker creates in your `~/.kube/config` file. If your config file is in a different location you should set the `KUBECONFIG` environment variable appropriately. @@ -139,7 +139,7 @@ properties to Maven. For example: -Dspark.kubernetes.test.imageTag=sometag \ -Dspark.kubernetes.test.imageRepo=docker.io/somerepo \ -Dspark.kubernetes.test.namespace=spark-int-tests \ - -Dspark.kubernetes.test.deployMode=docker-for-desktop \ + -Dspark.kubernetes.test.deployMode=docker-desktop \ -Dtest.include.tags=k8s @@ -172,7 +172,7 @@ to the wrapper scripts and using the wrapper scripts will simply set these appro spark.kubernetes.test.deployMode The integration test backend to use. Acceptable values are minikube, - docker-for-desktop and cloud. + docker-desktop and cloud. 
minikube diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh index f79b1f82add67..e4a92b60c981d 100755 --- a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh +++ b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh @@ -136,7 +136,7 @@ then fi ;; - docker-for-desktop) + docker-desktop | docker-for-desktop) # Only need to build as this will place it in our local Docker repo which is all # we need for Docker for Desktop to work so no need to also push $SPARK_INPUT_DIR/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $JAVA_IMAGE_TAG_BUILD_ARG $LANGUAGE_BINDING_BUILD_ARGS build diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/TestConstants.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/TestConstants.scala index 2b1fd08164616..c46839f1dffcc 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/TestConstants.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/TestConstants.scala @@ -19,6 +19,7 @@ package org.apache.spark.deploy.k8s.integrationtest object TestConstants { val BACKEND_MINIKUBE = "minikube" val BACKEND_DOCKER_FOR_DESKTOP = "docker-for-desktop" + val BACKEND_DOCKER_DESKTOP = "docker-desktop" val BACKEND_CLOUD = "cloud" val CONFIG_KEY_DEPLOY_MODE = "spark.kubernetes.test.deployMode" diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala index 36c3b6ad2ec5d..ced8151b709b5 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala @@ -43,7 +43,7 @@ private[spark] object IntegrationTestBackendFactory { case BACKEND_MINIKUBE => MinikubeTestBackend case BACKEND_CLOUD => new KubeConfigBackend(System.getProperty(CONFIG_KEY_KUBE_CONFIG_CONTEXT)) - case BACKEND_DOCKER_FOR_DESKTOP => DockerForDesktopBackend + case BACKEND_DOCKER_FOR_DESKTOP | BACKEND_DOCKER_DESKTOP => DockerForDesktopBackend case _ => throw new IllegalArgumentException("Invalid " + CONFIG_KEY_DEPLOY_MODE + ": " + deployMode) } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/docker/DockerForDesktopBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/docker/DockerForDesktopBackend.scala index 81a11ae9dcdc6..f206befc64ff1 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/docker/DockerForDesktopBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/docker/DockerForDesktopBackend.scala @@ -20,6 +20,6 @@ import org.apache.spark.deploy.k8s.integrationtest.TestConstants import 
org.apache.spark.deploy.k8s.integrationtest.backend.cloud.KubeConfigBackend private[spark] object DockerForDesktopBackend - extends KubeConfigBackend(TestConstants.BACKEND_DOCKER_FOR_DESKTOP) { + extends KubeConfigBackend(TestConstants.BACKEND_DOCKER_DESKTOP) { } From 25342179447914d76123b8d3ae7bddf34e4bcfba Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 22 Feb 2022 18:47:11 -0800 Subject: [PATCH 308/513] [SPARK-38260][BUILD][CORE] Remove `commons-net` dependency in `hadoop-3` profile ### What changes were proposed in this pull request? [SPARK-1189](https://github.com/apache/spark/pull/33/files) introduces maven dependence on `commons-net`, and `org.apache.commons.net.util.Base64` is used in `SparkSaslServer`, but `SparkSaslServer` has changed to use `io.netty.handler.codec.base64.Base64` and there is no explicit dependency on `commons-net` in Spark code, so this pr removed this dependency. After this pr Spark with `hadoop-3` profile no longer need `commons-net`, but Spark with `hadoop-2` still need it due to `hadoop-2.7.4` use `commons-net` directly. ### Why are the changes needed? Remove unnecessary maven dependency. ### Does this PR introduce _any_ user-facing change? `commons-net` jar no longer exists in Spark-Client with hadoop-3.x ### How was this patch tested? Pass GA Closes #35582 from LuciferYang/SPARK-38260. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- core/pom.xml | 4 ---- dev/deps/spark-deps-hadoop-3-hive-2.3 | 1 - pom.xml | 5 ----- 3 files changed, 10 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index ac429fc4309f4..3d095914e5caf 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -250,10 +250,6 @@ org.roaringbitmap RoaringBitmap - - commons-net - commons-net - org.scala-lang.modules scala-xml_${scala.binary.version} diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 73644ee4ed75c..2de677e53fe2f 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -49,7 +49,6 @@ commons-lang/2.6//commons-lang-2.6.jar commons-lang3/3.12.0//commons-lang3-3.12.0.jar commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.6.1//commons-math3-3.6.1.jar -commons-net/3.1//commons-net-3.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar commons-text/1.9//commons-text-1.9.jar compress-lzf/1.0.3//compress-lzf-1.0.3.jar diff --git a/pom.xml b/pom.xml index 23e567c5c9ae0..d1e391c8539c7 100644 --- a/pom.xml +++ b/pom.xml @@ -803,11 +803,6 @@ RoaringBitmap 0.9.23 - - commons-net - commons-net - 3.1 - io.netty netty-all From 43e93b581ea5f7a1ba6cf943e6624f6847ebc3a8 Mon Sep 17 00:00:00 2001 From: Martin Tzvetanov Grigorov Date: Tue, 22 Feb 2022 19:27:43 -0800 Subject: [PATCH 309/513] [SPARK-38241][K8S][TESTS] Close KubernetesClient in K8S integrations tests ### What changes were proposed in this pull request? Close org.apache.spark.deploy.k8s.integrationtest.backend.IntegrationTestBackend#getKubernetesClient in the `cleanUp()` of the backend implementations. ### Why are the changes needed? It is a good practice to cleanup resources at the end of the tests, so they do not leak and affect other tests. Recently I've noticed the following in the output: ``` ===== POSSIBLE THREAD LEAK IN SUITE o.a.s.deploy.k8s.integrationtest.VolcanoSuite, threads: OkHttp [https://192.168.49.2:8443/.](https://192.168.49.2:8443/).. 
(daemon=false), scala-execution-context-global-26 (daemon=true), OkHttp ConnectionPool (daemon=true), scala-execution-context-global-24 (daemon=true), Okio Watchdog (daemon=true), scala-execution-context-global-23 (daemon=true), scala-execution-context-global-21 (daemon=true), scala-execution-context-global-22 (daemon=true), OkHttp WebSocket [https://192.168.49.2:8443/.](https://192.168.49.2:8443/).. (daemon=false), scala-execution-context-global-25 (daemon=true), scala-execution-context-global-20 (daemon=true) ===== ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Run the IT tests Closes #35555 from martin-g/close-kubernetes-client-in-tests. Authored-by: Martin Tzvetanov Grigorov Signed-off-by: Dongjoon Hyun --- .../k8s/integrationtest/backend/cloud/KubeConfigBackend.scala | 3 +++ .../integrationtest/backend/minikube/MinikubeTestBackend.scala | 3 +++ 2 files changed, 6 insertions(+) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala index 0fbed4a220e68..83535488cc0ab 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala @@ -60,6 +60,9 @@ private[spark] class KubeConfigBackend(var context: String) } override def cleanUp(): Unit = { + if (defaultClient != null) { + defaultClient.close() + } super.cleanUp() } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala index 8c8f848114d1c..f2ca57f89d0aa 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala @@ -34,6 +34,9 @@ private[spark] object MinikubeTestBackend extends IntegrationTestBackend { } override def cleanUp(): Unit = { + if (defaultClient != null) { + defaultClient.close() + } super.cleanUp() } From b46b74ce0521d1d5e7c09cadad0e9639e31214cb Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 23 Feb 2022 14:12:39 +0900 Subject: [PATCH 310/513] [SPARK-38297][PYTHON] Explicitly cast the return value at DataFrame.to_numpy in POS ### What changes were proposed in this pull request? MyPy build currently fails as below: ``` starting mypy annotations test... annotations failed mypy checks: python/pyspark/pandas/generic.py:585: error: Incompatible return value type (got "Union[ndarray[Any, Any], ExtensionArray]", expected "ndarray[Any, Any]") [return-value] Found 1 error in 1 file (checked 324 source files) 1 ``` https://github.com/apache/spark/runs/5298261168?check_suite_focus=true I tried to reproduce in my local by matching NumPy and MyPy versions but failed. So I decided to work around the problem first by explicitly casting to make MyPy happy. ### Why are the changes needed? 
To make the build pass. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? CI in this PR should verify if it's fixed. Closes #35617 from HyukjinKwon/SPARK-38297. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py index 63ce25ec5f2b2..a1b1a14395232 100644 --- a/python/pyspark/pandas/generic.py +++ b/python/pyspark/pandas/generic.py @@ -582,7 +582,7 @@ def to_numpy(self) -> np.ndarray: "`to_numpy` loads all data into the driver's memory. " "It should only be used if the resulting NumPy ndarray is expected to be small." ) - return self._to_pandas().values + return cast(np.ndarray, self._to_pandas().values) @property def values(self) -> np.ndarray: From fab4ceb157baac870f6d50b942084bb9b2cd4ad2 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 23 Feb 2022 15:32:00 +0800 Subject: [PATCH 311/513] [SPARK-38240][SQL] Improve RuntimeReplaceable and add a guideline for adding new functions ### What changes were proposed in this pull request? This PR improves `RuntimeReplaceable` so that it can 1. Customize the type coercion behavior instead of always inheriting from the replacement expression. This is useful for expressions like `ToBinary`, where its replacement expression can be `Cast` that does not have type coercion. 2. Support aggregate functions. This PR also adds a guideline for adding new SQL functions, with `RuntimeReplaceable` and `ExpressionBuilder`. See https://github.com/apache/spark/pull/35534/files#diff-6c6ba3e220b9d155160e4e25305fdd3a4835b7ce9eba230a7ae70bdd97047313R330 ### Why are the changes needed? Since we are keep adding new functions, it's better to make `RuntimeReplaceable` more useful and set up a standard for adding functions. ### Does this PR introduce _any_ user-facing change? Improves error messages of some functions. ### How was this patch tested? existing tests Closes #35534 from cloud-fan/refactor. 
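For illustration only (not part of this patch): a minimal sketch of the pattern this PR encourages, assuming the `RuntimeReplaceable` and `InheritAnalysisRules` traits behave as defined in the diff below. A hypothetical `try_mod` function can be assembled purely from existing expressions, mirroring the `TryAdd`/`TryDivide` changes, so it needs no new eval or codegen logic.

```scala
import org.apache.spark.sql.catalyst.expressions._

// Hypothetical expression, shown only to illustrate the pattern; this patch does not add it.
case class TryMod(left: Expression, right: Expression, replacement: Expression)
  extends RuntimeReplaceable with InheritAnalysisRules {

  // User-facing constructor: the replacement is assembled from existing expressions.
  def this(left: Expression, right: Expression) =
    this(left, right, TryEval(Remainder(left, right, failOnError = true)))

  override def prettyName: String = "try_mod"

  // Original parameters, used only to render SQL/EXPLAIN text for this expression.
  override def parameters: Seq[Expression] = Seq(left, right)

  override protected def withNewChildInternal(newChild: Expression): Expression =
    copy(replacement = newChild)
}
```

Registering such a function is then a one-liner in `FunctionRegistry`, e.g. `expression[TryMod]("try_mod")`, since the registry skips the primary constructor that carries `replacement`.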
Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../examples/extensions/AgeExample.scala | 13 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 4 + .../catalyst/analysis/FunctionRegistry.scala | 63 +++- .../catalyst/analysis/TimeTravelSpec.scala | 2 +- .../sql/catalyst/expressions/Expression.scala | 81 ++--- .../sql/catalyst/expressions/TryEval.scala | 51 ++- .../expressions/aggregate/CountIf.scala | 35 +- .../expressions/aggregate/RegrCount.scala | 19 +- ...aluableAggs.scala => boolAggregates.scala} | 41 +-- .../expressions/collectionOperations.scala | 53 ++- .../expressions/datetimeExpressions.scala | 343 +++++++++--------- .../expressions/intervalExpressions.scala | 10 +- .../expressions/mathExpressions.scala | 97 +++-- .../spark/sql/catalyst/expressions/misc.scala | 91 ++--- .../expressions/nullExpressions.scala | 54 +-- .../expressions/regexpExpressions.scala | 19 +- .../expressions/stringExpressions.scala | 207 +++++------ .../catalyst/optimizer/finishAnalysis.scala | 21 +- .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../sql/catalyst/trees/TreePatterns.scala | 3 - .../spark/sql/catalyst/util/package.scala | 4 +- .../sql/errors/QueryCompilationErrors.scala | 24 +- .../sql/errors/QueryExecutionErrors.scala | 8 +- .../expressions/DateExpressionsSuite.scala | 8 +- .../org/apache/spark/sql/functions.scala | 4 +- .../sql-functions/sql-expression-schema.md | 20 +- .../sql-tests/inputs/string-functions.sql | 9 +- .../sql-tests/results/ansi/map.sql.out | 4 +- .../results/ansi/string-functions.sql.out | 28 +- .../ceil-floor-with-scale-param.sql.out | 14 +- .../sql-tests/results/extract.sql.out | 4 +- .../sql-tests/results/group-by.sql.out | 12 +- .../resources/sql-tests/results/map.sql.out | 4 +- .../results/string-functions.sql.out | 28 +- .../sql-tests/results/timestamp-ltz.sql.out | 2 +- .../results/udf/udf-group-by.sql.out | 8 +- .../spark/sql/DataFrameAggregateSuite.scala | 3 +- 37 files changed, 657 insertions(+), 736 deletions(-) rename sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/{UnevaluableAggs.scala => boolAggregates.scala} (63%) diff --git a/examples/src/main/scala/org/apache/spark/examples/extensions/AgeExample.scala b/examples/src/main/scala/org/apache/spark/examples/extensions/AgeExample.scala index d25f2204994c7..e4840241006db 100644 --- a/examples/src/main/scala/org/apache/spark/examples/extensions/AgeExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/extensions/AgeExample.scala @@ -18,14 +18,15 @@ package org.apache.spark.examples.extensions import org.apache.spark.sql.catalyst.expressions.{CurrentDate, Expression, RuntimeReplaceable, SubtractDates} +import org.apache.spark.sql.catalyst.trees.UnaryLike /** * How old are you in days? 
*/ -case class AgeExample(birthday: Expression, child: Expression) extends RuntimeReplaceable { - - def this(birthday: Expression) = this(birthday, SubtractDates(CurrentDate(), birthday)) - override def exprsReplaced: Seq[Expression] = Seq(birthday) - - override protected def withNewChildInternal(newChild: Expression): Expression = copy(newChild) +case class AgeExample(birthday: Expression) extends RuntimeReplaceable with UnaryLike[Expression] { + override lazy val replacement: Expression = SubtractDates(CurrentDate(), birthday) + override def child: Expression = birthday + override protected def withNewChildInternal(newChild: Expression): Expression = { + copy(birthday = newChild) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index eacb5b266d660..0bf748cdb8518 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -199,6 +199,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { failAnalysis(s"invalid cast from ${c.child.dataType.catalogString} to " + c.dataType.catalogString) + case e: RuntimeReplaceable if !e.replacement.resolved => + throw new IllegalStateException("Illegal RuntimeReplaceable: " + e + + "\nReplacement is unresolved: " + e.replacement) + case g: Grouping => failAnalysis("grouping() can only be used with GroupingSets/Cube/Rollup") case g: GroupingID => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 12fa7231b0a91..6cf0fd11b5488 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -111,9 +111,11 @@ object FunctionRegistryBase { name: String, since: Option[String]): (ExpressionInfo, Seq[Expression] => T) = { val runtimeClass = scala.reflect.classTag[T].runtimeClass - // For `RuntimeReplaceable`, skip the constructor with most arguments, which is the main - // constructor and contains non-parameter `child` and should not be used as function builder. - val constructors = if (classOf[RuntimeReplaceable].isAssignableFrom(runtimeClass)) { + // For `InheritAnalysisRules`, skip the constructor with most arguments, which is the main + // constructor and contains non-parameter `replacement` and should not be used as + // function builder. 
+ val isRuntime = classOf[InheritAnalysisRules].isAssignableFrom(runtimeClass) + val constructors = if (isRuntime) { val all = runtimeClass.getConstructors val maxNumArgs = all.map(_.getParameterCount).max all.filterNot(_.getParameterCount == maxNumArgs) @@ -324,7 +326,36 @@ object FunctionRegistry { val FUNC_ALIAS = TreeNodeTag[String]("functionAliasName") - // Note: Whenever we add a new entry here, make sure we also update ExpressionToSQLSuite + // ============================================================================================== + // The guideline for adding SQL functions + // ============================================================================================== + // To add a SQL function, we usually need to create a new `Expression` for the function, and + // implement the function logic in both the interpretation code path and codegen code path of the + // `Expression`. We also need to define the type coercion behavior for the function inputs, by + // extending `ImplicitCastInputTypes` or updating type coercion rules directly. + // + // It's much simpler if the SQL function can be implemented with existing expression(s). There are + // a few cases: + // - The function is simply an alias of another function. We can just register the same + // expression with a different function name, e.g. `expression[Rand]("random", true)`. + // - The function is mostly the same with another function, but has a different parameter list. + // We can use `RuntimeReplaceable` to create a new expression, which can customize the + // parameter list and analysis behavior (type coercion). The `RuntimeReplaceable` expression + // will be replaced by the actual expression at the end of analysis. See `Left` as an example. + // - The function can be implemented by combining some existing expressions. We can use + // `RuntimeReplaceable` to define the combination. See `ParseToDate` as an example. + // We can also inherit the analysis behavior from the replacement expression, by + // mixing `InheritAnalysisRules`. See `TryAdd` as an example. + // - Similarly, we can use `RuntimeReplaceableAggregate` to implement new aggregate functions. + // + // Sometimes, multiple functions share the same/similar expression replacement logic and it's + // tedious to create many similar `RuntimeReplaceable` expressions. We can use `ExpressionBuilder` + // to share the replacement logic. See `ParseToTimestampLTZExpressionBuilder` as an example. + // + // With these tools, we can even implement a new SQL function with a Java (static) method, and + // then create a `RuntimeReplaceable` expression to call the Java method with `Invoke` or + // `StaticInvoke` expression. By doing so we don't need to implement codegen for new functions + // anymore. See `AesEncrypt`/`AesDecrypt` as an example. 
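  // [Editorial sketch, not part of this patch or of FunctionRegistry] The last bullet above
  // can look like the following: the function body lives in a (static) helper method and a
  // RuntimeReplaceable expression wires it in with StaticInvoke, so no dedicated codegen is
  // needed. `HexDigestUtil` and its `sha224` method are hypothetical names, and the snippet
  // assumes imports of StaticInvoke (catalyst.expressions.objects), UnaryLike (catalyst.trees)
  // and the sql.types package.
  //
  //   case class Sha224Digest(child: Expression)
  //     extends RuntimeReplaceable with UnaryLike[Expression] with ImplicitCastInputTypes {
  //     // Delegate evaluation to the static helper; analysis still sees a normal expression.
  //     override lazy val replacement: Expression = StaticInvoke(
  //       classOf[HexDigestUtil], StringType, "sha224", Seq(child), Seq(BinaryType))
  //     override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType)
  //     override protected def withNewChildInternal(newChild: Expression): Expression =
  //       copy(child = newChild)
  //   }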
val expressions: Map[String, (ExpressionInfo, FunctionBuilder)] = Map( // misc non-aggregate functions expression[Abs]("abs"), @@ -336,7 +367,7 @@ object FunctionRegistry { expression[Inline]("inline"), expressionGeneratorOuter[Inline]("inline_outer"), expression[IsNaN]("isnan"), - expression[IfNull]("ifnull"), + expression[Nvl]("ifnull", setAlias = true), expression[IsNull]("isnull"), expression[IsNotNull]("isnotnull"), expression[Least]("least"), @@ -565,8 +596,9 @@ object FunctionRegistry { expression[ToBinary]("to_binary"), expression[ToUnixTimestamp]("to_unix_timestamp"), expression[ToUTCTimestamp]("to_utc_timestamp"), - expression[ParseToTimestampNTZ]("to_timestamp_ntz"), - expression[ParseToTimestampLTZ]("to_timestamp_ltz"), + // We keep the 2 expression builders below to have different function docs. + expressionBuilder("to_timestamp_ntz", ParseToTimestampNTZExpressionBuilder, setAlias = true), + expressionBuilder("to_timestamp_ltz", ParseToTimestampLTZExpressionBuilder, setAlias = true), expression[TruncDate]("trunc"), expression[TruncTimestamp]("date_trunc"), expression[UnixTimestamp]("unix_timestamp"), @@ -578,13 +610,15 @@ object FunctionRegistry { expression[SessionWindow]("session_window"), expression[MakeDate]("make_date"), expression[MakeTimestamp]("make_timestamp"), - expression[MakeTimestampNTZ]("make_timestamp_ntz"), - expression[MakeTimestampLTZ]("make_timestamp_ltz"), + // We keep the 2 expression builders below to have different function docs. + expressionBuilder("make_timestamp_ntz", MakeTimestampNTZExpressionBuilder, setAlias = true), + expressionBuilder("make_timestamp_ltz", MakeTimestampLTZExpressionBuilder, setAlias = true), expression[MakeInterval]("make_interval"), expression[MakeDTInterval]("make_dt_interval"), expression[MakeYMInterval]("make_ym_interval"), - expression[DatePart]("date_part"), expression[Extract]("extract"), + // We keep the `DatePartExpressionBuilder` to have different function docs. 
+ expressionBuilder("date_part", DatePartExpressionBuilder, setAlias = true), expression[DateFromUnixDate]("date_from_unix_date"), expression[UnixDate]("unix_date"), expression[SecondsToTimestamp]("timestamp_seconds"), @@ -806,12 +840,13 @@ object FunctionRegistry { } private def expressionBuilder[T <: ExpressionBuilder : ClassTag]( - name: String, builder: T, setAlias: Boolean = false) - : (String, (ExpressionInfo, FunctionBuilder)) = { + name: String, + builder: T, + setAlias: Boolean = false): (String, (ExpressionInfo, FunctionBuilder)) = { val info = FunctionRegistryBase.expressionInfo[T](name, None) val funcBuilder = (expressions: Seq[Expression]) => { assert(expressions.forall(_.resolved), "function arguments must be resolved.") - val expr = builder.build(expressions) + val expr = builder.build(name, expressions) if (setAlias) expr.setTagValue(FUNC_ALIAS, name) expr } @@ -915,5 +950,5 @@ object TableFunctionRegistry { } trait ExpressionBuilder { - def build(expressions: Seq[Expression]): Expression + def build(funcName: String, expressions: Seq[Expression]): Expression } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TimeTravelSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TimeTravelSpec.scala index cbb6e8bb06a4c..7e79c03b5ff6c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TimeTravelSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TimeTravelSpec.scala @@ -41,7 +41,7 @@ object TimeTravelSpec { throw QueryCompilationErrors.invalidTimestampExprForTimeTravel(ts) } val tsToEval = ts.transform { - case r: RuntimeReplaceable => r.child + case r: RuntimeReplaceable => r.replacement case _: Unevaluable => throw QueryCompilationErrors.invalidTimestampExprForTimeTravel(ts) case e if !e.deterministic => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 32b25f51b8efe..4ff5267af03eb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -21,7 +21,7 @@ import java.util.Locale import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult, TypeCoercion} -import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.trees.{BinaryLike, LeafLike, QuaternaryLike, TernaryLike, TreeNode, UnaryLike} @@ -352,34 +352,41 @@ trait Unevaluable extends Expression { * An expression that gets replaced at runtime (currently by the optimizer) into a different * expression for evaluation. This is mainly used to provide compatibility with other databases. * For example, we use this to support "nvl" by replacing it with "coalesce". - * - * A RuntimeReplaceable should have the original parameters along with a "child" expression in the - * case class constructor, and define a normal constructor that accepts only the original - * parameters. For an example, see [[Nvl]]. 
To make sure the explain plan and expression SQL - * works correctly, the implementation should also override flatArguments method and sql method. */ -trait RuntimeReplaceable extends UnaryExpression with Unevaluable { - override def nullable: Boolean = child.nullable - override def dataType: DataType = child.dataType +trait RuntimeReplaceable extends Expression { + def replacement: Expression + + override val nodePatterns: Seq[TreePattern] = Seq(RUNTIME_REPLACEABLE) + override def nullable: Boolean = replacement.nullable + override def dataType: DataType = replacement.dataType // As this expression gets replaced at optimization with its `child" expression, // two `RuntimeReplaceable` are considered to be semantically equal if their "child" expressions // are semantically equal. - override lazy val preCanonicalized: Expression = child.preCanonicalized + override lazy val preCanonicalized: Expression = replacement.preCanonicalized - /** - * Only used to generate SQL representation of this expression. - * - * Implementations should override this with original parameters - */ - def exprsReplaced: Seq[Expression] - - override def sql: String = mkString(exprsReplaced.map(_.sql)) - - final override val nodePatterns: Seq[TreePattern] = Seq(RUNTIME_REPLACEABLE) + final override def eval(input: InternalRow = null): Any = + throw QueryExecutionErrors.cannotEvaluateExpressionError(this) + final override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = + throw QueryExecutionErrors.cannotGenerateCodeForExpressionError(this) +} - def mkString(childrenString: Seq[String]): String = { - prettyName + childrenString.mkString("(", ", ", ")") +/** + * An add-on of [[RuntimeReplaceable]]. It makes `replacement` the child of the expression, to + * inherit the analysis rules for it, such as type coercion. The implementation should put + * `replacement` in the case class constructor, and define a normal constructor that accepts only + * the original parameters. For an example, see [[TryAdd]]. To make sure the explain plan and + * expression SQL works correctly, the implementation should also implement the `parameters` method. + */ +trait InheritAnalysisRules extends UnaryLike[Expression] { self: RuntimeReplaceable => + override def child: Expression = replacement + def parameters: Seq[Expression] + override def flatArguments: Iterator[Any] = parameters.iterator + // This method is used to generate a SQL string with transformed inputs. This is necessary as + // the actual inputs are not the children of this expression. + def makeSQLString(childrenSQL: Seq[String]): String = { + prettyName + childrenSQL.mkString("(", ", ", ")") } + final override def sql: String = makeSQLString(parameters.map(_.sql)) } /** @@ -388,29 +395,13 @@ trait RuntimeReplaceable extends UnaryExpression with Unevaluable { * with other databases. For example, we use this to support every, any/some aggregates by rewriting * them with Min and Max respectively. 
*/ -trait UnevaluableAggregate extends DeclarativeAggregate { - - override def nullable: Boolean = true - - override lazy val aggBufferAttributes = - throw QueryExecutionErrors.evaluateUnevaluableAggregateUnsupportedError( - "aggBufferAttributes", this) - - override lazy val initialValues: Seq[Expression] = - throw QueryExecutionErrors.evaluateUnevaluableAggregateUnsupportedError( - "initialValues", this) - - override lazy val updateExpressions: Seq[Expression] = - throw QueryExecutionErrors.evaluateUnevaluableAggregateUnsupportedError( - "updateExpressions", this) - - override lazy val mergeExpressions: Seq[Expression] = - throw QueryExecutionErrors.evaluateUnevaluableAggregateUnsupportedError( - "mergeExpressions", this) - - override lazy val evaluateExpression: Expression = - throw QueryExecutionErrors.evaluateUnevaluableAggregateUnsupportedError( - "evaluateExpression", this) +abstract class RuntimeReplaceableAggregate extends AggregateFunction with RuntimeReplaceable { + def aggBufferSchema: StructType = throw new IllegalStateException( + "RuntimeReplaceableAggregate.aggBufferSchema should not be called") + def aggBufferAttributes: Seq[AttributeReference] = throw new IllegalStateException( + "RuntimeReplaceableAggregate.aggBufferAttributes should not be called") + def inputAggBufferAttributes: Seq[AttributeReference] = throw new IllegalStateException( + "RuntimeReplaceableAggregate.inputAggBufferAttributes should not be called") } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala index 4663d4826286a..7a8a689a1bd3e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala @@ -75,19 +75,17 @@ case class TryEval(child: Expression) extends UnaryExpression with NullIntoleran since = "3.2.0", group = "math_funcs") // scalastyle:on line.size.limit -case class TryAdd(left: Expression, right: Expression, child: Expression) - extends RuntimeReplaceable { +case class TryAdd(left: Expression, right: Expression, replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { def this(left: Expression, right: Expression) = this(left, right, TryEval(Add(left, right, failOnError = true))) - override def flatArguments: Iterator[Any] = Iterator(left, right) - - override def exprsReplaced: Seq[Expression] = Seq(left, right) - override def prettyName: String = "try_add" + override def parameters: Seq[Expression] = Seq(left, right) + override protected def withNewChildInternal(newChild: Expression): Expression = - this.copy(child = newChild) + this.copy(replacement = newChild) } // scalastyle:off line.size.limit @@ -110,19 +108,18 @@ case class TryAdd(left: Expression, right: Expression, child: Expression) since = "3.2.0", group = "math_funcs") // scalastyle:on line.size.limit -case class TryDivide(left: Expression, right: Expression, child: Expression) - extends RuntimeReplaceable { +case class TryDivide(left: Expression, right: Expression, replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { def this(left: Expression, right: Expression) = this(left, right, TryEval(Divide(left, right, failOnError = true))) - override def flatArguments: Iterator[Any] = Iterator(left, right) - - override def exprsReplaced: Seq[Expression] = Seq(left, right) - override def prettyName: String = "try_divide" - override 
protected def withNewChildInternal(newChild: Expression): Expression = - this.copy(child = newChild) + override def parameters: Seq[Expression] = Seq(left, right) + + override protected def withNewChildInternal(newChild: Expression): Expression = { + copy(replacement = newChild) + } } @ExpressionDescription( @@ -145,19 +142,17 @@ case class TryDivide(left: Expression, right: Expression, child: Expression) """, since = "3.3.0", group = "math_funcs") -case class TrySubtract(left: Expression, right: Expression, child: Expression) - extends RuntimeReplaceable { +case class TrySubtract(left: Expression, right: Expression, replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { def this(left: Expression, right: Expression) = this(left, right, TryEval(Subtract(left, right, failOnError = true))) - override def flatArguments: Iterator[Any] = Iterator(left, right) - - override def exprsReplaced: Seq[Expression] = Seq(left, right) - override def prettyName: String = "try_subtract" + override def parameters: Seq[Expression] = Seq(left, right) + override protected def withNewChildInternal(newChild: Expression): Expression = - this.copy(child = newChild) + this.copy(replacement = newChild) } @ExpressionDescription( @@ -174,17 +169,15 @@ case class TrySubtract(left: Expression, right: Expression, child: Expression) """, since = "3.3.0", group = "math_funcs") -case class TryMultiply(left: Expression, right: Expression, child: Expression) - extends RuntimeReplaceable { +case class TryMultiply(left: Expression, right: Expression, replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { def this(left: Expression, right: Expression) = this(left, right, TryEval(Multiply(left, right, failOnError = true))) - override def flatArguments: Iterator[Any] = Iterator(left, right) - - override def exprsReplaced: Seq[Expression] = Seq(left, right) - override def prettyName: String = "try_multiply" + override def parameters: Seq[Expression] = Seq(left, right) + override protected def withNewChildInternal(newChild: Expression): Expression = - this.copy(child = newChild) + this.copy(replacement = newChild) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountIf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountIf.scala index 66800b277ffed..6973641a6bf33 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountIf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountIf.scala @@ -17,11 +17,9 @@ package org.apache.spark.sql.catalyst.expressions.aggregate -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult -import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, ImplicitCastInputTypes, UnevaluableAggregate} -import org.apache.spark.sql.catalyst.trees.TreePattern.{COUNT_IF, TreePattern} +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, ImplicitCastInputTypes, Literal, NullIf, RuntimeReplaceableAggregate} import org.apache.spark.sql.catalyst.trees.UnaryLike -import org.apache.spark.sql.types.{AbstractDataType, BooleanType, DataType, LongType} +import org.apache.spark.sql.types.{AbstractDataType, BooleanType} @ExpressionDescription( usage = """ @@ -36,30 +34,11 @@ import org.apache.spark.sql.types.{AbstractDataType, BooleanType, DataType, Long """, group = "agg_funcs", since = "3.0.0") -case class CountIf(predicate: Expression) extends 
UnevaluableAggregate with ImplicitCastInputTypes - with UnaryLike[Expression] { - - override def prettyName: String = "count_if" - - override def child: Expression = predicate - - override def nullable: Boolean = false - - override def dataType: DataType = LongType - +case class CountIf(child: Expression) extends RuntimeReplaceableAggregate + with ImplicitCastInputTypes with UnaryLike[Expression] { + override lazy val replacement: Expression = Count(new NullIf(child, Literal.FalseLiteral)) + override def nodeName: String = "count_if" override def inputTypes: Seq[AbstractDataType] = Seq(BooleanType) - - final override val nodePatterns: Seq[TreePattern] = Seq(COUNT_IF) - - override def checkInputDataTypes(): TypeCheckResult = predicate.dataType match { - case BooleanType => - TypeCheckResult.TypeCheckSuccess - case _ => - TypeCheckResult.TypeCheckFailure( - s"function $prettyName requires boolean type, not ${predicate.dataType.catalogString}" - ) - } - override protected def withNewChildInternal(newChild: Expression): CountIf = - copy(predicate = newChild) + copy(child = newChild) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/RegrCount.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/RegrCount.scala index 57dbc14a1702d..80df0128ccd7a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/RegrCount.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/RegrCount.scala @@ -17,10 +17,9 @@ package org.apache.spark.sql.catalyst.expressions.aggregate -import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, ImplicitCastInputTypes, UnevaluableAggregate} +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, ImplicitCastInputTypes, RuntimeReplaceableAggregate} import org.apache.spark.sql.catalyst.trees.BinaryLike -import org.apache.spark.sql.catalyst.trees.TreePattern.{REGR_COUNT, TreePattern} -import org.apache.spark.sql.types.{AbstractDataType, DataType, LongType, NumericType} +import org.apache.spark.sql.types.{AbstractDataType, NumericType} @ExpressionDescription( usage = """ @@ -38,18 +37,10 @@ import org.apache.spark.sql.types.{AbstractDataType, DataType, LongType, Numeric group = "agg_funcs", since = "3.3.0") case class RegrCount(left: Expression, right: Expression) - extends UnevaluableAggregate with ImplicitCastInputTypes with BinaryLike[Expression] { - - override def prettyName: String = "regr_count" - - override def nullable: Boolean = false - - override def dataType: DataType = LongType - + extends RuntimeReplaceableAggregate with ImplicitCastInputTypes with BinaryLike[Expression] { + override lazy val replacement: Expression = Count(Seq(left, right)) + override def nodeName: String = "regr_count" override def inputTypes: Seq[AbstractDataType] = Seq(NumericType, NumericType) - - final override val nodePatterns: Seq[TreePattern] = Seq(REGR_COUNT) - override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): RegrCount = this.copy(left = newLeft, right = newRight) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/boolAggregates.scala similarity index 63% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala rename to 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/boolAggregates.scala index 244e9d9755752..59c75f21c9a0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/boolAggregates.scala @@ -17,33 +17,10 @@ package org.apache.spark.sql.catalyst.expressions.aggregate -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.trees.TreePattern.{BOOL_AGG, TreePattern} import org.apache.spark.sql.catalyst.trees.UnaryLike import org.apache.spark.sql.types._ -abstract class UnevaluableBooleanAggBase(arg: Expression) - extends UnevaluableAggregate with ImplicitCastInputTypes with UnaryLike[Expression] { - - override def child: Expression = arg - - override def dataType: DataType = BooleanType - - override def inputTypes: Seq[AbstractDataType] = Seq(BooleanType) - - final override val nodePatterns: Seq[TreePattern] = Seq(BOOL_AGG) - - override def checkInputDataTypes(): TypeCheckResult = { - arg.dataType match { - case dt if dt != BooleanType => - TypeCheckResult.TypeCheckFailure(s"Input to function '$prettyName' should have been " + - s"${BooleanType.simpleString}, but it's [${arg.dataType.catalogString}].") - case _ => TypeCheckResult.TypeCheckSuccess - } - } -} - @ExpressionDescription( usage = "_FUNC_(expr) - Returns true if all values of `expr` are true.", examples = """ @@ -57,10 +34,13 @@ abstract class UnevaluableBooleanAggBase(arg: Expression) """, group = "agg_funcs", since = "3.0.0") -case class BoolAnd(arg: Expression) extends UnevaluableBooleanAggBase(arg) { - override def nodeName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("bool_and") +case class BoolAnd(child: Expression) extends RuntimeReplaceableAggregate + with ImplicitCastInputTypes with UnaryLike[Expression] { + override lazy val replacement: Expression = Min(child) + override def nodeName: String = "bool_and" + override def inputTypes: Seq[AbstractDataType] = Seq(BooleanType) override protected def withNewChildInternal(newChild: Expression): Expression = - copy(arg = newChild) + copy(child = newChild) } @ExpressionDescription( @@ -76,8 +56,11 @@ case class BoolAnd(arg: Expression) extends UnevaluableBooleanAggBase(arg) { """, group = "agg_funcs", since = "3.0.0") -case class BoolOr(arg: Expression) extends UnevaluableBooleanAggBase(arg) { - override def nodeName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("bool_or") +case class BoolOr(child: Expression) extends RuntimeReplaceableAggregate + with ImplicitCastInputTypes with UnaryLike[Expression] { + override lazy val replacement: Expression = Max(child) + override def nodeName: String = "bool_or" + override def inputTypes: Seq[AbstractDataType] = Seq(BooleanType) override protected def withNewChildInternal(newChild: Expression): Expression = - copy(arg = newChild) + copy(child = newChild) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 65b6a05fbeb47..0cd8593c766df 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -27,6 +27,7 @@ import 
org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, Un import org.apache.spark.sql.catalyst.expressions.ArraySortLike.NullOrder import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.catalyst.trees.BinaryLike import org.apache.spark.sql.catalyst.trees.TreePattern.{ARRAYS_ZIP, CONCAT, TreePattern} import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.catalyst.util.DateTimeConstants._ @@ -182,19 +183,41 @@ case class MapKeys(child: Expression) """, group = "map_funcs", since = "3.3.0") -case class MapContainsKey( - left: Expression, - right: Expression, - child: Expression) extends RuntimeReplaceable { - def this(left: Expression, right: Expression) = - this(left, right, ArrayContains(MapKeys(left), right)) +case class MapContainsKey(left: Expression, right: Expression) + extends RuntimeReplaceable with BinaryLike[Expression] with ImplicitCastInputTypes { + + override lazy val replacement: Expression = ArrayContains(MapKeys(left), right) - override def exprsReplaced: Seq[Expression] = Seq(left, right) + override def inputTypes: Seq[AbstractDataType] = { + (left.dataType, right.dataType) match { + case (_, NullType) => Seq.empty + case (MapType(kt, vt, valueContainsNull), dt) => + TypeCoercion.findWiderTypeWithoutStringPromotionForTwo(kt, dt) match { + case Some(widerType) => Seq(MapType(widerType, vt, valueContainsNull), widerType) + case _ => Seq.empty + } + case _ => Seq.empty + } + } + + override def checkInputDataTypes(): TypeCheckResult = { + (left.dataType, right.dataType) match { + case (_, NullType) => + TypeCheckResult.TypeCheckFailure("Null typed values cannot be used as arguments") + case (MapType(kt, _, _), dt) if kt.sameType(dt) => + TypeUtils.checkForOrderingExpr(kt, s"function $prettyName") + case _ => TypeCheckResult.TypeCheckFailure(s"Input to function $prettyName should have " + + s"been ${MapType.simpleString} followed by a value with same key type, but it's " + + s"[${left.dataType.catalogString}, ${right.dataType.catalogString}].") + } + } override def prettyName: String = "map_contains_key" - override protected def withNewChildInternal(newChild: Expression): MapContainsKey = - copy(child = newChild) + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): Expression = { + copy(newLeft, newRight) + } } @ExpressionDescription( @@ -2229,19 +2252,17 @@ case class ElementAt( """, since = "3.3.0", group = "map_funcs") -case class TryElementAt(left: Expression, right: Expression, child: Expression) - extends RuntimeReplaceable { +case class TryElementAt(left: Expression, right: Expression, replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { def this(left: Expression, right: Expression) = this(left, right, ElementAt(left, right, failOnError = false)) - override def flatArguments: Iterator[Any] = Iterator(left, right) - - override def exprsReplaced: Seq[Expression] = Seq(left, right) - override def prettyName: String = "try_element_at" + override def parameters: Seq[Expression] = Seq(left, right) + override protected def withNewChildInternal(newChild: Expression): Expression = - this.copy(child = newChild) + this.copy(replacement = newChild) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 
e73e989c9c99e..9780b9df0e031 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -26,6 +26,7 @@ import org.apache.commons.text.StringEscapeUtils import org.apache.spark.SparkDateTimeException import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.trees.TreePattern._ @@ -1112,25 +1113,15 @@ case class GetTimestamp( group = "datetime_funcs", since = "3.3.0") // scalastyle:on line.size.limit -case class ParseToTimestampNTZ( - left: Expression, - format: Option[Expression], - child: Expression) extends RuntimeReplaceable { - - def this(left: Expression, format: Expression) = { - this(left, Option(format), GetTimestamp(left, format, TimestampNTZType)) +object ParseToTimestampNTZExpressionBuilder extends ExpressionBuilder { + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + val numArgs = expressions.length + if (numArgs == 1 || numArgs == 2) { + ParseToTimestamp(expressions(0), expressions.drop(1).lastOption, TimestampNTZType) + } else { + throw QueryCompilationErrors.invalidFunctionArgumentNumberError(Seq(1, 2), funcName, numArgs) + } } - - def this(left: Expression) = this(left, None, Cast(left, TimestampNTZType)) - - override def flatArguments: Iterator[Any] = Iterator(left, format) - override def exprsReplaced: Seq[Expression] = left +: format.toSeq - - override def prettyName: String = "to_timestamp_ntz" - override def dataType: DataType = TimestampNTZType - - override protected def withNewChildInternal(newChild: Expression): ParseToTimestampNTZ = - copy(child = newChild) } /** @@ -1159,25 +1150,15 @@ case class ParseToTimestampNTZ( group = "datetime_funcs", since = "3.3.0") // scalastyle:on line.size.limit -case class ParseToTimestampLTZ( - left: Expression, - format: Option[Expression], - child: Expression) extends RuntimeReplaceable { - - def this(left: Expression, format: Expression) = { - this(left, Option(format), GetTimestamp(left, format, TimestampType)) +object ParseToTimestampLTZExpressionBuilder extends ExpressionBuilder { + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + val numArgs = expressions.length + if (numArgs == 1 || numArgs == 2) { + ParseToTimestamp(expressions(0), expressions.drop(1).lastOption, TimestampType) + } else { + throw QueryCompilationErrors.invalidFunctionArgumentNumberError(Seq(1, 2), funcName, numArgs) + } } - - def this(left: Expression) = this(left, None, Cast(left, TimestampType)) - - override def flatArguments: Iterator[Any] = Iterator(left, format) - override def exprsReplaced: Seq[Expression] = left +: format.toSeq - - override def prettyName: String = "to_timestamp_ltz" - override def dataType: DataType = TimestampType - - override protected def withNewChildInternal(newChild: Expression): ParseToTimestampLTZ = - copy(child = newChild) } abstract class ToTimestamp @@ -1606,12 +1587,19 @@ case class TimeAdd(start: Expression, interval: Expression, timeZoneId: Option[S case class DatetimeSub( start: Expression, interval: Expression, - child: Expression) extends RuntimeReplaceable { - override def exprsReplaced: Seq[Expression] = Seq(start, interval) + replacement: Expression) extends 
RuntimeReplaceable with InheritAnalysisRules { + + override def parameters: Seq[Expression] = Seq(start, interval) + + override def makeSQLString(childrenSQL: Seq[String]): String = { + childrenSQL.mkString(" - ") + } + override def toString: String = s"$start - $interval" - override def mkString(childrenString: Seq[String]): String = childrenString.mkString(" - ") - override protected def withNewChildInternal(newChild: Expression): DatetimeSub = - copy(child = newChild) + + override protected def withNewChildInternal(newChild: Expression): Expression = { + copy(replacement = newChild) + } } /** @@ -1991,25 +1979,48 @@ case class MonthsBetween( group = "datetime_funcs", since = "1.5.0") // scalastyle:on line.size.limit -case class ParseToDate(left: Expression, format: Option[Expression], child: Expression) - extends RuntimeReplaceable { +case class ParseToDate( + left: Expression, + format: Option[Expression], + timeZoneId: Option[String] = None) + extends RuntimeReplaceable with ImplicitCastInputTypes with TimeZoneAwareExpression { + + override lazy val replacement: Expression = format.map { f => + Cast(GetTimestamp(left, f, TimestampType, timeZoneId), DateType, timeZoneId) + }.getOrElse(Cast(left, DateType, timeZoneId)) // backwards compatibility def this(left: Expression, format: Expression) = { - this(left, Option(format), Cast(GetTimestamp(left, format, TimestampType), DateType)) + this(left, Option(format)) } def this(left: Expression) = { - // backwards compatibility - this(left, None, Cast(left, DateType)) + this(left, None) } - override def exprsReplaced: Seq[Expression] = left +: format.toSeq - override def flatArguments: Iterator[Any] = Iterator(left, format) - override def prettyName: String = "to_date" - override protected def withNewChildInternal(newChild: Expression): ParseToDate = - copy(child = newChild) + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Some(timeZoneId)) + + override def nodePatternsInternal(): Seq[TreePattern] = Seq(RUNTIME_REPLACEABLE) + + override def children: Seq[Expression] = left +: format.toSeq + + override def inputTypes: Seq[AbstractDataType] = { + // Note: ideally this function should only take string input, but we allow more types here to + // be backward compatible. 
+ TypeCollection(StringType, DateType, TimestampType, TimestampNTZType) +: + format.map(_ => StringType).toSeq + } + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): Expression = { + if (format.isDefined) { + copy(left = newChildren.head, format = Some(newChildren.last)) + } else { + copy(left = newChildren.head) + } + } } /** @@ -2043,23 +2054,44 @@ case class ParseToTimestamp( left: Expression, format: Option[Expression], override val dataType: DataType, - child: Expression) extends RuntimeReplaceable { + timeZoneId: Option[String] = None) + extends RuntimeReplaceable with ImplicitCastInputTypes with TimeZoneAwareExpression { + + override lazy val replacement: Expression = format.map { f => + GetTimestamp(left, f, dataType, timeZoneId) + }.getOrElse(Cast(left, dataType, timeZoneId)) def this(left: Expression, format: Expression) = { - this(left, Option(format), SQLConf.get.timestampType, - GetTimestamp(left, format, SQLConf.get.timestampType)) + this(left, Option(format), SQLConf.get.timestampType) } def this(left: Expression) = - this(left, None, SQLConf.get.timestampType, Cast(left, SQLConf.get.timestampType)) + this(left, None, SQLConf.get.timestampType) - override def flatArguments: Iterator[Any] = Iterator(left, format) - override def exprsReplaced: Seq[Expression] = left +: format.toSeq + override def nodeName: String = "to_timestamp" - override def prettyName: String = "to_timestamp" + override def nodePatternsInternal(): Seq[TreePattern] = Seq(RUNTIME_REPLACEABLE) - override protected def withNewChildInternal(newChild: Expression): ParseToTimestamp = - copy(child = newChild) + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Some(timeZoneId)) + + override def children: Seq[Expression] = left +: format.toSeq + + override def inputTypes: Seq[AbstractDataType] = { + // Note: ideally this function should only take string input, but we allow more types here to + // be backward compatible. 
+ TypeCollection(StringType, DateType, TimestampType, TimestampNTZType) +: + format.map(_ => StringType).toSeq + } + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): Expression = { + if (format.isDefined) { + copy(left = newChildren.head, format = Some(newChildren.last)) + } else { + copy(left = newChildren.head) + } + } } trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes { @@ -2410,32 +2442,22 @@ case class MakeDate( group = "datetime_funcs", since = "3.3.0") // scalastyle:on line.size.limit -case class MakeTimestampNTZ( - year: Expression, - month: Expression, - day: Expression, - hour: Expression, - min: Expression, - sec: Expression, - failOnError: Boolean = SQLConf.get.ansiEnabled, - child: Expression) extends RuntimeReplaceable { - def this( - year: Expression, - month: Expression, - day: Expression, - hour: Expression, - min: Expression, - sec: Expression) = { - this(year, month, day, hour, min, sec, failOnError = SQLConf.get.ansiEnabled, - MakeTimestamp(year, month, day, hour, min, sec, dataType = TimestampNTZType)) +object MakeTimestampNTZExpressionBuilder extends ExpressionBuilder { + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + val numArgs = expressions.length + if (numArgs == 6) { + MakeTimestamp( + expressions(0), + expressions(1), + expressions(2), + expressions(3), + expressions(4), + expressions(5), + dataType = TimestampNTZType) + } else { + throw QueryCompilationErrors.invalidFunctionArgumentNumberError(Seq(6), funcName, numArgs) + } } - - override def prettyName: String = "make_timestamp_ntz" - - override def exprsReplaced: Seq[Expression] = Seq(year, month, day, hour, min, sec) - - override protected def withNewChildInternal(newChild: Expression): Expression = - copy(child = newChild) } // scalastyle:off line.size.limit @@ -2469,45 +2491,23 @@ case class MakeTimestampNTZ( group = "datetime_funcs", since = "3.3.0") // scalastyle:on line.size.limit -case class MakeTimestampLTZ( - year: Expression, - month: Expression, - day: Expression, - hour: Expression, - min: Expression, - sec: Expression, - timezone: Option[Expression], - failOnError: Boolean = SQLConf.get.ansiEnabled, - child: Expression) extends RuntimeReplaceable { - def this( - year: Expression, - month: Expression, - day: Expression, - hour: Expression, - min: Expression, - sec: Expression) = { - this(year, month, day, hour, min, sec, None, failOnError = SQLConf.get.ansiEnabled, - MakeTimestamp(year, month, day, hour, min, sec, dataType = TimestampType)) - } - - def this( - year: Expression, - month: Expression, - day: Expression, - hour: Expression, - min: Expression, - sec: Expression, - timezone: Expression) = { - this(year, month, day, hour, min, sec, Some(timezone), failOnError = SQLConf.get.ansiEnabled, - MakeTimestamp(year, month, day, hour, min, sec, Some(timezone), dataType = TimestampType)) +object MakeTimestampLTZExpressionBuilder extends ExpressionBuilder { + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + val numArgs = expressions.length + if (numArgs == 6 || numArgs == 7) { + MakeTimestamp( + expressions(0), + expressions(1), + expressions(2), + expressions(3), + expressions(4), + expressions(5), + expressions.drop(6).lastOption, + dataType = TimestampType) + } else { + throw QueryCompilationErrors.invalidFunctionArgumentNumberError(Seq(6), funcName, numArgs) + } } - - override def prettyName: String = "make_timestamp_ltz" - - override def exprsReplaced: 
Seq[Expression] = Seq(year, month, day, hour, min, sec) - - override protected def withNewChildInternal(newChild: Expression): Expression = - copy(child = newChild) } // scalastyle:off line.size.limit @@ -2699,7 +2699,7 @@ case class MakeTimestamp( }) } - override def prettyName: String = "make_timestamp" + override def nodeName: String = "make_timestamp" // override def children: Seq[Expression] = Seq(year, month, day, hour, min, sec) ++ timezone override protected def withNewChildrenInternal( @@ -2720,8 +2720,7 @@ object DatePart { def parseExtractField( extractField: String, - source: Expression, - errorHandleFunc: => Nothing): Expression = extractField.toUpperCase(Locale.ROOT) match { + source: Expression): Expression = extractField.toUpperCase(Locale.ROOT) match { case "YEAR" | "Y" | "YEARS" | "YR" | "YRS" => Year(source) case "YEAROFWEEK" => YearOfWeek(source) case "QUARTER" | "QTR" => Quarter(source) @@ -2734,29 +2733,8 @@ object DatePart { case "HOUR" | "H" | "HOURS" | "HR" | "HRS" => Hour(source) case "MINUTE" | "M" | "MIN" | "MINS" | "MINUTES" => Minute(source) case "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => SecondWithFraction(source) - case _ => errorHandleFunc - } - - def toEquivalentExpr(field: Expression, source: Expression): Expression = { - if (!field.foldable) { - throw QueryCompilationErrors.unfoldableFieldUnsupportedError - } - val fieldEval = field.eval() - if (fieldEval == null) { - Literal(null, DoubleType) - } else { - val fieldStr = fieldEval.asInstanceOf[UTF8String].toString - - def analysisException = - throw QueryCompilationErrors.literalTypeUnsupportedForSourceTypeError(fieldStr, source) - - source.dataType match { - case _: AnsiIntervalType | CalendarIntervalType => - ExtractIntervalPart.parseExtractField(fieldStr, source, analysisException) - case _ => - DatePart.parseExtractField(fieldStr, source, analysisException) - } - } + case _ => + throw QueryCompilationErrors.literalTypeUnsupportedForSourceTypeError(extractField, source) } } @@ -2793,20 +2771,17 @@ object DatePart { group = "datetime_funcs", since = "3.0.0") // scalastyle:on line.size.limit -case class DatePart(field: Expression, source: Expression, child: Expression) - extends RuntimeReplaceable { - - def this(field: Expression, source: Expression) = { - this(field, source, DatePart.toEquivalentExpr(field, source)) +object DatePartExpressionBuilder extends ExpressionBuilder { + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + val numArgs = expressions.length + if (numArgs == 2) { + val field = expressions(0) + val source = expressions(1) + Extract(field, source, Extract.createExpr(funcName, field, source)) + } else { + throw QueryCompilationErrors.invalidFunctionArgumentNumberError(Seq(2), funcName, numArgs) + } } - - override def flatArguments: Iterator[Any] = Iterator(field, source) - override def exprsReplaced: Seq[Expression] = Seq(field, source) - - override def prettyName: String = "date_part" - - override protected def withNewChildInternal(newChild: Expression): DatePart = - copy(child = newChild) } // scalastyle:off line.size.limit @@ -2862,23 +2837,45 @@ case class DatePart(field: Expression, source: Expression, child: Expression) group = "datetime_funcs", since = "3.0.0") // scalastyle:on line.size.limit -case class Extract(field: Expression, source: Expression, child: Expression) - extends RuntimeReplaceable { +case class Extract(field: Expression, source: Expression, replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { - 
def this(field: Expression, source: Expression) = { - this(field, source, DatePart.toEquivalentExpr(field, source)) - } + def this(field: Expression, source: Expression) = + this(field, source, Extract.createExpr("extract", field, source)) - override def flatArguments: Iterator[Any] = Iterator(field, source) + override def parameters: Seq[Expression] = Seq(field, source) - override def exprsReplaced: Seq[Expression] = Seq(field, source) + override def makeSQLString(childrenSQL: Seq[String]): String = { + getTagValue(FunctionRegistry.FUNC_ALIAS) match { + case Some("date_part") => s"$prettyName(${childrenSQL.mkString(", ")})" + case _ => s"$prettyName(${childrenSQL.mkString(" FROM ")})" + } + } - override def mkString(childrenString: Seq[String]): String = { - prettyName + childrenString.mkString("(", " FROM ", ")") + override protected def withNewChildInternal(newChild: Expression): Expression = { + copy(replacement = newChild) } +} - override protected def withNewChildInternal(newChild: Expression): Extract = - copy(child = newChild) +object Extract { + def createExpr(funcName: String, field: Expression, source: Expression): Expression = { + // both string and null literals are allowed. + if ((field.dataType == StringType || field.dataType == NullType) && field.foldable) { + val fieldStr = field.eval().asInstanceOf[UTF8String] + if (fieldStr == null) { + Literal(null, DoubleType) + } else { + source.dataType match { + case _: AnsiIntervalType | CalendarIntervalType => + ExtractIntervalPart.parseExtractField(fieldStr.toString, source) + case _ => + DatePart.parseExtractField(fieldStr.toString, source) + } + } + } else { + throw QueryCompilationErrors.requireLiteralParameter(funcName, "field", "string") + } + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala index 5568d7c4a6cba..c461b8f51eedc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGe import org.apache.spark.sql.catalyst.util.DateTimeConstants.MONTHS_PER_YEAR import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.catalyst.util.IntervalUtils._ -import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.types.DayTimeIntervalType.{DAY, HOUR, MINUTE, SECOND} @@ -122,10 +122,7 @@ case class ExtractANSIIntervalSeconds(child: Expression) object ExtractIntervalPart { - def parseExtractField( - extractField: String, - source: Expression, - errorHandleFunc: => Nothing): Expression = { + def parseExtractField(extractField: String, source: Expression): Expression = { (extractField.toUpperCase(Locale.ROOT), source.dataType) match { case ("YEAR" | "Y" | "YEARS" | "YR" | "YRS", YearMonthIntervalType(start, end)) if isUnitInIntervalRange(YEAR, start, end) => @@ -157,7 +154,8 @@ object ExtractIntervalPart { ExtractANSIIntervalSeconds(source) case ("SECOND" | "S" | "SEC" | "SECONDS" | "SECS", CalendarIntervalType) => ExtractIntervalSeconds(source) - case _ => errorHandleFunc + case _ => + throw 
QueryCompilationErrors.literalTypeUnsupportedForSourceTypeError(extractField, source) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala index d34b8379bb601..f64b6ea078a46 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala @@ -269,28 +269,32 @@ case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL" override protected def withNewChildInternal(newChild: Expression): Ceil = copy(child = newChild) } -trait CeilFloorExpressionBuilder extends ExpressionBuilder { - val functionName: String - def build(expressions: Seq[Expression]): Expression - - def extractChildAndScaleParam(expressions: Seq[Expression]): (Expression, Expression) = { - val child = expressions(0) - val scale = expressions(1) - if (! (scale.foldable && scale.dataType == DataTypes.IntegerType)) { - throw QueryCompilationErrors.invalidScaleParameterRoundBase(functionName) - } - val scaleV = scale.eval(EmptyRow) - if (scaleV == null) { - throw QueryCompilationErrors.invalidScaleParameterRoundBase(functionName) +trait CeilFloorExpressionBuilderBase extends ExpressionBuilder { + protected def buildWithOneParam(param: Expression): Expression + protected def buildWithTwoParams(param1: Expression, param2: Expression): Expression + + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + val numArgs = expressions.length + if (numArgs == 1) { + buildWithOneParam(expressions.head) + } else if (numArgs == 2) { + val scale = expressions(1) + if (!(scale.foldable && scale.dataType == IntegerType)) { + throw QueryCompilationErrors.requireLiteralParameter(funcName, "scale", "int") + } + if (scale.eval() == null) { + throw QueryCompilationErrors.requireLiteralParameter(funcName, "scale", "int") + } + buildWithTwoParams(expressions(0), scale) + } else { + throw QueryCompilationErrors.invalidFunctionArgumentNumberError(Seq(2), funcName, numArgs) } - (child, scale) } } +// scalastyle:off line.size.limit @ExpressionDescription( - usage = """ - _FUNC_(expr[, scale]) - Returns the smallest number after rounding up that is not smaller - than `expr`. A optional `scale` parameter can be specified to control the rounding behavior.""", + usage = "_FUNC_(expr[, scale]) - Returns the smallest number after rounding up that is not smaller than `expr`. 
An optional `scale` parameter can be specified to control the rounding behavior.", examples = """ Examples: > SELECT _FUNC_(-0.1); @@ -304,24 +308,17 @@ trait CeilFloorExpressionBuilder extends ExpressionBuilder { """, since = "3.3.0", group = "math_funcs") -object CeilExpressionBuilder extends CeilFloorExpressionBuilder { - val functionName: String = "ceil" - - def build(expressions: Seq[Expression]): Expression = { - if (expressions.length == 1) { - Ceil(expressions.head) - } else if (expressions.length == 2) { - val (child, scale) = extractChildAndScaleParam(expressions) - RoundCeil(child, scale) - } else { - throw QueryCompilationErrors.invalidNumberOfFunctionParameters(functionName) - } - } +// scalastyle:on line.size.limit +object CeilExpressionBuilder extends CeilFloorExpressionBuilderBase { + override protected def buildWithOneParam(param: Expression): Expression = Ceil(param) + + override protected def buildWithTwoParams(param1: Expression, param2: Expression): Expression = + RoundCeil(param1, param2) } case class RoundCeil(child: Expression, scale: Expression) extends RoundBase(child, scale, BigDecimal.RoundingMode.CEILING, "ROUND_CEILING") - with Serializable with ImplicitCastInputTypes { + with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType, IntegerType) @@ -335,9 +332,11 @@ case class RoundCeil(child: Expression, scale: Expression) case t => t } - override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression) - : RoundCeil = copy(child = newLeft, scale = newRight) override def nodeName: String = "ceil" + + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): RoundCeil = + copy(child = newLeft, scale = newRight) } @ExpressionDescription( @@ -539,10 +538,9 @@ case class Floor(child: Expression) extends UnaryMathExpression(math.floor, "FLO copy(child = newChild) } +// scalastyle:off line.size.limit @ExpressionDescription( - usage = """ - _FUNC_(expr[, scale]) - Returns the largest number after rounding down that is not greater - than `expr`. An optional `scale` parameter can be specified to control the rounding behavior.""", + usage = " _FUNC_(expr[, scale]) - Returns the largest number after rounding down that is not greater than `expr`. 
An optional `scale` parameter can be specified to control the rounding behavior.", examples = """ Examples: > SELECT _FUNC_(-0.1); @@ -556,24 +554,17 @@ case class Floor(child: Expression) extends UnaryMathExpression(math.floor, "FLO """, since = "3.3.0", group = "math_funcs") -object FloorExpressionBuilder extends CeilFloorExpressionBuilder { - val functionName: String = "floor" - - def build(expressions: Seq[Expression]): Expression = { - if (expressions.length == 1) { - Floor(expressions.head) - } else if (expressions.length == 2) { - val(child, scale) = extractChildAndScaleParam(expressions) - RoundFloor(child, scale) - } else { - throw QueryCompilationErrors.invalidNumberOfFunctionParameters(functionName) - } - } +// scalastyle:on line.size.limit +object FloorExpressionBuilder extends CeilFloorExpressionBuilderBase { + override protected def buildWithOneParam(param: Expression): Expression = Floor(param) + + override protected def buildWithTwoParams(param1: Expression, param2: Expression): Expression = + RoundFloor(param1, param2) } case class RoundFloor(child: Expression, scale: Expression) extends RoundBase(child, scale, BigDecimal.RoundingMode.FLOOR, "ROUND_FLOOR") - with Serializable with ImplicitCastInputTypes { + with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType, IntegerType) @@ -587,9 +578,11 @@ case class RoundFloor(child: Expression, scale: Expression) case t => t } - override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression) - : RoundFloor = copy(child = newLeft, scale = newRight) override def nodeName: String = "floor" + + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): RoundFloor = + copy(child = newLeft, scale = newRight) } object Factorial { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 941ccb7088393..eb21bd555db7d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -126,8 +126,8 @@ object RaiseError { """, since = "2.0.0", group = "misc_funcs") -case class AssertTrue(left: Expression, right: Expression, child: Expression) - extends RuntimeReplaceable { +case class AssertTrue(left: Expression, right: Expression, replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { override def prettyName: String = "assert_true" @@ -139,11 +139,10 @@ case class AssertTrue(left: Expression, right: Expression, child: Expression) this(left, Literal(s"'${left.simpleString(SQLConf.get.maxToStringFields)}' is not true!")) } - override def flatArguments: Iterator[Any] = Iterator(left, right) - override def exprsReplaced: Seq[Expression] = Seq(left, right) + override def parameters: Seq[Expression] = Seq(left, right) override protected def withNewChildInternal(newChild: Expression): AssertTrue = - copy(child = newChild) + copy(replacement = newChild) } object AssertTrue { @@ -341,31 +340,31 @@ case class AesEncrypt( input: Expression, key: Expression, mode: Expression, - padding: Expression, - child: Expression) - extends RuntimeReplaceable { - - def this(input: Expression, key: Expression, mode: Expression, padding: Expression) = { - this( - input, - key, - mode, - padding, - StaticInvoke( - classOf[ExpressionImplUtils], - BinaryType, - "aesEncrypt", - Seq(input, key, mode, padding), 
- Seq(BinaryType, BinaryType, StringType, StringType))) - } + padding: Expression) + extends RuntimeReplaceable with ImplicitCastInputTypes { + + override lazy val replacement: Expression = StaticInvoke( + classOf[ExpressionImplUtils], + BinaryType, + "aesEncrypt", + Seq(input, key, mode, padding), + inputTypes) + def this(input: Expression, key: Expression, mode: Expression) = this(input, key, mode, Literal("DEFAULT")) def this(input: Expression, key: Expression) = this(input, key, Literal("GCM")) - def exprsReplaced: Seq[Expression] = Seq(input, key, mode, padding) - protected def withNewChildInternal(newChild: Expression): AesEncrypt = - copy(child = newChild) + override def prettyName: String = "aes_encrypt" + + override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType, BinaryType, StringType, StringType) + + override def children: Seq[Expression] = Seq(input, key, mode, padding) + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): Expression = { + copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3)) + } } /** @@ -405,30 +404,32 @@ case class AesDecrypt( input: Expression, key: Expression, mode: Expression, - padding: Expression, - child: Expression) - extends RuntimeReplaceable { - - def this(input: Expression, key: Expression, mode: Expression, padding: Expression) = { - this( - input, - key, - mode, - padding, - StaticInvoke( - classOf[ExpressionImplUtils], - BinaryType, - "aesDecrypt", - Seq(input, key, mode, padding), - Seq(BinaryType, BinaryType, StringType, StringType))) - } + padding: Expression) + extends RuntimeReplaceable with ImplicitCastInputTypes { + + override lazy val replacement: Expression = StaticInvoke( + classOf[ExpressionImplUtils], + BinaryType, + "aesDecrypt", + Seq(input, key, mode, padding), + inputTypes) + def this(input: Expression, key: Expression, mode: Expression) = this(input, key, mode, Literal("DEFAULT")) def this(input: Expression, key: Expression) = this(input, key, Literal("GCM")) - def exprsReplaced: Seq[Expression] = Seq(input, key) - protected def withNewChildInternal(newChild: Expression): AesDecrypt = - copy(child = newChild) + override def inputTypes: Seq[AbstractDataType] = { + Seq(BinaryType, BinaryType, StringType, StringType) + } + + override def prettyName: String = "aes_decrypt" + + override def children: Seq[Expression] = Seq(input, key, mode, padding) + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): Expression = { + copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3)) + } } // scalastyle:on line.size.limit diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala index a15126a3347a3..3c6a9b8e78041 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala @@ -129,29 +129,6 @@ case class Coalesce(children: Seq[Expression]) extends ComplexTypeMergingExpress } -@ExpressionDescription( - usage = "_FUNC_(expr1, expr2) - Returns `expr2` if `expr1` is null, or `expr1` otherwise.", - examples = """ - Examples: - > SELECT _FUNC_(NULL, array('2')); - ["2"] - """, - since = "2.0.0", - group = "conditional_funcs") -case class IfNull(left: Expression, right: Expression, child: Expression) - extends RuntimeReplaceable { - - def this(left: Expression, 
right: Expression) = { - this(left, right, Coalesce(Seq(left, right))) - } - - override def flatArguments: Iterator[Any] = Iterator(left, right) - override def exprsReplaced: Seq[Expression] = Seq(left, right) - - override protected def withNewChildInternal(newChild: Expression): IfNull = copy(child = newChild) -} - - @ExpressionDescription( usage = "_FUNC_(expr1, expr2) - Returns null if `expr1` equals to `expr2`, or `expr1` otherwise.", examples = """ @@ -161,17 +138,18 @@ case class IfNull(left: Expression, right: Expression, child: Expression) """, since = "2.0.0", group = "conditional_funcs") -case class NullIf(left: Expression, right: Expression, child: Expression) - extends RuntimeReplaceable { +case class NullIf(left: Expression, right: Expression, replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { def this(left: Expression, right: Expression) = { this(left, right, If(EqualTo(left, right), Literal.create(null, left.dataType), left)) } - override def flatArguments: Iterator[Any] = Iterator(left, right) - override def exprsReplaced: Seq[Expression] = Seq(left, right) + override def parameters: Seq[Expression] = Seq(left, right) - override protected def withNewChildInternal(newChild: Expression): NullIf = copy(child = newChild) + override protected def withNewChildInternal(newChild: Expression): NullIf = { + copy(replacement = newChild) + } } @@ -184,16 +162,17 @@ case class NullIf(left: Expression, right: Expression, child: Expression) """, since = "2.0.0", group = "conditional_funcs") -case class Nvl(left: Expression, right: Expression, child: Expression) extends RuntimeReplaceable { +case class Nvl(left: Expression, right: Expression, replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { def this(left: Expression, right: Expression) = { this(left, right, Coalesce(Seq(left, right))) } - override def flatArguments: Iterator[Any] = Iterator(left, right) - override def exprsReplaced: Seq[Expression] = Seq(left, right) + override def parameters: Seq[Expression] = Seq(left, right) - override protected def withNewChildInternal(newChild: Expression): Nvl = copy(child = newChild) + override protected def withNewChildInternal(newChild: Expression): Nvl = + copy(replacement = newChild) } @@ -208,17 +187,18 @@ case class Nvl(left: Expression, right: Expression, child: Expression) extends R since = "2.0.0", group = "conditional_funcs") // scalastyle:on line.size.limit -case class Nvl2(expr1: Expression, expr2: Expression, expr3: Expression, child: Expression) - extends RuntimeReplaceable { +case class Nvl2(expr1: Expression, expr2: Expression, expr3: Expression, replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { def this(expr1: Expression, expr2: Expression, expr3: Expression) = { this(expr1, expr2, expr3, If(IsNotNull(expr1), expr2, expr3)) } - override def flatArguments: Iterator[Any] = Iterator(expr1, expr2, expr3) - override def exprsReplaced: Seq[Expression] = Seq(expr1, expr2, expr3) + override def parameters: Seq[Expression] = Seq(expr1, expr2, expr3) - override protected def withNewChildInternal(newChild: Expression): Nvl2 = copy(child = newChild) + override protected def withNewChildInternal(newChild: Expression): Nvl2 = { + copy(replacement = newChild) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 889c53bc548bb..368cbfd6be641 
100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.catalyst.trees.BinaryLike import org.apache.spark.sql.catalyst.trees.TreePattern.{LIKE_FAMLIY, TreePattern} import org.apache.spark.sql.catalyst.util.{GenericArrayData, StringUtils} import org.apache.spark.sql.errors.QueryExecutionErrors @@ -240,18 +241,20 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) case class ILike( left: Expression, right: Expression, - escapeChar: Char, - child: Expression) extends RuntimeReplaceable { - def this(left: Expression, right: Expression, escapeChar: Char) = - this(left, right, escapeChar, Like(Lower(left), Lower(right), escapeChar)) + escapeChar: Char) extends RuntimeReplaceable + with ImplicitCastInputTypes with BinaryLike[Expression] { + + override lazy val replacement: Expression = Like(Lower(left), Lower(right), escapeChar) + def this(left: Expression, right: Expression) = this(left, right, '\\') - override def exprsReplaced: Seq[Expression] = Seq(left, right) - override def flatArguments: Iterator[Any] = Iterator(left, right, escapeChar) + override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType) - override protected def withNewChildInternal(newChild: Expression): ILike = - copy(child = newChild) + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): Expression = { + copy(left = newLeft, right = newRight) + } } sealed abstract class MultiLikeBase diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 56cd224dd8c53..021ddbe9ade01 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegist import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke +import org.apache.spark.sql.catalyst.trees.BinaryLike import org.apache.spark.sql.catalyst.trees.TreePattern.{TreePattern, UPPER_OR_LOWER} import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, TypeUtils} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} @@ -1047,8 +1048,8 @@ case class StringTrim(srcStr: Expression, trimStr: Option[Expression] = None) """, since = "3.2.0", group = "string_funcs") -case class StringTrimBoth(srcStr: Expression, trimStr: Option[Expression], child: Expression) - extends RuntimeReplaceable { +case class StringTrimBoth(srcStr: Expression, trimStr: Option[Expression], replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { def this(srcStr: Expression, trimStr: Expression) = { this(srcStr, Option(trimStr), StringTrim(srcStr, trimStr)) @@ -1058,13 +1059,12 @@ case class 
StringTrimBoth(srcStr: Expression, trimStr: Option[Expression], child this(srcStr, None, StringTrim(srcStr)) } - override def exprsReplaced: Seq[Expression] = srcStr +: trimStr.toSeq - override def flatArguments: Iterator[Any] = Iterator(srcStr, trimStr) - override def prettyName: String = "btrim" + override def parameters: Seq[Expression] = srcStr +: trimStr.toSeq + override protected def withNewChildInternal(newChild: Expression): StringTrimBoth = - copy(child = newChild) + copy(replacement = newChild) } object StringTrimLeft { @@ -1376,17 +1376,17 @@ case class StringLocate(substr: Expression, str: Expression, start: Expression) } trait PadExpressionBuilderBase extends ExpressionBuilder { - override def build(expressions: Seq[Expression]): Expression = { + override def build(funcName: String, expressions: Seq[Expression]): Expression = { val numArgs = expressions.length if (numArgs == 2) { if (expressions(0).dataType == BinaryType) { - createBinaryPad(expressions(0), expressions(1), Literal(Array[Byte](0))) + BinaryPad(funcName, expressions(0), expressions(1), Literal(Array[Byte](0))) } else { createStringPad(expressions(0), expressions(1), Literal(" ")) } } else if (numArgs == 3) { if (expressions(0).dataType == BinaryType && expressions(2).dataType == BinaryType) { - createBinaryPad(expressions(0), expressions(1), expressions(2)) + BinaryPad(funcName, expressions(0), expressions(1), expressions(2)) } else { createStringPad(expressions(0), expressions(1), expressions(2)) } @@ -1395,8 +1395,6 @@ trait PadExpressionBuilderBase extends ExpressionBuilder { } } - protected def funcName: String - protected def createBinaryPad(str: Expression, len: Expression, pad: Expression): Expression protected def createStringPad(str: Expression, len: Expression, pad: Expression): Expression } @@ -1423,10 +1421,6 @@ trait PadExpressionBuilderBase extends ExpressionBuilder { since = "1.5.0", group = "string_funcs") object LPadExpressionBuilder extends PadExpressionBuilderBase { - override def funcName: String = "lpad" - override def createBinaryPad(str: Expression, len: Expression, pad: Expression): Expression = { - new BinaryLPad(str, len, pad) - } override def createStringPad(str: Expression, len: Expression, pad: Expression): Expression = { StringLPad(str, len, pad) } @@ -1459,21 +1453,28 @@ case class StringLPad(str: Expression, len: Expression, pad: Expression) copy(str = newFirst, len = newSecond, pad = newThird) } -case class BinaryLPad(str: Expression, len: Expression, pad: Expression, child: Expression) - extends RuntimeReplaceable { +case class BinaryPad(funcName: String, str: Expression, len: Expression, pad: Expression) + extends RuntimeReplaceable with ImplicitCastInputTypes { + assert(funcName == "lpad" || funcName == "rpad") - def this(str: Expression, len: Expression, pad: Expression) = this(str, len, pad, StaticInvoke( + override lazy val replacement: Expression = StaticInvoke( classOf[ByteArray], BinaryType, - "lpad", + funcName, Seq(str, len, pad), - Seq(BinaryType, IntegerType, BinaryType), + inputTypes, returnNullable = false) - ) - override def prettyName: String = "lpad" - def exprsReplaced: Seq[Expression] = Seq(str, len, pad) - protected def withNewChildInternal(newChild: Expression): BinaryLPad = copy(child = newChild) + override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType, IntegerType, BinaryType) + + override def nodeName: String = funcName + + override def children: Seq[Expression] = Seq(str, len, pad) + + override protected def withNewChildrenInternal( + 
newChildren: IndexedSeq[Expression]): Expression = { + copy(str = newChildren(0), len = newChildren(1), pad = newChildren(2)) + } } @ExpressionDescription( @@ -1499,10 +1500,6 @@ case class BinaryLPad(str: Expression, len: Expression, pad: Expression, child: since = "1.5.0", group = "string_funcs") object RPadExpressionBuilder extends PadExpressionBuilderBase { - override def funcName: String = "rpad" - override def createBinaryPad(str: Expression, len: Expression, pad: Expression): Expression = { - new BinaryRPad(str, len, pad) - } override def createStringPad(str: Expression, len: Expression, pad: Expression): Expression = { StringRPad(str, len, pad) } @@ -1535,23 +1532,6 @@ case class StringRPad(str: Expression, len: Expression, pad: Expression = Litera copy(str = newFirst, len = newSecond, pad = newThird) } -case class BinaryRPad(str: Expression, len: Expression, pad: Expression, child: Expression) - extends RuntimeReplaceable { - - def this(str: Expression, len: Expression, pad: Expression) = this(str, len, pad, StaticInvoke( - classOf[ByteArray], - BinaryType, - "rpad", - Seq(str, len, pad), - Seq(BinaryType, IntegerType, BinaryType), - returnNullable = false) - ) - - override def prettyName: String = "rpad" - def exprsReplaced: Seq[Expression] = Seq(str, len, pad) - protected def withNewChildInternal(newChild: Expression): BinaryRPad = copy(child = newChild) -} - object ParseUrl { private val HOST = UTF8String.fromString("HOST") private val PATH = UTF8String.fromString("PATH") @@ -2025,16 +2005,26 @@ case class Substring(str: Expression, pos: Expression, len: Expression) since = "2.3.0", group = "string_funcs") // scalastyle:on line.size.limit -case class Right(str: Expression, len: Expression, child: Expression) extends RuntimeReplaceable { - def this(str: Expression, len: Expression) = { - this(str, len, If(IsNull(str), Literal(null, StringType), If(LessThanOrEqual(len, Literal(0)), - Literal(UTF8String.EMPTY_UTF8, StringType), new Substring(str, UnaryMinus(len))))) - } - - override def flatArguments: Iterator[Any] = Iterator(str, len) - override def exprsReplaced: Seq[Expression] = Seq(str, len) +case class Right(str: Expression, len: Expression) extends RuntimeReplaceable + with ImplicitCastInputTypes with BinaryLike[Expression] { + + override lazy val replacement: Expression = If( + IsNull(str), + Literal(null, StringType), + If( + LessThanOrEqual(len, Literal(0)), + Literal(UTF8String.EMPTY_UTF8, StringType), + new Substring(str, UnaryMinus(len)) + ) + ) - override protected def withNewChildInternal(newChild: Expression): Right = copy(child = newChild) + override def inputTypes: Seq[AbstractDataType] = Seq(StringType, IntegerType) + override def left: Expression = str + override def right: Expression = len + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): Expression = { + copy(str = newLeft, len = newRight) + } } /** @@ -2051,14 +2041,21 @@ case class Right(str: Expression, len: Expression, child: Expression) extends Ru since = "2.3.0", group = "string_funcs") // scalastyle:on line.size.limit -case class Left(str: Expression, len: Expression, child: Expression) extends RuntimeReplaceable { - def this(str: Expression, len: Expression) = { - this(str, len, Substring(str, Literal(1), len)) +case class Left(str: Expression, len: Expression) extends RuntimeReplaceable + with ImplicitCastInputTypes with BinaryLike[Expression] { + + override lazy val replacement: Expression = Substring(str, Literal(1), len) + + override def inputTypes: 
Seq[AbstractDataType] = { + Seq(TypeCollection(StringType, BinaryType), IntegerType) } - override def flatArguments: Iterator[Any] = Iterator(str, len) - override def exprsReplaced: Seq[Expression] = Seq(str, len) - override protected def withNewChildInternal(newChild: Expression): Left = copy(child = newChild) + override def left: Expression = str + override def right: Expression = len + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): Expression = { + copy(str = newLeft, len = newRight) + } } /** @@ -2438,16 +2435,16 @@ object Decode { since = "3.2.0", group = "string_funcs") // scalastyle:on line.size.limit -case class Decode(params: Seq[Expression], child: Expression) extends RuntimeReplaceable { +case class Decode(params: Seq[Expression], replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { - def this(params: Seq[Expression]) = { - this(params, Decode.createExpr(params)) - } + def this(params: Seq[Expression]) = this(params, Decode.createExpr(params)) - override def flatArguments: Iterator[Any] = Iterator(params) - override def exprsReplaced: Seq[Expression] = params + override def parameters: Seq[Expression] = params - override protected def withNewChildInternal(newChild: Expression): Decode = copy(child = newChild) + override protected def withNewChildInternal(newChild: Expression): Expression = { + copy(replacement = newChild) + } } /** @@ -2557,56 +2554,52 @@ case class Encode(value: Expression, charset: Expression) since = "3.3.0", group = "string_funcs") // scalastyle:on line.size.limit -case class ToBinary(expr: Expression, format: Option[Expression], child: Expression) - extends RuntimeReplaceable { - - def this(expr: Expression, format: Expression) = this(expr, Option(format), - format match { - case lit if (lit.foldable && Seq(StringType, NullType).contains(lit.dataType)) => - val value = lit.eval() - if (value == null) Literal(null, BinaryType) - else { - value.asInstanceOf[UTF8String].toString.toLowerCase(Locale.ROOT) match { - case "hex" => Unhex(expr) - case "utf-8" => Encode(expr, Literal("UTF-8")) - case "base64" => UnBase64(expr) - case _ => lit - } - } - - case other => other +case class ToBinary(expr: Expression, format: Option[Expression]) extends RuntimeReplaceable + with ImplicitCastInputTypes { + + override lazy val replacement: Expression = format.map { f => + assert(f.foldable && (f.dataType == StringType || f.dataType == NullType)) + val value = f.eval() + if (value == null) { + Literal(null, BinaryType) + } else { + value.asInstanceOf[UTF8String].toString.toLowerCase(Locale.ROOT) match { + case "hex" => Unhex(expr) + case "utf-8" => Encode(expr, Literal("UTF-8")) + case "base64" => UnBase64(expr) + case other => throw QueryCompilationErrors.invalidStringLiteralParameter( + "to_binary", "format", other, + Some("The value has to be a case-insensitive string literal of " + + "'hex', 'utf-8', or 'base64'.")) + } } - ) + }.getOrElse(Unhex(expr)) - def this(expr: Expression) = this(expr, None, Unhex(expr)) + def this(expr: Expression) = this(expr, None) - override def flatArguments: Iterator[Any] = Iterator(expr, format) - override def exprsReplaced: Seq[Expression] = expr +: format.toSeq + def this(expr: Expression, format: Expression) = this(expr, Some({ + // We perform this check in the constructor to make it eager and not go through type coercion. 
+ if (format.foldable && (format.dataType == StringType || format.dataType == NullType)) { + format + } else { + throw QueryCompilationErrors.requireLiteralParameter("to_binary", "format", "string") + } + })) override def prettyName: String = "to_binary" - override def dataType: DataType = BinaryType - override def checkInputDataTypes(): TypeCheckResult = { - def checkFormat(lit: Expression) = { - if (lit.foldable && Seq(StringType, NullType).contains(lit.dataType)) { - val value = lit.eval() - value == null || - Seq("hex", "utf-8", "base64").contains( - value.asInstanceOf[UTF8String].toString.toLowerCase(Locale.ROOT)) - } else false - } + override def children: Seq[Expression] = expr +: format.toSeq + + override def inputTypes: Seq[AbstractDataType] = children.map(_ => StringType) - if (format.forall(checkFormat)) { - super.checkInputDataTypes() + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): Expression = { + if (format.isDefined) { + copy(expr = newChildren.head, format = Some(newChildren.last)) } else { - TypeCheckResult.TypeCheckFailure( - s"Unsupported encoding format: $format. The format has to be " + - s"a case-insensitive string literal of 'hex', 'utf-8', or 'base64'") + copy(expr = newChildren.head) } } - - override protected def withNewChildInternal(newChild: Expression): ToBinary = - copy(child = newChild) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 645ff6bdee975..7b896e2c9607c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -21,7 +21,6 @@ import scala.collection.mutable import org.apache.spark.sql.catalyst.CurrentUserContext.CURRENT_USER import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.trees.TreePattern._ @@ -32,26 +31,18 @@ import org.apache.spark.util.Utils /** - * Finds all the expressions that are unevaluable and replace/rewrite them with semantically - * equivalent expressions that can be evaluated. Currently we replace two kinds of expressions: - * 1) [[RuntimeReplaceable]] expressions - * 2) [[UnevaluableAggregate]] expressions such as Every, Some, Any, CountIf + * Finds all the [[RuntimeReplaceable]] expressions that are unevaluable and replace them + * with semantically equivalent expressions that can be evaluated. + * * This is mainly used to provide compatibility with other databases. * Few examples are: - * we use this to support "nvl" by replacing it with "coalesce". + * we use this to support "left" by replacing it with "substring". * we use this to replace Every and Any with Min and Max respectively. - * - * TODO: In future, explore an option to replace aggregate functions similar to - * how RuntimeReplaceable does. 
*/ object ReplaceExpressions extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning( - _.containsAnyPattern(RUNTIME_REPLACEABLE, COUNT_IF, BOOL_AGG, REGR_COUNT)) { - case e: RuntimeReplaceable => e.child - case CountIf(predicate) => Count(new NullIf(predicate, Literal.FalseLiteral)) - case BoolOr(arg) => Max(arg) - case BoolAnd(arg) => Min(arg) - case RegrCount(left, right) => Count(Seq(left, right)) + _.containsAnyPattern(RUNTIME_REPLACEABLE)) { + case e: RuntimeReplaceable => e.replacement } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 08f2cb92b93e0..257df58b00e01 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1670,7 +1670,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg str.charAt(0) }.getOrElse('\\') val likeExpr = ctx.kind.getType match { - case SqlBaseParser.ILIKE => new ILike(e, expression(ctx.pattern), escapeChar) + case SqlBaseParser.ILIKE => ILike(e, expression(ctx.pattern), escapeChar) case _ => Like(e, expression(ctx.pattern), escapeChar) } invertIfNotDefined(likeExpr) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala index 8db2f55e0ce63..b595966bcc235 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala @@ -33,13 +33,11 @@ object TreePattern extends Enumeration { val GROUPING_ANALYTICS: Value = Value val BINARY_ARITHMETIC: Value = Value val BINARY_COMPARISON: Value = Value - val BOOL_AGG: Value = Value val CASE_WHEN: Value = Value val CAST: Value = Value val COALESCE: Value = Value val CONCAT: Value = Value val COUNT: Value = Value - val COUNT_IF: Value = Value val CREATE_NAMED_STRUCT: Value = Value val CURRENT_LIKE: Value = Value val DESERIALIZE_TO_OBJECT: Value = Value @@ -74,7 +72,6 @@ object TreePattern extends Enumeration { val PIVOT: Value = Value val PLAN_EXPRESSION: Value = Value val PYTHON_UDF: Value = Value - val REGR_COUNT: Value = Value val RUNTIME_REPLACEABLE: Value = Value val SCALAR_SUBQUERY: Value = Value val SCALA_UDF: Value = Value diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala index e26f397bb0b52..ed9dc0332aa0e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala @@ -135,8 +135,8 @@ package object util extends Logging { PrettyAttribute(usePrettyExpression(e.child).sql + "." + name, e.dataType) case e: GetArrayStructFields => PrettyAttribute(usePrettyExpression(e.child) + "." 
+ e.field.name, e.dataType) - case r: RuntimeReplaceable => - PrettyAttribute(r.mkString(r.exprsReplaced.map(toPrettySQL)), r.dataType) + case r: InheritAnalysisRules => + PrettyAttribute(r.makeSQLString(r.parameters.map(toPrettySQL)), r.dataType) case c: CastBase if !c.getTagValue(Cast.USER_SPECIFIED_CAST).getOrElse(false) => PrettyAttribute(usePrettyExpression(c.child).sql, c.dataType) case p: PythonUDF => PrettyPythonUDF(p.name, p.dataType, p.children) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 28be81d6ae439..880c28d904195 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -721,8 +721,20 @@ object QueryCompilationErrors { s"Acceptable modes are ${PermissiveMode.name} and ${FailFastMode.name}.") } - def unfoldableFieldUnsupportedError(): Throwable = { - new AnalysisException("The field parameter needs to be a foldable string value.") + def requireLiteralParameter( + funcName: String, argName: String, requiredType: String): Throwable = { + new AnalysisException( + s"The '$argName' parameter of function '$funcName' needs to be a $requiredType literal.") + } + + def invalidStringLiteralParameter( + funcName: String, + argName: String, + invalidValue: String, + allowedValues: Option[String] = None): Throwable = { + val endingMsg = allowedValues.map(" " + _).getOrElse("") + new AnalysisException(s"Invalid value for the '$argName' parameter of function '$funcName': " + + s"$invalidValue.$endingMsg") } def literalTypeUnsupportedForSourceTypeError(field: String, source: Expression): Throwable = { @@ -2375,12 +2387,4 @@ object QueryCompilationErrors { new AnalysisException( "Sinks cannot request distribution and ordering in continuous execution mode") } - - def invalidScaleParameterRoundBase(function: String): Throwable = { - new AnalysisException(s"The 'scale' parameter of function '$function' must be an int constant.") - } - - def invalidNumberOfFunctionParameters(function: String): Throwable = { - new AnalysisException(s"Invalid number of parameters to the function '$function'.") - } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index d1db0177dfd23..fcf9a6b5171cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -42,7 +42,7 @@ import org.apache.spark.sql.catalyst.ScalaReflection.Schema import org.apache.spark.sql.catalyst.WalkedTypePath import org.apache.spark.sql.catalyst.analysis.UnresolvedGenerator import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTable} -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, UnevaluableAggregate} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.logical.{DomainJoin, LogicalPlan} @@ -126,12 +126,6 @@ object QueryExecutionErrors { messageParameters = Array.empty) } - def evaluateUnevaluableAggregateUnsupportedError( - methodName: String, unEvaluable: UnevaluableAggregate): 
Throwable = { - new SparkUnsupportedOperationException(errorClass = "INTERNAL_ERROR", - messageParameters = Array(s"Cannot evaluate expression: $methodName: $unEvaluable")) - } - def dataTypeUnsupportedError(dataType: String, failure: String): Throwable = { new SparkIllegalArgumentException(errorClass = "UNSUPPORTED_DATATYPE", messageParameters = Array(dataType + failure)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 31d7da354460f..84603ee23a8b6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1481,8 +1481,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("Consistent error handling for datetime formatting and parsing functions") { def checkException[T <: Exception : ClassTag](c: String): Unit = { - checkExceptionInExpression[T](new ParseToTimestamp(Literal("1"), Literal(c)).child, c) - checkExceptionInExpression[T](new ParseToDate(Literal("1"), Literal(c)).child, c) + checkExceptionInExpression[T](new ParseToTimestamp(Literal("1"), Literal(c)).replacement, c) + checkExceptionInExpression[T](new ParseToDate(Literal("1"), Literal(c)).replacement, c) checkExceptionInExpression[T](ToUnixTimestamp(Literal("1"), Literal(c)), c) checkExceptionInExpression[T](UnixTimestamp(Literal("1"), Literal(c)), c) if (!Set("E", "F", "q", "Q").contains(c)) { @@ -1502,10 +1502,10 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("SPARK-31896: Handle am-pm timestamp parsing when hour is missing") { checkEvaluation( - new ParseToTimestamp(Literal("PM"), Literal("a")).child, + new ParseToTimestamp(Literal("PM"), Literal("a")).replacement, Timestamp.valueOf("1970-01-01 12:00:00.0")) checkEvaluation( - new ParseToTimestamp(Literal("11:11 PM"), Literal("mm:ss a")).child, + new ParseToTimestamp(Literal("11:11 PM"), Literal("mm:ss a")).replacement, Timestamp.valueOf("1970-01-01 12:11:11.0")) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index ea410a67ed279..58e855e2314ce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2772,7 +2772,7 @@ object functions { * @since 3.3.0 */ def lpad(str: Column, len: Int, pad: Array[Byte]): Column = withExpr { - new BinaryLPad(str.expr, lit(len).expr, lit(pad).expr) + BinaryPad("lpad", str.expr, lit(len).expr, lit(pad).expr) } /** @@ -2861,7 +2861,7 @@ object functions { * @since 3.3.0 */ def rpad(str: Column, len: Int, pad: Array[Byte]): Column = withExpr { - new BinaryRPad(str.expr, lit(len).expr, lit(pad).expr) + BinaryPad("rpad", str.expr, lit(len).expr, lit(pad).expr) } /** diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 8b1a12f9d9c63..a817440fcfe9b 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -11,8 +11,8 @@ | org.apache.spark.sql.catalyst.expressions.Acosh | acosh | SELECT acosh(1) | struct | | org.apache.spark.sql.catalyst.expressions.Add | + | SELECT 1 + 2 | struct<(1 + 2):int> 
| | org.apache.spark.sql.catalyst.expressions.AddMonths | add_months | SELECT add_months('2016-08-31', 1) | struct | -| org.apache.spark.sql.catalyst.expressions.AesDecrypt | aes_decrypt | SELECT aes_decrypt(unhex('83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94'), '0000111122223333') | struct | -| org.apache.spark.sql.catalyst.expressions.AesEncrypt | aes_encrypt | SELECT hex(aes_encrypt('Spark', '0000111122223333')) | struct | +| org.apache.spark.sql.catalyst.expressions.AesDecrypt | aes_decrypt | SELECT aes_decrypt(unhex('83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94'), '0000111122223333') | struct | +| org.apache.spark.sql.catalyst.expressions.AesEncrypt | aes_encrypt | SELECT hex(aes_encrypt('Spark', '0000111122223333')) | struct | | org.apache.spark.sql.catalyst.expressions.And | and | SELECT true and true | struct<(true AND true):boolean> | | org.apache.spark.sql.catalyst.expressions.ArrayAggregate | aggregate | SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) | struct | | org.apache.spark.sql.catalyst.expressions.ArrayContains | array_contains | SELECT array_contains(array(1, 2, 3), 2) | struct | @@ -99,7 +99,7 @@ | org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.DateFormatClass | date_format | SELECT date_format('2016-04-08', 'y') | struct | | org.apache.spark.sql.catalyst.expressions.DateFromUnixDate | date_from_unix_date | SELECT date_from_unix_date(1) | struct | -| org.apache.spark.sql.catalyst.expressions.DatePart | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct | +| org.apache.spark.sql.catalyst.expressions.DatePartExpressionBuilder$ | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct | | org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT date_sub('2016-07-30', 1) | struct | | org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT day('2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.DayOfMonth | dayofmonth | SELECT dayofmonth('2009-07-30') | struct | @@ -120,7 +120,7 @@ | org.apache.spark.sql.catalyst.expressions.Explode | explode | SELECT explode(array(10, 20)) | struct | | org.apache.spark.sql.catalyst.expressions.Explode | explode_outer | SELECT explode_outer(array(10, 20)) | struct | | org.apache.spark.sql.catalyst.expressions.Expm1 | expm1 | SELECT expm1(0) | struct | -| org.apache.spark.sql.catalyst.expressions.Extract | extract | SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456') | struct | +| org.apache.spark.sql.catalyst.expressions.ExtractExpressionBuilder$ | extract | SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456') | struct | | org.apache.spark.sql.catalyst.expressions.Factorial | factorial | SELECT factorial(5) | struct | | org.apache.spark.sql.catalyst.expressions.FindInSet | find_in_set | SELECT find_in_set('ab','abc,b,ab,c,def') | struct | | org.apache.spark.sql.catalyst.expressions.Flatten | flatten | SELECT flatten(array(array(1, 2), array(3, 4))) | struct> | @@ -141,7 +141,6 @@ | org.apache.spark.sql.catalyst.expressions.Hypot | hypot | SELECT hypot(3, 4) | struct | | org.apache.spark.sql.catalyst.expressions.ILike | ilike | SELECT ilike('Spark', '_Park') | struct | | org.apache.spark.sql.catalyst.expressions.If | if | SELECT if(1 < 2, 'a', 'b') | struct<(IF((1 < 2), a, b)):string> | -| org.apache.spark.sql.catalyst.expressions.IfNull | ifnull 
| SELECT ifnull(NULL, array('2')) | struct> | | org.apache.spark.sql.catalyst.expressions.In | in | SELECT 1 in(1, 2, 3) | struct<(1 IN (1, 2, 3)):boolean> | | org.apache.spark.sql.catalyst.expressions.InitCap | initcap | SELECT initcap('sPark sql') | struct | | org.apache.spark.sql.catalyst.expressions.Inline | inline | SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) | struct | @@ -182,8 +181,8 @@ | org.apache.spark.sql.catalyst.expressions.MakeDate | make_date | SELECT make_date(2013, 7, 15) | struct | | org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct | | org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct | -| org.apache.spark.sql.catalyst.expressions.MakeTimestampLTZ | make_timestamp_ltz | SELECT make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887) | struct | -| org.apache.spark.sql.catalyst.expressions.MakeTimestampNTZ | make_timestamp_ntz | SELECT make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887) | struct | +| org.apache.spark.sql.catalyst.expressions.MakeTimestampLTZExpressionBuilder$ | make_timestamp_ltz | SELECT make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887) | struct | +| org.apache.spark.sql.catalyst.expressions.MakeTimestampNTZExpressionBuilder$ | make_timestamp_ntz | SELECT make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887) | struct | | org.apache.spark.sql.catalyst.expressions.MakeYMInterval | make_ym_interval | SELECT make_ym_interval(1, 2) | struct | | org.apache.spark.sql.catalyst.expressions.MapConcat | map_concat | SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) | struct> | | org.apache.spark.sql.catalyst.expressions.MapContainsKey | map_contains_key | SELECT map_contains_key(map(1, 'a', 2, 'b'), 1) | struct | @@ -211,15 +210,16 @@ | org.apache.spark.sql.catalyst.expressions.Now | now | SELECT now() | struct | | org.apache.spark.sql.catalyst.expressions.NthValue | nth_value | SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.NullIf | nullif | SELECT nullif(2, 2) | struct | +| org.apache.spark.sql.catalyst.expressions.Nvl | ifnull | SELECT ifnull(NULL, array('2')) | struct> | | org.apache.spark.sql.catalyst.expressions.Nvl | nvl | SELECT nvl(NULL, array('2')) | struct> | | org.apache.spark.sql.catalyst.expressions.Nvl2 | nvl2 | SELECT nvl2(NULL, 2, 1) | struct | | org.apache.spark.sql.catalyst.expressions.OctetLength | octet_length | SELECT octet_length('Spark SQL') | struct | | org.apache.spark.sql.catalyst.expressions.Or | or | SELECT true or false | struct<(true OR false):boolean> | | org.apache.spark.sql.catalyst.expressions.Overlay | overlay | SELECT overlay('Spark SQL' PLACING '_' FROM 6) | struct | | org.apache.spark.sql.catalyst.expressions.ParseToDate | to_date | SELECT to_date('2009-07-30 04:17:52') | struct | -| org.apache.spark.sql.catalyst.expressions.ParseToTimestamp | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct | -| org.apache.spark.sql.catalyst.expressions.ParseToTimestampLTZ | to_timestamp_ltz | SELECT to_timestamp_ltz('2016-12-31 00:12:00') | struct | -| org.apache.spark.sql.catalyst.expressions.ParseToTimestampNTZ | to_timestamp_ntz | SELECT to_timestamp_ntz('2016-12-31 00:12:00') | struct | +| org.apache.spark.sql.catalyst.expressions.ParseToTimestampExpressionBuilder$ | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | 
struct | +| org.apache.spark.sql.catalyst.expressions.ParseToTimestampLTZExpressionBuilder$ | to_timestamp_ltz | SELECT to_timestamp_ltz('2016-12-31 00:12:00') | struct | +| org.apache.spark.sql.catalyst.expressions.ParseToTimestampNTZExpressionBuilder$ | to_timestamp_ntz | SELECT to_timestamp_ntz('2016-12-31 00:12:00') | struct | | org.apache.spark.sql.catalyst.expressions.ParseUrl | parse_url | SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct | | org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.Pi | pi | SELECT pi() | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 94eb96f6249a0..fef16b7fe7d75 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -141,14 +141,17 @@ select to_binary('abc'); select to_binary('abc', 'utf-8'); select to_binary('abc', 'base64'); select to_binary('abc', 'hex'); +-- 'format' parameter can be any foldable string value, not just literal. select to_binary('abc', concat('utf', '-8')); -select to_binary('abc', concat('base', '64')); +-- 'format' parameter is case insensitive. select to_binary('abc', 'Hex'); -select to_binary('abc', 'UTF-8'); +-- null inputs lead to null result. select to_binary('abc', null); select to_binary(null, 'utf-8'); select to_binary(null, null); select to_binary(null, cast(null as string)); +-- 'format' parameter must be string type or void type. select to_binary(null, cast(null as int)); -select to_binary('abc', 'invalidFormat'); select to_binary('abc', 1); +-- invalid inputs. 
+select to_binary('abc', 'invalidFormat'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out index 7a27a89e5bb95..5f7bd9faa79e9 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out @@ -74,7 +74,7 @@ select map_contains_key(map('1', 'a', '2', 'b'), 1) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'array_contains(map_keys(map('1', 'a', '2', 'b')), 1)' due to data type mismatch: Input to function array_contains should have been array followed by a value with same element type, but it's [array, int].; line 1 pos 7 +cannot resolve 'map_contains_key(map('1', 'a', '2', 'b'), 1)' due to data type mismatch: Input to function map_contains_key should have been map followed by a value with same key type, but it's [map, int].; line 1 pos 7 -- !query @@ -83,7 +83,7 @@ select map_contains_key(map(1, 'a', 2, 'b'), '1') struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'array_contains(map_keys(map(1, 'a', 2, 'b')), '1')' due to data type mismatch: Input to function array_contains should have been array followed by a value with same element type, but it's [array, string].; line 1 pos 7 +cannot resolve 'map_contains_key(map(1, 'a', 2, 'b'), '1')' due to data type mismatch: Input to function map_contains_key should have been map followed by a value with same key type, but it's [map, string].; line 1 pos 7 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index ec7f41dcf4bff..913f1cfb5ae42 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 117 +-- Number of queries: 115 -- !query @@ -866,14 +866,6 @@ struct abc --- !query -select to_binary('abc', concat('base', '64')) --- !query schema -struct --- !query output -i� - - -- !query select to_binary('abc', 'Hex') -- !query schema @@ -882,14 +874,6 @@ struct � --- !query -select to_binary('abc', 'UTF-8') --- !query schema -struct --- !query output -abc - - -- !query select to_binary('abc', null) -- !query schema @@ -928,22 +912,22 @@ select to_binary(null, cast(null as int)) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'to_binary(NULL, CAST(NULL AS INT))' due to data type mismatch: Unsupported encoding format: Some(cast(null as int)). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 +The 'format' parameter of function 'to_binary' needs to be a string literal.; line 1 pos 7 -- !query -select to_binary('abc', 'invalidFormat') +select to_binary('abc', 1) -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'to_binary('abc', 'invalidFormat')' due to data type mismatch: Unsupported encoding format: Some(invalidFormat). 
The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 +The 'format' parameter of function 'to_binary' needs to be a string literal.; line 1 pos 7 -- !query -select to_binary('abc', 1) +select to_binary('abc', 'invalidFormat') -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'to_binary('abc', 1)' due to data type mismatch: Unsupported encoding format: Some(1). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 +Invalid value for the 'format' parameter of function 'to_binary': invalidformat. The value has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'. diff --git a/sql/core/src/test/resources/sql-tests/results/ceil-floor-with-scale-param.sql.out b/sql/core/src/test/resources/sql-tests/results/ceil-floor-with-scale-param.sql.out index 1ec00af1237cf..132bd96350fb1 100644 --- a/sql/core/src/test/resources/sql-tests/results/ceil-floor-with-scale-param.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ceil-floor-with-scale-param.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 18 +-- Number of queries: 24 -- !query @@ -80,7 +80,7 @@ SELECT CEIL(2.5, null) struct<> -- !query output org.apache.spark.sql.AnalysisException -The 'scale' parameter of function 'ceil' must be an int constant.; line 1 pos 7 +The 'scale' parameter of function 'ceil' needs to be a int literal.; line 1 pos 7 -- !query @@ -89,7 +89,7 @@ SELECT CEIL(2.5, 'a') struct<> -- !query output org.apache.spark.sql.AnalysisException -The 'scale' parameter of function 'ceil' must be an int constant.; line 1 pos 7 +The 'scale' parameter of function 'ceil' needs to be a int literal.; line 1 pos 7 -- !query @@ -98,7 +98,7 @@ SELECT CEIL(2.5, 0, 0) struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid number of parameters to the function 'ceil'.; line 1 pos 7 +Invalid number of arguments for function ceil. Expected: 2; Found: 3; line 1 pos 7 -- !query @@ -179,7 +179,7 @@ SELECT FLOOR(2.5, null) struct<> -- !query output org.apache.spark.sql.AnalysisException -The 'scale' parameter of function 'floor' must be an int constant.; line 1 pos 7 +The 'scale' parameter of function 'floor' needs to be a int literal.; line 1 pos 7 -- !query @@ -188,7 +188,7 @@ SELECT FLOOR(2.5, 'a') struct<> -- !query output org.apache.spark.sql.AnalysisException -The 'scale' parameter of function 'floor' must be an int constant.; line 1 pos 7 +The 'scale' parameter of function 'floor' needs to be a int literal.; line 1 pos 7 -- !query @@ -197,4 +197,4 @@ SELECT FLOOR(2.5, 0, 0) struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid number of parameters to the function 'floor'.; line 1 pos 7 +Invalid number of arguments for function floor. 
Expected: 2; Found: 3; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out index e3f676dfd1f5e..55776d3243689 100644 --- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out @@ -660,7 +660,7 @@ select date_part(c, c) from t struct<> -- !query output org.apache.spark.sql.AnalysisException -The field parameter needs to be a foldable string value.; line 1 pos 7 +The 'field' parameter of function 'date_part' needs to be a string literal.; line 1 pos 7 -- !query @@ -677,7 +677,7 @@ select date_part(i, i) from t struct<> -- !query output org.apache.spark.sql.AnalysisException -The field parameter needs to be a foldable string value.; line 1 pos 7 +The 'field' parameter of function 'date_part' needs to be a string literal.; line 1 pos 7 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index cd0fa486cdb6f..400d6c91ba702 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -470,7 +470,7 @@ SELECT every(1) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'every(1)' due to data type mismatch: Input to function 'every' should have been boolean, but it's [int].; line 1 pos 7 +cannot resolve 'every(1)' due to data type mismatch: argument 1 requires boolean type, however, '1' is of int type.; line 1 pos 7 -- !query @@ -479,7 +479,7 @@ SELECT some(1S) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'some(1S)' due to data type mismatch: Input to function 'some' should have been boolean, but it's [smallint].; line 1 pos 7 +cannot resolve 'some(1S)' due to data type mismatch: argument 1 requires boolean type, however, '1S' is of smallint type.; line 1 pos 7 -- !query @@ -488,7 +488,7 @@ SELECT any(1L) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'any(1L)' due to data type mismatch: Input to function 'any' should have been boolean, but it's [bigint].; line 1 pos 7 +cannot resolve 'any(1L)' due to data type mismatch: argument 1 requires boolean type, however, '1L' is of bigint type.; line 1 pos 7 -- !query @@ -497,7 +497,7 @@ SELECT every("true") struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'every('true')' due to data type mismatch: Input to function 'every' should have been boolean, but it's [string].; line 1 pos 7 +cannot resolve 'every('true')' due to data type mismatch: argument 1 requires boolean type, however, ''true'' is of string type.; line 1 pos 7 -- !query @@ -506,7 +506,7 @@ SELECT bool_and(1.0) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'bool_and(1.0BD)' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [decimal(2,1)].; line 1 pos 7 +cannot resolve 'bool_and(1.0BD)' due to data type mismatch: argument 1 requires boolean type, however, '1.0BD' is of decimal(2,1) type.; line 1 pos 7 -- !query @@ -515,7 +515,7 @@ SELECT bool_or(1.0D) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'bool_or(1.0D)' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [double].; line 1 pos 7 +cannot resolve 'bool_or(1.0D)' due to data type mismatch: argument 1 requires 
boolean type, however, '1.0D' is of double type.; line 1 pos 7 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/map.sql.out b/sql/core/src/test/resources/sql-tests/results/map.sql.out index aa13fee451d11..b615a62581108 100644 --- a/sql/core/src/test/resources/sql-tests/results/map.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/map.sql.out @@ -72,7 +72,7 @@ select map_contains_key(map('1', 'a', '2', 'b'), 1) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'array_contains(map_keys(map('1', 'a', '2', 'b')), 1)' due to data type mismatch: Input to function array_contains should have been array followed by a value with same element type, but it's [array, int].; line 1 pos 7 +cannot resolve 'map_contains_key(map('1', 'a', '2', 'b'), 1)' due to data type mismatch: Input to function map_contains_key should have been map followed by a value with same key type, but it's [map, int].; line 1 pos 7 -- !query @@ -81,4 +81,4 @@ select map_contains_key(map(1, 'a', 2, 'b'), '1') struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'array_contains(map_keys(map(1, 'a', 2, 'b')), '1')' due to data type mismatch: Input to function array_contains should have been array followed by a value with same element type, but it's [array, string].; line 1 pos 7 +cannot resolve 'map_contains_key(map(1, 'a', 2, 'b'), '1')' due to data type mismatch: Input to function map_contains_key should have been map followed by a value with same key type, but it's [map, string].; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index bb2974db2322b..bf4348d76349e 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 117 +-- Number of queries: 115 -- !query @@ -862,14 +862,6 @@ struct abc --- !query -select to_binary('abc', concat('base', '64')) --- !query schema -struct --- !query output -i� - - -- !query select to_binary('abc', 'Hex') -- !query schema @@ -878,14 +870,6 @@ struct � --- !query -select to_binary('abc', 'UTF-8') --- !query schema -struct --- !query output -abc - - -- !query select to_binary('abc', null) -- !query schema @@ -924,22 +908,22 @@ select to_binary(null, cast(null as int)) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'to_binary(NULL, CAST(NULL AS INT))' due to data type mismatch: Unsupported encoding format: Some(cast(null as int)). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 +The 'format' parameter of function 'to_binary' needs to be a string literal.; line 1 pos 7 -- !query -select to_binary('abc', 'invalidFormat') +select to_binary('abc', 1) -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'to_binary('abc', 'invalidFormat')' due to data type mismatch: Unsupported encoding format: Some(invalidFormat). 
The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 +The 'format' parameter of function 'to_binary' needs to be a string literal.; line 1 pos 7 -- !query -select to_binary('abc', 1) +select to_binary('abc', 'invalidFormat') -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'to_binary('abc', 1)' due to data type mismatch: Unsupported encoding format: Some(1). The format has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'; line 1 pos 7 +Invalid value for the 'format' parameter of function 'to_binary': invalidformat. The value has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'. diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz.sql.out index 48036c6a34808..057cdf1db845c 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz.sql.out @@ -45,7 +45,7 @@ struct -- !query SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678, 'CET') -- !query schema -struct +struct -- !query output 2021-07-10 21:30:45.678 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out index 5db0f4dac54a7..d543c6a1bb742 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out @@ -380,7 +380,7 @@ SELECT every(udf(1)) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'every(CAST(udf(cast(1 as string)) AS INT))' due to data type mismatch: Input to function 'every' should have been boolean, but it's [int].; line 1 pos 7 +cannot resolve 'every(CAST(udf(cast(1 as string)) AS INT))' due to data type mismatch: argument 1 requires boolean type, however, 'CAST(udf(cast(1 as string)) AS INT)' is of int type.; line 1 pos 7 -- !query @@ -389,7 +389,7 @@ SELECT some(udf(1S)) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'some(CAST(udf(cast(1 as string)) AS SMALLINT))' due to data type mismatch: Input to function 'some' should have been boolean, but it's [smallint].; line 1 pos 7 +cannot resolve 'some(CAST(udf(cast(1 as string)) AS SMALLINT))' due to data type mismatch: argument 1 requires boolean type, however, 'CAST(udf(cast(1 as string)) AS SMALLINT)' is of smallint type.; line 1 pos 7 -- !query @@ -398,7 +398,7 @@ SELECT any(udf(1L)) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'any(CAST(udf(cast(1 as string)) AS BIGINT))' due to data type mismatch: Input to function 'any' should have been boolean, but it's [bigint].; line 1 pos 7 +cannot resolve 'any(CAST(udf(cast(1 as string)) AS BIGINT))' due to data type mismatch: argument 1 requires boolean type, however, 'CAST(udf(cast(1 as string)) AS BIGINT)' is of bigint type.; line 1 pos 7 -- !query @@ -407,7 +407,7 @@ SELECT udf(every("true")) struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'every('true')' due to data type mismatch: Input to function 'every' should have been boolean, but it's [string].; line 1 pos 11 +cannot resolve 'every('true')' due to data type mismatch: argument 1 requires boolean type, however, ''true'' is of string type.; line 1 pos 11 -- !query diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 215d38d8b1677..42293bcd1f35a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -1028,7 +1028,8 @@ class DataFrameAggregateSuite extends QueryTest val error = intercept[AnalysisException] { sql("SELECT COUNT_IF(x) FROM tempView") } - assert(error.message.contains("function count_if requires boolean type")) + assert(error.message.contains("cannot resolve 'count_if(tempview.x)' due to data type " + + "mismatch: argument 1 requires boolean type, however, 'tempview.x' is of string type")) } }

From b4251563cc038ec4a9f49c881b49127a2d9c3705 Mon Sep 17 00:00:00 2001
From: ulysses-you
Date: Wed, 23 Feb 2022 15:34:31 +0800
Subject: [PATCH 312/513] [SPARK-38162][SQL] Optimize one row plan in normal and AQE Optimizer

### What changes were proposed in this pull request?
- Add a new rule `OptimizeOneRowPlan` to both the normal Optimizer and the AQE Optimizer.
- Move the similar optimization out of `EliminateSorts` into `OptimizeOneRowPlan`, and update its comments and tests accordingly.

### Why are the changes needed?
Optimize the plan when its maximum number of rows is less than or equal to 1, in these cases:
- if the max rows of the child of a sort is less than or equal to 1, remove the sort
- if the max rows per partition of the child of a local sort is less than or equal to 1, remove the local sort
- if the max rows of the child of an aggregate is less than or equal to 1 and the aggregate is grouping only (including the rewritten distinct plan), convert the aggregate to a project
- if the max rows of the child of an aggregate is less than or equal to 1, set distinct to false in all aggregate expressions

### Does this PR introduce _any_ user-facing change?
No, it only changes the plan.

### How was this patch tested?
- Added a new test suite `OptimizeOneRowPlanSuite` for the normal optimizer
- Added tests in `AdaptiveQueryExecSuite` for the AQE optimizer

Closes #35473 from ulysses-you/SPARK-38162.
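To make the first case above concrete, here is a minimal, hypothetical Scala sketch; it is not part of the patch, the query and names are made up, and it assumes a local `SparkSession` built against a Spark version that applies this optimization (the equivalent `EliminateSorts` case behaved the same way before the move):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.Sort

// The global aggregate below produces at most one row, so the ORDER BY on top of it
// is redundant and the optimizer drops the Sort node from the logical plan.
val spark = SparkSession.builder().master("local[1]").appName("one-row-plan-demo").getOrCreate()
val df = spark.sql("SELECT count(*) AS cnt FROM VALUES (1), (2), (3) AS t(a) ORDER BY cnt")

// No Sort operator should survive optimization.
val sorts = df.queryExecution.optimizedPlan.collect { case s: Sort => s }
assert(sorts.isEmpty)
spark.stop()
```

The other three cases follow the same reasoning: once the optimizer can prove that at most one row flows through an operator, ordering and duplicate elimination become no-ops.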
Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/dsl/package.scala | 2 +- .../optimizer/OptimizeOneRowPlan.scala | 49 +++++++++ .../sql/catalyst/optimizer/Optimizer.scala | 13 +-- .../sql/catalyst/rules/RuleIdCollection.scala | 1 + .../analysis/AnalysisErrorSuite.scala | 2 +- .../optimizer/EliminateSortsSuite.scala | 10 -- .../optimizer/OptimizeOneRowPlanSuite.scala | 104 ++++++++++++++++++ .../sql/execution/adaptive/AQEOptimizer.scala | 5 +- .../adaptive/AdaptiveQueryExecSuite.scala | 54 +++++++++ 9 files changed, 219 insertions(+), 21 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeOneRowPlan.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeOneRowPlanSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 0988bef30290c..62b3ee7440745 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -440,7 +440,7 @@ package object dsl { def groupBy(groupingExprs: Expression*)(aggregateExprs: Expression*): LogicalPlan = { val aliasedExprs = aggregateExprs.map { case ne: NamedExpression => ne - case e => Alias(e, e.toString)() + case e => UnresolvedAlias(e) } Aggregate(groupingExprs, aliasedExprs, logicalPlan) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeOneRowPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeOneRowPlan.scala new file mode 100644 index 0000000000000..83646611578cb --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeOneRowPlan.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.expressions.aggregate._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.catalyst.trees.TreePattern._ + +/** + * The rule is applied both normal and AQE Optimizer. 
It optimizes plan using max rows: + * - if the max rows of the child of sort is less than or equal to 1, remove the sort + * - if the max rows per partition of the child of local sort is less than or equal to 1, + * remove the local sort + * - if the max rows of the child of aggregate is less than or equal to 1 and its child and + * it's grouping only(include the rewritten distinct plan), convert aggregate to project + * - if the max rows of the child of aggregate is less than or equal to 1, + * set distinct to false in all aggregate expression + */ +object OptimizeOneRowPlan extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = { + plan.transformUpWithPruning(_.containsAnyPattern(SORT, AGGREGATE), ruleId) { + case Sort(_, _, child) if child.maxRows.exists(_ <= 1L) => child + case Sort(_, false, child) if child.maxRowsPerPartition.exists(_ <= 1L) => child + case agg @ Aggregate(_, _, child) if agg.groupOnly && child.maxRows.exists(_ <= 1L) => + Project(agg.aggregateExpressions, child) + case agg: Aggregate if agg.child.maxRows.exists(_ <= 1L) => + agg.transformExpressions { + case aggExpr: AggregateExpression if aggExpr.isDistinct => + aggExpr.copy(isDistinct = false) + } + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 058e30dca1d20..f7ff566b14a95 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -238,6 +238,7 @@ abstract class Optimizer(catalogManager: CatalogManager) // PropagateEmptyRelation can change the nullability of an attribute from nullable to // non-nullable when an empty relation child of a Union is removed UpdateAttributeNullability) :+ + Batch("Optimize One Row Plan", fixedPoint, OptimizeOneRowPlan) :+ // The following batch should be executed after batch "Join Reorder" and "LocalRelation". Batch("Check Cartesian Products", Once, CheckCartesianProducts) :+ @@ -1390,15 +1391,14 @@ object CombineFilters extends Rule[LogicalPlan] with PredicateHelper { * Removes Sort operations if they don't affect the final output ordering. * Note that changes in the final output ordering may affect the file size (SPARK-32318). 
* This rule handles the following cases: - * 1) if the child maximum number of rows less than or equal to 1 - * 2) if the sort order is empty or the sort order does not have any reference - * 3) if the Sort operator is a local sort and the child is already sorted - * 4) if there is another Sort operator separated by 0...n Project, Filter, Repartition or + * 1) if the sort order is empty or the sort order does not have any reference + * 2) if the Sort operator is a local sort and the child is already sorted + * 3) if there is another Sort operator separated by 0...n Project, Filter, Repartition or * RepartitionByExpression, RebalancePartitions (with deterministic expressions) operators - * 5) if the Sort operator is within Join separated by 0...n Project, Filter, Repartition or + * 4) if the Sort operator is within Join separated by 0...n Project, Filter, Repartition or * RepartitionByExpression, RebalancePartitions (with deterministic expressions) operators only * and the Join condition is deterministic - * 6) if the Sort operator is within GroupBy separated by 0...n Project, Filter, Repartition or + * 5) if the Sort operator is within GroupBy separated by 0...n Project, Filter, Repartition or * RepartitionByExpression, RebalancePartitions (with deterministic expressions) operators only * and the aggregate function is order irrelevant */ @@ -1407,7 +1407,6 @@ object EliminateSorts extends Rule[LogicalPlan] { _.containsPattern(SORT))(applyLocally) private val applyLocally: PartialFunction[LogicalPlan, LogicalPlan] = { - case Sort(_, _, child) if child.maxRows.exists(_ <= 1L) => recursiveRemoveSort(child) case s @ Sort(orders, _, child) if orders.isEmpty || orders.exists(_.child.foldable) => val newOrders = orders.filterNot(_.child.foldable) if (newOrders.isEmpty) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala index 935e51cd4a3d9..e36a76b0b26cb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala @@ -121,6 +121,7 @@ object RuleIdCollection { "org.apache.spark.sql.catalyst.optimizer.ObjectSerializerPruning" :: "org.apache.spark.sql.catalyst.optimizer.OptimizeCsvJsonExprs" :: "org.apache.spark.sql.catalyst.optimizer.OptimizeIn" :: + "org.apache.spark.sql.catalyst.optimizer.OptimizeOneRowPlan" :: "org.apache.spark.sql.catalyst.optimizer.Optimizer$OptimizeSubqueries" :: "org.apache.spark.sql.catalyst.optimizer.OptimizeRepartition" :: "org.apache.spark.sql.catalyst.optimizer.OptimizeWindowFunctions" :: diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 8f690e2021602..683f004c61913 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -305,7 +305,7 @@ class AnalysisErrorSuite extends AnalysisTest { .where(sum($"b") > 0) .orderBy($"havingCondition".asc), "MISSING_COLUMN", - Array("havingCondition", "max('b)")) + Array("havingCondition", "max(b)")) errorTest( "unresolved star expansion in max", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index 6dc464c1cd582..01ecbd808c251 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -422,14 +422,4 @@ class EliminateSortsSuite extends AnalysisTest { comparePlans(optimized, correctAnswer) } } - - test("SPARK-35906: Remove order by if the maximum number of rows less than or equal to 1") { - comparePlans( - Optimize.execute(testRelation.groupBy()(count(1).as("cnt")).orderBy('cnt.asc)).analyze, - testRelation.groupBy()(count(1).as("cnt")).analyze) - - comparePlans( - Optimize.execute(testRelation.limit(Literal(1)).orderBy('a.asc).orderBy('a.asc)).analyze, - testRelation.limit(Literal(1)).analyze) - } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeOneRowPlanSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeOneRowPlanSuite.scala new file mode 100644 index 0000000000000..3266febb9ed69 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeOneRowPlanSuite.scala @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.RuleExecutor + +class OptimizeOneRowPlanSuite extends PlanTest { + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("Replace Operators", Once, ReplaceDistinctWithAggregate) :: + Batch("Eliminate Sorts", Once, EliminateSorts) :: + Batch("Optimize One Row Plan", FixedPoint(10), OptimizeOneRowPlan) :: Nil + } + + private val t1 = LocalRelation.fromExternalRows(Seq($"a".int), data = Seq(Row(1))) + private val t2 = LocalRelation.fromExternalRows(Seq($"a".int), data = Seq(Row(1), Row(2))) + + test("SPARK-35906: Remove order by if the maximum number of rows less than or equal to 1") { + comparePlans( + Optimize.execute(t2.groupBy()(count(1).as("cnt")).orderBy('cnt.asc)).analyze, + t2.groupBy()(count(1).as("cnt")).analyze) + + comparePlans( + Optimize.execute(t2.limit(Literal(1)).orderBy('a.asc).orderBy('a.asc)).analyze, + t2.limit(Literal(1)).analyze) + } + + test("Remove sort") { + // remove local sort + val plan1 = LocalLimit(0, t1).union(LocalLimit(0, t2)).sortBy($"a".desc).analyze + val expected = LocalLimit(0, t1).union(LocalLimit(0, t2)).analyze + comparePlans(Optimize.execute(plan1), expected) + + // do not remove + val plan2 = t2.orderBy($"a".desc).analyze + comparePlans(Optimize.execute(plan2), plan2) + + val plan3 = t2.sortBy($"a".desc).analyze + comparePlans(Optimize.execute(plan3), plan3) + } + + test("Convert group only aggregate to project") { + val plan1 = t1.groupBy($"a")($"a").analyze + comparePlans(Optimize.execute(plan1), t1.select($"a").analyze) + + val plan2 = t1.groupBy($"a" + 1)($"a" + 1).analyze + comparePlans(Optimize.execute(plan2), t1.select($"a" + 1).analyze) + + // do not remove + val plan3 = t2.groupBy($"a")($"a").analyze + comparePlans(Optimize.execute(plan3), plan3) + + val plan4 = t1.groupBy($"a")(sum($"a")).analyze + comparePlans(Optimize.execute(plan4), plan4) + + val plan5 = t1.groupBy()(sum($"a")).analyze + comparePlans(Optimize.execute(plan5), plan5) + } + + test("Remove distinct in aggregate expression") { + val plan1 = t1.groupBy($"a")(sumDistinct($"a").as("s")).analyze + val expected1 = t1.groupBy($"a")(sum($"a").as("s")).analyze + comparePlans(Optimize.execute(plan1), expected1) + + val plan2 = t1.groupBy()(sumDistinct($"a").as("s")).analyze + val expected2 = t1.groupBy()(sum($"a").as("s")).analyze + comparePlans(Optimize.execute(plan2), expected2) + + // do not remove + val plan3 = t2.groupBy($"a")(sumDistinct($"a").as("s")).analyze + comparePlans(Optimize.execute(plan3), plan3) + } + + test("Remove in complex case") { + val plan1 = t1.groupBy($"a")($"a").orderBy($"a".asc).analyze + val expected1 = t1.select($"a").analyze + comparePlans(Optimize.execute(plan1), expected1) + + val plan2 = t1.groupBy($"a")(sumDistinct($"a").as("s")).orderBy($"s".asc).analyze + val expected2 = t1.groupBy($"a")(sum($"a").as("s")).analyze + comparePlans(Optimize.execute(plan2), expected2) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala index d81827e4701e4..06e9c180584a9 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.analysis.UpdateAttributeNullability -import org.apache.spark.sql.catalyst.optimizer.{ConvertToLocalRelation, EliminateLimits} +import org.apache.spark.sql.catalyst.optimizer.{ConvertToLocalRelation, EliminateLimits, OptimizeOneRowPlan} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, LogicalPlanIntegrity, PlanHelper} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.internal.SQLConf @@ -40,7 +40,8 @@ class AQEOptimizer(conf: SQLConf) extends RuleExecutor[LogicalPlan] { ConvertToLocalRelation, UpdateAttributeNullability), Batch("Dynamic Join Selection", Once, DynamicJoinSelection), - Batch("Eliminate Limits", fixedPoint, EliminateLimits) + Batch("Eliminate Limits", fixedPoint, EliminateLimits), + Batch("Optimize One Row Plan", fixedPoint, OptimizeOneRowPlan) ) final override protected def batches: Seq[Batch] = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index d1c7064ad7763..ef4c2d0e08031 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.{Dataset, QueryTest, Row, SparkSession, Strategy} import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} import org.apache.spark.sql.execution.{CollectLimitExec, CommandResultExec, LocalTableScanExec, PartialReducerPartitionSpec, QueryExecution, ReusedSubqueryExec, ShuffledRowRDD, SortExec, SparkPlan, UnaryExecNode, UnionExec} +import org.apache.spark.sql.execution.aggregate.BaseAggregateExec import org.apache.spark.sql.execution.command.DataWritingCommandExec import org.apache.spark.sql.execution.datasources.noop.NoopDataSource import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec @@ -126,6 +127,12 @@ class AdaptiveQueryExecSuite } } + private def findTopLevelAggregate(plan: SparkPlan): Seq[BaseAggregateExec] = { + collect(plan) { + case agg: BaseAggregateExec => agg + } + } + private def findTopLevelLimit(plan: SparkPlan): Seq[CollectLimitExec] = { collect(plan) { case l: CollectLimitExec => l @@ -2484,6 +2491,53 @@ class AdaptiveQueryExecSuite } } } + + test("SPARK-38162: Optimize one row plan in AQE Optimizer") { + withTempView("v") { + spark.sparkContext.parallelize( + (1 to 4).map(i => TestData(i, i.toString)), 2) + .toDF("c1", "c2").createOrReplaceTempView("v") + + // remove sort + val (origin1, adaptive1) = runAdaptiveAndVerifyResult( + """ + |SELECT * FROM v where c1 = 1 order by c1, c2 + |""".stripMargin) + assert(findTopLevelSort(origin1).size == 1) + assert(findTopLevelSort(adaptive1).isEmpty) + + // convert group only aggregate to project + val (origin2, adaptive2) = runAdaptiveAndVerifyResult( + """ + |SELECT distinct c1 FROM (SELECT /*+ repartition(c1) */ * FROM v where c1 = 1) + |""".stripMargin) + assert(findTopLevelAggregate(origin2).size == 2) + assert(findTopLevelAggregate(adaptive2).isEmpty) + + // remove distinct in aggregate + val (origin3, adaptive3) = 
runAdaptiveAndVerifyResult( + """ + |SELECT sum(distinct c1) FROM (SELECT /*+ repartition(c1) */ * FROM v where c1 = 1) + |""".stripMargin) + assert(findTopLevelAggregate(origin3).size == 4) + assert(findTopLevelAggregate(adaptive3).size == 2) + + // do not optimize if the aggregate is inside query stage + val (origin4, adaptive4) = runAdaptiveAndVerifyResult( + """ + |SELECT distinct c1 FROM v where c1 = 1 + |""".stripMargin) + assert(findTopLevelAggregate(origin4).size == 2) + assert(findTopLevelAggregate(adaptive4).size == 2) + + val (origin5, adaptive5) = runAdaptiveAndVerifyResult( + """ + |SELECT sum(distinct c1) FROM v where c1 = 1 + |""".stripMargin) + assert(findTopLevelAggregate(origin5).size == 4) + assert(findTopLevelAggregate(adaptive5).size == 4) + } + } } /** From bf220785003d2667e36184357efa2f015892bbb8 Mon Sep 17 00:00:00 2001 From: itholic Date: Wed, 23 Feb 2022 18:31:49 +0900 Subject: [PATCH 313/513] [SPARK-38235][SQL][TESTS] Add test util for testing grouped aggregate pandas UDF ### What changes were proposed in this pull request? This PR proposes to add test util `TestGroupedAggPandasUDF` to help testing grouped aggregate pandas UDF more comfortable. ### Why are the changes needed? To improve testability by integrating various UDF tests with existing test utilities. ### Does this PR introduce _any_ user-facing change? No, it's test only. ### How was this patch tested? Manually tested, and this will be tested in the SPARK-38107 when testing grouped aggregate pandas UDF related errors. Closes #35615 from itholic/SPARK-38235. Authored-by: itholic Signed-off-by: Hyukjin Kwon --- .../spark/sql/IntegratedUDFTestUtils.scala | 100 ++++++++++++++++-- 1 file changed, 94 insertions(+), 6 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala index 76b3324e3e1c5..86c8c8261e833 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala @@ -31,15 +31,16 @@ import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, ExprId, Pyth import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.execution.python.UserDefinedPythonFunction import org.apache.spark.sql.expressions.SparkUserDefinedFunction -import org.apache.spark.sql.types.{DataType, StringType} +import org.apache.spark.sql.types.{DataType, IntegerType, StringType} /** - * This object targets to integrate various UDF test cases so that Scalar UDF, Python UDF and - * Scalar Pandas UDFs can be tested in SBT & Maven tests. + * This object targets to integrate various UDF test cases so that Scalar UDF, Python UDF, + * Scalar Pandas UDF and Grouped Aggregate Pandas UDF can be tested in SBT & Maven tests. * - * The available UDFs are special. It defines an UDF wrapped by cast. So, the input column is - * casted into string, UDF returns strings as are, and then output column is casted back to - * the input column. In this way, UDF is virtually no-op. + * The available UDFs are special. For Scalar UDF, Python UDF and Scalar Pandas UDF, + * it defines an UDF wrapped by cast. So, the input column is casted into string, + * UDF returns strings as are, and then output column is casted back to the input column. + * In this way, UDF is virtually no-op. 
* * Note that, due to this implementation limitation, complex types such as map, array and struct * types do not work with this UDFs because they cannot be same after the cast roundtrip. @@ -69,6 +70,28 @@ import org.apache.spark.sql.types.{DataType, StringType} * df.select(expr("udf_name(id)") * df.select(pandasTestUDF(df("id"))) * }}} + * + * For Grouped Aggregate Pandas UDF, it defines an UDF that calculates the count using pandas. + * The UDF returns the count of the given column. In this way, UDF is virtually not no-op. + * + * To register Grouped Aggregate Pandas UDF in SQL: + * {{{ + * val groupedAggPandasTestUDF = TestGroupedAggPandasUDF(name = "udf_name") + * registerTestUDF(groupedAggPandasTestUDF, spark) + * }}} + * + * To use it in Scala API and SQL: + * {{{ + * sql("SELECT udf_name(1)") + * val df = Seq( + * (536361, "85123A", 2, 17850), + * (536362, "85123B", 4, 17850), + * (536363, "86123A", 6, 17851) + * ).toDF("InvoiceNo", "StockCode", "Quantity", "CustomerID") + * + * df.groupBy("CustomerID").agg(expr("udf_name(Quantity)")) + * df.groupBy("CustomerID").agg(groupedAggPandasTestUDF(df("Quantity"))) + * }}} */ object IntegratedUDFTestUtils extends SQLHelper { import scala.sys.process._ @@ -190,6 +213,28 @@ object IntegratedUDFTestUtils extends SQLHelper { throw new RuntimeException(s"Python executable [$pythonExec] and/or pyspark are unavailable.") } + private lazy val pandasGroupedAggFunc: Array[Byte] = if (shouldTestGroupedAggPandasUDFs) { + var binaryPandasFunc: Array[Byte] = null + withTempPath { path => + Process( + Seq( + pythonExec, + "-c", + "from pyspark.sql.types import IntegerType; " + + "from pyspark.serializers import CloudPickleSerializer; " + + s"f = open('$path', 'wb');" + + "f.write(CloudPickleSerializer().dumps((" + + "lambda x: x.agg('count'), IntegerType())))"), + None, + "PYTHONPATH" -> s"$pysparkPythonPath:$pythonPath").!! + binaryPandasFunc = Files.readAllBytes(path.toPath) + } + assert(binaryPandasFunc != null) + binaryPandasFunc + } else { + throw new RuntimeException(s"Python executable [$pythonExec] and/or pyspark are unavailable.") + } + // Make sure this map stays mutable - this map gets updated later in Python runners. private val workerEnv = new java.util.HashMap[String, String]() workerEnv.put("PYTHONPATH", s"$pysparkPythonPath:$pythonPath") @@ -209,6 +254,8 @@ object IntegratedUDFTestUtils extends SQLHelper { lazy val shouldTestScalarPandasUDFs: Boolean = isPythonAvailable && isPandasAvailable && isPyArrowAvailable + lazy val shouldTestGroupedAggPandasUDFs: Boolean = shouldTestScalarPandasUDFs + /** * A base trait for various UDFs defined in this object. */ @@ -333,6 +380,46 @@ object IntegratedUDFTestUtils extends SQLHelper { val prettyName: String = "Scalar Pandas UDF" } + /** + * A Grouped Aggregate Pandas UDF that takes one column, executes the + * Python native function calculating the count of the column using pandas. 
+ * + * Virtually equivalent to: + * + * {{{ + * import pandas as pd + * from pyspark.sql.functions import pandas_udf + * + * df = spark.createDataFrame( + * [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)], ("id", "v")) + * + * @pandas_udf("double") + * def pandas_count(v: pd.Series) -> int: + * return v.count() + * + * count_col = pandas_count(df['v']) + * }}} + */ + case class TestGroupedAggPandasUDF(name: String) extends TestUDF { + private[IntegratedUDFTestUtils] lazy val udf = new UserDefinedPythonFunction( + name = name, + func = PythonFunction( + command = pandasGroupedAggFunc, + envVars = workerEnv.clone().asInstanceOf[java.util.Map[String, String]], + pythonIncludes = List.empty[String].asJava, + pythonExec = pythonExec, + pythonVer = pythonVer, + broadcastVars = List.empty[Broadcast[PythonBroadcast]].asJava, + accumulator = null), + dataType = IntegerType, + pythonEvalType = PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF, + udfDeterministic = true) + + def apply(exprs: Column*): Column = udf(exprs: _*) + + val prettyName: String = "Grouped Aggregate Pandas UDF" + } + /** * A Scala UDF that takes one column, casts into string, executes the * Scala native function, and casts back to the type of input column. @@ -387,6 +474,7 @@ object IntegratedUDFTestUtils extends SQLHelper { def registerTestUDF(testUDF: TestUDF, session: SparkSession): Unit = testUDF match { case udf: TestPythonUDF => session.udf.registerPython(udf.name, udf.udf) case udf: TestScalarPandasUDF => session.udf.registerPython(udf.name, udf.udf) + case udf: TestGroupedAggPandasUDF => session.udf.registerPython(udf.name, udf.udf) case udf: TestScalaUDF => session.udf.register(udf.name, udf.udf) case other => throw new RuntimeException(s"Unknown UDF class [${other.getClass}]") } From e18a93d7789bb6db18ce3e74756341abdb43a5ad Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Wed, 23 Feb 2022 19:22:24 +0800 Subject: [PATCH 314/513] [SPARK-38295][SQL][TESTS] Fix ArithmeticExpressionSuite under ANSI mode ### What changes were proposed in this pull request? Fix all the failure test cases (about 8 in total) in org.apache.spark.sql.catalyst.expressions.ArithmeticExpressionSuite. Most of them are divided by zero. For these test cases, wrap with ANSI as false. Also add exact same test input wrapping with ANSI true, but set the expectation to the exceptions of divide by zero. ### Why are the changes needed? To set up a new GA job to run tests with ANSI mode before 3.3.0 release. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Test locally with both ANSI on and off, both passed. Closes #35616 from anchovYu/ansi-tests-arithmetic. 
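For context, the behavioral difference these tests pin down can be shown with a small, self-contained sketch; it assumes a local `SparkSession`, and the exact error class and message may differ across Spark versions:

```scala
import scala.util.Try
import org.apache.spark.sql.SparkSession

// Minimal sketch: the same division by zero yields NULL with ANSI off
// and a divide-by-zero error with ANSI on.
val spark = SparkSession.builder().master("local[1]").appName("ansi-divide-demo").getOrCreate()

spark.conf.set("spark.sql.ansi.enabled", "false")
assert(spark.sql("SELECT 1 / 0").head().isNullAt(0))        // legacy behavior: NULL

spark.conf.set("spark.sql.ansi.enabled", "true")
assert(Try(spark.sql("SELECT 1 / 0").collect()).isFailure)  // ANSI behavior: runtime error
spark.stop()
```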
Authored-by: Xinyi Yu Signed-off-by: Gengliang Wang --- .../ArithmeticExpressionSuite.scala | 114 ++++++++++++++---- 1 file changed, 88 insertions(+), 26 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index 31d7a4b0a87e0..522313ffeb184 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -78,10 +78,12 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(UnaryMinus(input), convert(-1)) checkEvaluation(UnaryMinus(Literal.create(null, dataType)), null) } - checkEvaluation(UnaryMinus(Literal(Long.MinValue)), Long.MinValue) - checkEvaluation(UnaryMinus(Literal(Int.MinValue)), Int.MinValue) - checkEvaluation(UnaryMinus(Literal(Short.MinValue)), Short.MinValue) - checkEvaluation(UnaryMinus(Literal(Byte.MinValue)), Byte.MinValue) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation(UnaryMinus(Literal(Long.MinValue)), Long.MinValue) + checkEvaluation(UnaryMinus(Literal(Int.MinValue)), Int.MinValue) + checkEvaluation(UnaryMinus(Literal(Short.MinValue)), Short.MinValue) + checkEvaluation(UnaryMinus(Literal(Byte.MinValue)), Byte.MinValue) + } withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { checkExceptionInExpression[ArithmeticException]( UnaryMinus(Literal(Long.MinValue)), "overflow") @@ -170,7 +172,13 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Divide(left, right), convert(2)) checkEvaluation(Divide(Literal.create(null, left.dataType), right), null) checkEvaluation(Divide(left, Literal.create(null, right.dataType)), null) - checkEvaluation(Divide(left, Literal(convert(0))), null) // divide by zero + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation(Divide(left, Literal(convert(0))), null) // divide by zero + } + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + checkExceptionInExpression[ArithmeticException]( + Divide(left, Literal(convert(0))), "divide by zero") + } } Seq("true", "false").foreach { failOnError => @@ -194,7 +202,13 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(IntegralDivide(left, right), 0L) checkEvaluation(IntegralDivide(Literal.create(null, left.dataType), right), null) checkEvaluation(IntegralDivide(left, Literal.create(null, right.dataType)), null) - checkEvaluation(IntegralDivide(left, Literal(convert(0))), null) // divide by zero + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation(IntegralDivide(left, Literal(convert(0))), null) // divide by zero + } + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + checkExceptionInExpression[ArithmeticException]( + IntegralDivide(left, Literal(convert(0))), "divide by zero") + } } checkEvaluation(IntegralDivide(positiveLongLit, negativeLongLit), 0L) @@ -222,7 +236,13 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Remainder(left, right), convert(1)) checkEvaluation(Remainder(Literal.create(null, left.dataType), right), null) checkEvaluation(Remainder(left, Literal.create(null, right.dataType)), null) - checkEvaluation(Remainder(left, Literal(convert(0))), null) // mod by 0 + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + 
checkEvaluation(Remainder(left, Literal(convert(0))), null) // mod by 0 + } + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + checkExceptionInExpression[ArithmeticException]( + Remainder(left, Literal(convert(0))), "divide by zero") + } } checkEvaluation(Remainder(positiveShortLit, positiveShortLit), 0.toShort) checkEvaluation(Remainder(negativeShortLit, negativeShortLit), 0.toShort) @@ -304,7 +324,13 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Pmod(left, right), convert(1)) checkEvaluation(Pmod(Literal.create(null, left.dataType), right), null) checkEvaluation(Pmod(left, Literal.create(null, right.dataType)), null) - checkEvaluation(Pmod(left, Literal(convert(0))), null) // mod by 0 + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation(Pmod(left, Literal(convert(0))), null) // mod by 0 + } + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + checkExceptionInExpression[ArithmeticException]( + Pmod(left, Literal(convert(0))), "divide by zero") + } } checkEvaluation(Pmod(Literal(-7), Literal(3)), 2) checkEvaluation(Pmod(Literal(7.2D), Literal(4.1D)), 3.1000000000000005) @@ -461,15 +487,24 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(IntegralDivide(Literal(Decimal(1)), Literal(Decimal(2))), 0L) checkEvaluation(IntegralDivide(Literal(Decimal(2.4)), Literal(Decimal(1.1))), 2L) checkEvaluation(IntegralDivide(Literal(Decimal(1.2)), Literal(Decimal(1.1))), 1L) - checkEvaluation(IntegralDivide(Literal(Decimal(0.2)), Literal(Decimal(0.0))), null) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation( + IntegralDivide(Literal(Decimal(0.2)), Literal(Decimal(0.0))), null) // mod by 0 + } + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + checkExceptionInExpression[ArithmeticException]( + IntegralDivide(Literal(Decimal(0.2)), Literal(Decimal(0.0))), "divide by zero") + } // overflows long and so returns a wrong result checkEvaluation(DecimalPrecision.decimalAndDecimal.apply(IntegralDivide( Literal(Decimal("99999999999999999999999999999999999")), Literal(Decimal(0.001)))), 687399551400672280L) // overflow during promote precision - checkEvaluation(DecimalPrecision.decimalAndDecimal.apply(IntegralDivide( - Literal(Decimal("99999999999999999999999999999999999999")), Literal(Decimal(0.00001)))), - null) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation(DecimalPrecision.decimalAndDecimal.apply(IntegralDivide( + Literal(Decimal("99999999999999999999999999999999999999")), Literal(Decimal(0.00001)))), + null) + } } test("SPARK-24598: overflow on long returns wrong result") { @@ -701,13 +736,25 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper } test("SPARK-36921: Support YearMonthIntervalType by div") { - checkEvaluation(IntegralDivide(Literal(Period.ZERO), Literal(Period.ZERO)), null) - checkEvaluation(IntegralDivide(Literal(Period.ofYears(1)), - Literal(Period.ZERO)), null) - checkEvaluation(IntegralDivide(Period.ofMonths(Int.MinValue), - Literal(Period.ZERO)), null) - checkEvaluation(IntegralDivide(Period.ofMonths(Int.MaxValue), - Literal(Period.ZERO)), null) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation(IntegralDivide(Literal(Period.ZERO), Literal(Period.ZERO)), null) + checkEvaluation(IntegralDivide(Literal(Period.ofYears(1)), + Literal(Period.ZERO)), null) + checkEvaluation(IntegralDivide(Period.ofMonths(Int.MinValue), + Literal(Period.ZERO)), null) + 
checkEvaluation(IntegralDivide(Period.ofMonths(Int.MaxValue), + Literal(Period.ZERO)), null) + } + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + checkExceptionInExpression[ArithmeticException]( + IntegralDivide(Literal(Period.ZERO), Literal(Period.ZERO)), "divide by zero") + checkExceptionInExpression[ArithmeticException]( + IntegralDivide(Literal(Period.ofYears(1)), Literal(Period.ZERO)), "divide by zero") + checkExceptionInExpression[ArithmeticException]( + IntegralDivide(Period.ofMonths(Int.MinValue), Literal(Period.ZERO)), "divide by zero") + checkExceptionInExpression[ArithmeticException]( + IntegralDivide(Period.ofMonths(Int.MaxValue), Literal(Period.ZERO)), "divide by zero") + } checkEvaluation(IntegralDivide(Literal.create(null, YearMonthIntervalType()), Literal.create(null, YearMonthIntervalType())), null) @@ -741,13 +788,28 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper Literal(Period.ofMonths(-5))), -2L) } test("SPARK-36921: Support DayTimeIntervalType by div") { - checkEvaluation(IntegralDivide(Literal(Duration.ZERO), Literal(Duration.ZERO)), null) - checkEvaluation(IntegralDivide(Literal(Duration.ofDays(1)), - Literal(Duration.ZERO)), null) - checkEvaluation(IntegralDivide(Literal(Duration.of(Long.MaxValue, ChronoUnit.MICROS)), - Literal(Duration.ZERO)), null) - checkEvaluation(IntegralDivide(Literal(Duration.of(Long.MinValue, ChronoUnit.MICROS)), - Literal(Duration.ZERO)), null) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation(IntegralDivide(Literal(Duration.ZERO), Literal(Duration.ZERO)), null) + checkEvaluation(IntegralDivide(Literal(Duration.ofDays(1)), + Literal(Duration.ZERO)), null) + checkEvaluation(IntegralDivide(Literal(Duration.of(Long.MaxValue, ChronoUnit.MICROS)), + Literal(Duration.ZERO)), null) + checkEvaluation(IntegralDivide(Literal(Duration.of(Long.MinValue, ChronoUnit.MICROS)), + Literal(Duration.ZERO)), null) + } + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + checkExceptionInExpression[ArithmeticException]( + IntegralDivide(Literal(Duration.ZERO), Literal(Duration.ZERO)), "divide by zero") + checkExceptionInExpression[ArithmeticException]( + IntegralDivide(Literal(Duration.ofDays(1)), + Literal(Duration.ZERO)), "divide by zero") + checkExceptionInExpression[ArithmeticException]( + IntegralDivide(Literal(Duration.of(Long.MaxValue, ChronoUnit.MICROS)), + Literal(Duration.ZERO)), "divide by zero") + checkExceptionInExpression[ArithmeticException]( + IntegralDivide(Literal(Duration.of(Long.MinValue, ChronoUnit.MICROS)), + Literal(Duration.ZERO)), "divide by zero") + } checkEvaluation(IntegralDivide(Literal.create(null, DayTimeIntervalType()), Literal.create(null, DayTimeIntervalType())), null) From a2448a4068d255fac951be2dcf36db08145533e7 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 23 Feb 2022 23:19:56 +0800 Subject: [PATCH 315/513] [SPARK-38304][SQL] Elt() should return null if index is null under ANSI mode ### What changes were proposed in this pull request? Elt() should return null if the input index is null under ANSI mode, which is consistent with MySQL where the function is from. Before changes: image After changes: The query returns null. ### Why are the changes needed? Bug fix ### Does this PR introduce _any_ user-facing change? Yes, SQL function Elt() returns null if the input index is null under ANSI mode, instead of runtime error. ### How was this patch tested? UT Closes #35629 from gengliangwang/fixEltErrorMsg. 
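Before this change, a null index made `elt()` fail at runtime under ANSI mode; after it, the call returns NULL while a valid index keeps its usual behavior. A minimal, hypothetical sketch of the fixed behavior, assuming a local `SparkSession` on a build that includes this fix:

```scala
import org.apache.spark.sql.SparkSession

// Under ANSI mode, a null index now yields NULL instead of a runtime error.
val spark = SparkSession.builder().master("local[1]").appName("elt-null-index-demo").getOrCreate()
spark.conf.set("spark.sql.ansi.enabled", "true")

assert(spark.sql("SELECT elt(NULL, '123', '456')").head().isNullAt(0))        // null index -> NULL
assert(spark.sql("SELECT elt(1, '123', NULL)").head().getString(0) == "123")  // valid index unchanged
spark.stop()
```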
Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan --- .../expressions/stringExpressions.scala | 22 +++++++----- .../test/resources/sql-tests/inputs/array.sql | 4 +++ .../sql-tests/results/ansi/array.sql.out | 34 ++++++++++++++++++- .../resources/sql-tests/results/array.sql.out | 34 ++++++++++++++++++- 4 files changed, 83 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 021ddbe9ade01..b9670646c91a6 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -362,15 +362,19 @@ case class Elt( ev.copy( code""" |${index.code} - |final int $indexVal = ${index.value}; - |${CodeGenerator.JAVA_BOOLEAN} $indexMatched = false; - |$inputVal = null; - |do { - | $codes - |} while (false); - |$indexOutOfBoundBranch - |final ${CodeGenerator.javaType(dataType)} ${ev.value} = $inputVal; - |final boolean ${ev.isNull} = ${ev.value} == null; + |boolean ${ev.isNull} = ${index.isNull}; + |${CodeGenerator.javaType(dataType)} ${ev.value} = null; + |if (!${index.isNull}) { + | final int $indexVal = ${index.value}; + | ${CodeGenerator.JAVA_BOOLEAN} $indexMatched = false; + | $inputVal = null; + | do { + | $codes + | } while (false); + | $indexOutOfBoundBranch + | ${ev.value} = $inputVal; + | ${ev.isNull} = ${ev.value} == null; + |} """.stripMargin) } diff --git a/sql/core/src/test/resources/sql-tests/inputs/array.sql b/sql/core/src/test/resources/sql-tests/inputs/array.sql index f73b653659eb4..0223ce5475832 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/array.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/array.sql @@ -99,6 +99,10 @@ select element_at(array(1, 2, 3), 0); select elt(4, '123', '456'); select elt(0, '123', '456'); select elt(-1, '123', '456'); +select elt(null, '123', '456'); +select elt(null, '123', null); +select elt(1, '123', null); +select elt(2, '123', null); select array(1, 2, 3)[5]; select array(1, 2, 3)[-1]; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out index b412493b60aa3..f2b355279f5f2 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 29 +-- Number of queries: 33 -- !query @@ -216,6 +216,38 @@ org.apache.spark.SparkArrayIndexOutOfBoundsException Invalid index: -1, numElements: 2. If necessary set spark.sql.ansi.enabled to false to bypass this error. 
+-- !query +select elt(null, '123', '456') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select elt(null, '123', null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select elt(1, '123', null) +-- !query schema +struct +-- !query output +123 + + +-- !query +select elt(2, '123', null) +-- !query schema +struct +-- !query output +NULL + + -- !query select array(1, 2, 3)[5] -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out index 76fdf035ad4ec..9d42b8a46a5a1 100644 --- a/sql/core/src/test/resources/sql-tests/results/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 20 +-- Number of queries: 24 -- !query @@ -211,6 +211,38 @@ struct NULL +-- !query +select elt(null, '123', '456') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select elt(null, '123', null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select elt(1, '123', null) +-- !query schema +struct +-- !query output +123 + + +-- !query +select elt(2, '123', null) +-- !query schema +struct +-- !query output +NULL + + -- !query select array(1, 2, 3)[5] -- !query schema From 8ad85f84db528e7d91b19e1efe165300e7405945 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 23 Feb 2022 10:46:31 -0800 Subject: [PATCH 316/513] [SPARK-38299][SQL] Clean up deprecated usage of `StringBuilder.newBuilder` ### What changes were proposed in this pull request? `StringBuilder.newBuilder` has deprecated since Scala 2.13.0: https://github.com/scala/scala/blob/de73fdb0a8552f0b2444334bb7b31c0a95a79474/src/library/scala/collection/mutable/StringBuilder.scala#L479-L482 this pr change to use `new StringBuilder()` instead of it. ### Why are the changes needed? Clean up deprecated API usage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35621 from LuciferYang/stringbuilder. 
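As a small sketch of the substitution this patch applies everywhere (an illustrative snippet, not an excerpt from the patch), the two forms start from the same empty mutable buffer, so the change is behavior-preserving:

```scala
// Illustrative only: `new StringBuilder` replaces the factory method that is
// deprecated since Scala 2.13.0; subsequent appends behave identically.
val builder = new StringBuilder      // replacement form used by this patch
builder ++= "CREATE VIEW "
builder ++= "v1"
assert(builder.toString == "CREATE VIEW v1")
```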
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/catalyst/util/package.scala | 2 +- .../org/apache/spark/sql/execution/command/tables.scala | 6 +++--- .../sql/execution/datasources/v2/ShowCreateTableExec.scala | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala index ed9dc0332aa0e..d8e895b57ae94 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala @@ -159,7 +159,7 @@ package object util extends Logging { def toPrettySQL(e: Expression): String = usePrettyExpression(e).sql def escapeSingleQuotedString(str: String): String = { - val builder = StringBuilder.newBuilder + val builder = new StringBuilder str.foreach { case '\'' => builder ++= s"\\\'" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 5c33080c97337..ac4bb8395a3b1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -1125,7 +1125,7 @@ case class ShowCreateTableCommand( } } - val builder = StringBuilder.newBuilder + val builder = new StringBuilder val stmt = if (tableMetadata.tableType == VIEW) { builder ++= s"CREATE VIEW ${table.quoted} " @@ -1153,7 +1153,7 @@ case class ShowCreateTableCommand( // TODO: some Hive fileformat + row serde might be mapped to Spark data source, e.g. CSV. val source = HiveSerDe.serdeToSource(hiveSerde) if (source.isEmpty) { - val builder = StringBuilder.newBuilder + val builder = new StringBuilder hiveSerde.serde.foreach { serde => builder ++= s" SERDE: $serde" } @@ -1260,7 +1260,7 @@ case class ShowCreateTableAsSerdeCommand( reportUnsupportedError(metadata.unsupportedFeatures) } - val builder = StringBuilder.newBuilder + val builder = new StringBuilder val tableTypeString = metadata.tableType match { case EXTERNAL => " EXTERNAL TABLE" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala index abe8d67adb856..06f5a08ffd9c7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala @@ -36,7 +36,7 @@ case class ShowCreateTableExec( output: Seq[Attribute], table: Table) extends V2CommandExec with LeafExecNode { override protected def run(): Seq[InternalRow] = { - val builder = StringBuilder.newBuilder + val builder = new StringBuilder showCreateTable(table, builder) Seq(InternalRow(UTF8String.fromString(builder.toString))) } From 2fe5b042bb589d2c21ca4ad2f87b5d76013c4496 Mon Sep 17 00:00:00 2001 From: leesf Date: Wed, 23 Feb 2022 11:03:35 -0800 Subject: [PATCH 317/513] [SPARK-38301][BUILD] Remove unused scala-actors dependency ### What changes were proposed in this pull request? Remove unused `scala-actors` dependency ### Why are the changes needed? Simplify pom dependency. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. Closes #35625 from leesf/SPARK-38301. 
Authored-by: leesf Signed-off-by: Dongjoon Hyun --- pom.xml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pom.xml b/pom.xml index d1e391c8539c7..4e9198c71289b 100644 --- a/pom.xml +++ b/pom.xml @@ -1057,11 +1057,6 @@ scala-library ${scala.version} - - org.scala-lang - scala-actors - ${scala.version} - org.scala-lang.modules scala-parser-combinators_${scala.binary.version} From 0bc16c632bb836ede8b5b204af120ed12265bcea Mon Sep 17 00:00:00 2001 From: bjornjorgensen Date: Wed, 23 Feb 2022 11:04:36 -0800 Subject: [PATCH 318/513] [SPARK-38287][BUILD][SQL][TESTS] Upgrade `h2` from 2.0.204 to 2.1.210 in /sql/core ### What changes were proposed in this pull request? Bump h2 from 2.0.204 to 2.1.210 in /sql/core ### Why are the changes needed? [Arbitrary code execution in H2 Console](https://github.com/advisories/GHSA-45hx-wfhj-473x) and [CVE-2021-42392](https://nvd.nist.gov/vuln/detail/CVE-2021-42392) ### Does this PR introduce _any_ user-facing change? Some users use remote security scanners and this is one of the issues that comes up. How this can do some damage with spark is highly uncertain. but let's remove the uncertainty that any user may have. ### How was this patch tested? All test must pass. Closes #35630 from bjornjorgensen/Bump-h2-from-2.0.204-to-2.1.210-in-/sql/core. Authored-by: bjornjorgensen Signed-off-by: Dongjoon Hyun --- sql/core/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 7842ab36bb1b7..3002a3b4a876d 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -153,7 +153,7 @@ com.h2database h2 - 2.0.204 + 2.1.210 test From 9257224fab0681f23b4a81a75bc8b3cd58683347 Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Thu, 24 Feb 2022 10:21:53 +0800 Subject: [PATCH 319/513] [SPARK-38281][SQL][TESTS] Fix AnalysisSuite under ANSI mode ### What changes were proposed in this pull request? Fix the org.apache.spark.sql.catalyst.analysis.AnalysisSuite under ANSI mode. With ANSI on, there won't be a type cast on string a / string b. Make the cast assertion only when ANSI is off. ### Why are the changes needed? To set up a new GA job to run tests with ANSI mode before 3.3.0 release. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Test locally with both ANSI on and off, both passed. Closes #35604 from anchovYu/ansi-tests-analysis. 
Authored-by: Xinyi Yu Signed-off-by: Gengliang Wang --- .../apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index ff05b797e7cd7..fff25b59eff98 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -221,7 +221,9 @@ class AnalysisSuite extends AnalysisTest with Matchers { val pl = plan.asInstanceOf[Project].projectList assert(pl(0).dataType == DoubleType) - assert(pl(1).dataType == DoubleType) + if (!SQLConf.get.ansiEnabled) { + assert(pl(1).dataType == DoubleType) + } assert(pl(2).dataType == DoubleType) assert(pl(3).dataType == DoubleType) assert(pl(4).dataType == DoubleType) From b28241d4755c859a7ef715af020d7e6abad6cf9a Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Thu, 24 Feb 2022 10:38:34 +0800 Subject: [PATCH 320/513] [SPARK-38307][SQL][TESTS] Fix ExpressionTypeCheckingSuite and CollectionExpressionsSuite under ANSI mode ### What changes were proposed in this pull request? Fix the ExpressionTypeCheckingSuite and CollectionExpressionsSuite under ANSI mode. For all the failed test cases, wrap with ANSI as false. For some test cases, also add exact same test input wrapping with ANSI true, but set the expectation to the exceptions. ### Why are the changes needed? To set up a new GA job to run tests with ANSI mode before 3.3.0 release. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Test locally with both ANSI on and off, both passed. Closes #35634 from anchovYu/ansi-tests-expressiontypechecking-collectionexpressions. 
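The behavior difference behind these test fixes can be sketched from a spark-shell session (an illustration assuming `spark` is the active SparkSession, not an excerpt from the test suites):

```scala
// Out-of-bounds element_at: NULL when ANSI is off, an error when ANSI is on.
spark.conf.set("spark.sql.ansi.enabled", "false")
spark.sql("SELECT element_at(array(1, 2, 3), 4)").show()   // prints NULL
spark.conf.set("spark.sql.ansi.enabled", "true")
// The same query now fails with an invalid-index error under ANSI mode:
// spark.sql("SELECT element_at(array(1, 2, 3), 4)").collect()
```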
Authored-by: Xinyi Yu Signed-off-by: Gengliang Wang --- .../ExpressionTypeCheckingSuite.scala | 14 ++++++++--- .../CollectionExpressionsSuite.scala | 24 ++++++++++++------- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala index 239d886303a02..da6b981fb4bf6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala @@ -23,10 +23,12 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.plans.logical.LocalRelation +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -class ExpressionTypeCheckingSuite extends SparkFunSuite { +class ExpressionTypeCheckingSuite extends SparkFunSuite with SQLHelper { val testRelation = LocalRelation( Symbol("intField").int, @@ -103,8 +105,14 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite { assertSuccess(GreaterThanOrEqual(Symbol("intField"), Symbol("stringField"))) // We will transform EqualTo with numeric and boolean types to CaseKeyWhen - assertSuccess(EqualTo(Symbol("intField"), Symbol("booleanField"))) - assertSuccess(EqualNullSafe(Symbol("intField"), Symbol("booleanField"))) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + assertSuccess(EqualTo(Symbol("intField"), Symbol("booleanField"))) + assertSuccess(EqualNullSafe(Symbol("intField"), Symbol("booleanField"))) + } + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + assertError(EqualTo(Symbol("intField"), Symbol("booleanField")), "differing types") + assertError(EqualNullSafe(Symbol("intField"), Symbol("booleanField")), "differing types") + } assertErrorForDifferingTypes(EqualTo(Symbol("intField"), Symbol("mapField"))) assertErrorForDifferingTypes(EqualNullSafe(Symbol("intField"), Symbol("mapField"))) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 7dfa3ea6f5a15..3cf3b4469a4d2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -66,7 +66,9 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper } test("Array and Map Size - legacy") { - withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "true") { + withSQLConf( + SQLConf.LEGACY_SIZE_OF_NULL.key -> "true", + SQLConf.ANSI_ENABLED.key -> "false") { testSize(sizeOfNull = -1) } } @@ -1437,8 +1439,10 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(ElementAt(a0, Literal(0)), null) }.getMessage.contains("SQL array indices start at 1") intercept[Exception] { checkEvaluation(ElementAt(a0, Literal(1.1)), null) } - checkEvaluation(ElementAt(a0, Literal(4)), null) - checkEvaluation(ElementAt(a0, Literal(-4)), null) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + 
checkEvaluation(ElementAt(a0, Literal(4)), null) + checkEvaluation(ElementAt(a0, Literal(-4)), null) + } checkEvaluation(ElementAt(a0, Literal(1)), 1) checkEvaluation(ElementAt(a0, Literal(2)), 2) @@ -1464,9 +1468,10 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper assert(ElementAt(m0, Literal(1.0)).checkInputDataTypes().isFailure) - checkEvaluation(ElementAt(m0, Literal("d")), null) - - checkEvaluation(ElementAt(m1, Literal("a")), null) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation(ElementAt(m0, Literal("d")), null) + checkEvaluation(ElementAt(m1, Literal("a")), null) + } checkEvaluation(ElementAt(m0, Literal("a")), "1") checkEvaluation(ElementAt(m0, Literal("b")), "2") @@ -1480,9 +1485,10 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper MapType(BinaryType, StringType)) val mb1 = Literal.create(Map[Array[Byte], String](), MapType(BinaryType, StringType)) - checkEvaluation(ElementAt(mb0, Literal(Array[Byte](1, 2, 3))), null) - - checkEvaluation(ElementAt(mb1, Literal(Array[Byte](1, 2))), null) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation(ElementAt(mb0, Literal(Array[Byte](1, 2, 3))), null) + checkEvaluation(ElementAt(mb1, Literal(Array[Byte](1, 2))), null) + } checkEvaluation(ElementAt(mb0, Literal(Array[Byte](2, 1), BinaryType)), "2") checkEvaluation(ElementAt(mb0, Literal(Array[Byte](3, 4))), null) } From 683bc46ff9a791ab6b9cd3cb95be6bbc368121e0 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 24 Feb 2022 10:49:52 +0800 Subject: [PATCH 321/513] [SPARK-38286][SQL] Union's maxRows and maxRowsPerPartition may overflow ### What changes were proposed in this pull request? check Union's maxRows and maxRowsPerPartition ### Why are the changes needed? Union's maxRows and maxRowsPerPartition may overflow: case 1: ``` scala> val df1 = spark.range(0, Long.MaxValue, 1, 1) df1: org.apache.spark.sql.Dataset[Long] = [id: bigint] scala> val df2 = spark.range(0, 100, 1, 10) df2: org.apache.spark.sql.Dataset[Long] = [id: bigint] scala> val union = df1.union(df2) union: org.apache.spark.sql.Dataset[Long] = [id: bigint] scala> union.queryExecution.logical.maxRowsPerPartition res19: Option[Long] = Some(-9223372036854775799) scala> union.queryExecution.logical.maxRows res20: Option[Long] = Some(-9223372036854775709) ``` case 2: ``` scala> val n = 2000000 n: Int = 2000000 scala> val df1 = spark.range(0, n, 1, 1).selectExpr("id % 5 as key1", "id as value1") df1: org.apache.spark.sql.DataFrame = [key1: bigint, value1: bigint] scala> val df2 = spark.range(0, n, 1, 2).selectExpr("id % 3 as key2", "id as value2") df2: org.apache.spark.sql.DataFrame = [key2: bigint, value2: bigint] scala> val df3 = spark.range(0, n, 1, 3).selectExpr("id % 4 as key3", "id as value3") df3: org.apache.spark.sql.DataFrame = [key3: bigint, value3: bigint] scala> val joined = df1.join(df2, col("key1") === col("key2")).join(df3, col("key1") === col("key3")) joined: org.apache.spark.sql.DataFrame = [key1: bigint, value1: bigint ... 4 more fields] scala> val unioned = joined.select(col("key1"), col("value3")).union(joined.select(col("key1"), col("value2"))) unioned: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [key1: bigint, value3: bigint] scala> unioned.queryExecution.optimizedPlan.maxRows res32: Option[Long] = Some(-2446744073709551616) scala> unioned.queryExecution.optimizedPlan.maxRows res33: Option[Long] = Some(-2446744073709551616) ``` ### Does this PR introduce _any_ user-facing change? 
No ### How was this patch tested? added testsuite Closes #35609 from zhengruifeng/union_maxRows_validate. Authored-by: Ruifeng Zheng Signed-off-by: Wenchen Fan --- .../plans/logical/basicLogicalOperators.scala | 30 ++++++++++++++----- .../sql/catalyst/plans/LogicalPlanSuite.scala | 8 +++++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 3283a4dee86bc..02d6a1d3cce76 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -277,11 +277,18 @@ case class Union( assert(!allowMissingCol || byName, "`allowMissingCol` can be true only if `byName` is true.") override def maxRows: Option[Long] = { - if (children.exists(_.maxRows.isEmpty)) { - None - } else { - Some(children.flatMap(_.maxRows).sum) + var sum = BigInt(0) + children.foreach { child => + if (child.maxRows.isDefined) { + sum += child.maxRows.get + if (!sum.isValidLong) { + return None + } + } else { + return None + } } + Some(sum.toLong) } final override val nodePatterns: Seq[TreePattern] = Seq(UNION) @@ -290,11 +297,18 @@ case class Union( * Note the definition has assumption about how union is implemented physically. */ override def maxRowsPerPartition: Option[Long] = { - if (children.exists(_.maxRowsPerPartition.isEmpty)) { - None - } else { - Some(children.flatMap(_.maxRowsPerPartition).sum) + var sum = BigInt(0) + children.foreach { child => + if (child.maxRowsPerPartition.isDefined) { + sum += child.maxRowsPerPartition.get + if (!sum.isValidLong) { + return None + } + } else { + return None + } } + Some(sum.toLong) } def duplicateResolved: Boolean = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala index 0cd6d8164fe8d..acb41b097efbb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala @@ -105,4 +105,12 @@ class LogicalPlanSuite extends SparkFunSuite { assert(Range(0, 100, 1, 3).select('id).maxRowsPerPartition === Some(34)) assert(Range(0, 100, 1, 3).where('id % 2 === 1).maxRowsPerPartition === Some(34)) } + + test("SPARK-38286: Union's maxRows and maxRowsPerPartition may overflow") { + val query1 = Range(0, Long.MaxValue, 1, 1) + val query2 = Range(0, 100, 1, 10) + val query = query1.union(query2) + assert(query.maxRows.isEmpty) + assert(query.maxRowsPerPartition.isEmpty) + } } From 47c5b4ce634cabebe73ca29677087ce05cb312b0 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 24 Feb 2022 12:27:29 +0900 Subject: [PATCH 322/513] [SPARK-38060][SQL][DOCS][FOLLOW-UP] Move migration guide note from CORE's to SQL's ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/35573 that moves the migration guide note from CORE's to SQL's. ### Why are the changes needed? The change is for SQL. It was a small mistake. ### Does this PR introduce _any_ user-facing change? Migration note is changed to SQL's. The change has not been released yet so no impact to end users. ### How was this patch tested? CI should test it out. 
Closes #35641 from HyukjinKwon/SPARK-38060-followup. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- docs/core-migration-guide.md | 2 -- docs/sql-migration-guide.md | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 588433c36444d..745b80d6eecb2 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -26,8 +26,6 @@ license: | - Since Spark 3.3, Spark migrates its log4j dependency from 1.x to 2.x because log4j 1.x has reached end of life and is no longer supported by the community. Vulnerabilities reported after August 2015 against log4j 1.x were not checked and will not be fixed. Users should rewrite original log4j properties files using log4j2 syntax (XML, JSON, YAML, or properties format). Spark rewrites the `conf/log4j.properties.template` which is included in Spark distribution, to `conf/log4j2.properties.template` with log4j2 properties format. -- Since Spark 3.3, when reading values from a JSON attribute defined as `FloatType` or `DoubleType`, the strings `"+Infinity"`, `"+INF"`, and `"-INF"` are now parsed to the appropriate values, in addition to the already supported `"Infinity"` and `"-Infinity"` variations. This change was made to improve consistency with Jackson's parsing of the unquoted versions of these values. Also, the `allowNonNumericNumbers` option is now respected so these strings will now be considered invalid if this option is disabled. - ## Upgrading from Core 3.1 to 3.2 - Since Spark 3.2, `spark.scheduler.allocation.file` supports read remote file using hadoop filesystem which means if the path has no scheme Spark will respect hadoop configuration to read it. To restore the behavior before Spark 3.2, you can specify the local scheme for `spark.scheduler.allocation.file` e.g. `file:///path/to/file`. diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 0893f46c89dce..6ea86e3ca2c4c 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -60,6 +60,8 @@ license: | - Since Spark 3.3, DROP FUNCTION fails if the function name matches one of the built-in functions' name and is not qualified. In Spark 3.2 or earlier, DROP FUNCTION can still drop a persistent function even if the name is not qualified and is the same as a built-in function's name. + - Since Spark 3.3, when reading values from a JSON attribute defined as `FloatType` or `DoubleType`, the strings `"+Infinity"`, `"+INF"`, and `"-INF"` are now parsed to the appropriate values, in addition to the already supported `"Infinity"` and `"-Infinity"` variations. This change was made to improve consistency with Jackson's parsing of the unquoted versions of these values. Also, the `allowNonNumericNumbers` option is now respected so these strings will now be considered invalid if this option is disabled. + ## Upgrading from Spark SQL 3.1 to 3.2 - Since Spark 3.2, ADD FILE/JAR/ARCHIVE commands require each path to be enclosed by `"` or `'` if the path contains whitespaces. From fb543a7294dc74d211dc58cbd1c1d30a1939c344 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 24 Feb 2022 13:02:37 +0800 Subject: [PATCH 323/513] [SPARK-38306][SQL] Fix ExplainSuite,StatisticsCollectionSuite and StringFunctionsSuite under ANSI mode ### What changes were proposed in this pull request? Fix ExplainSuite,StatisticsCollectionSuite and StringFunctionsSuite under ANSI mode ### Why are the changes needed? 
To set up a new GA test job with ANSI mode on ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually turn on ANSI mode and test Closes #35633 from gengliangwang/fixStringFunc. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../org/apache/spark/sql/ExplainSuite.scala | 11 +++++------ .../spark/sql/StatisticsCollectionSuite.scala | 4 ++-- .../apache/spark/sql/StringFunctionsSuite.scala | 16 ++++++++++------ 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 99bdfc829b442..67240c5525f34 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -217,8 +217,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite // AND conjunction // OR disjunction // --------------------------------------------------------------------------------------- - checkKeywordsExistsInExplain(sql("select 'a' || 1 + 2"), - "Project [null AS (concat(a, 1) + 2)#x]") + checkKeywordsExistsInExplain(sql("select '1' || 1 + 2"), + "Project [13", " AS (concat(1, 1) + 2)#x") checkKeywordsExistsInExplain(sql("select 1 - 2 || 'b'"), "Project [-1b AS concat((1 - 2), b)#x]") checkKeywordsExistsInExplain(sql("select 2 * 4 + 3 || 'b'"), @@ -232,12 +232,11 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite } test("explain for these functions; use range to avoid constant folding") { - val df = sql("select ifnull(id, 'x'), nullif(id, 'x'), nvl(id, 'x'), nvl2(id, 'x', 'y') " + + val df = sql("select ifnull(id, 1), nullif(id, 1), nvl(id, 1), nvl2(id, 1, 2) " + "from range(2)") checkKeywordsExistsInExplain(df, - "Project [cast(id#xL as string) AS ifnull(id, x)#x, " + - "id#xL AS nullif(id, x)#xL, cast(id#xL as string) AS nvl(id, x)#x, " + - "x AS nvl2(id, x, y)#x]") + "Project [id#xL AS ifnull(id, 1)#xL, if ((id#xL = 1)) null " + + "else id#xL AS nullif(id, 1)#xL, id#xL AS nvl(id, 1)#xL, 1 AS nvl2(id, 1, 2)#x]") } test("SPARK-26659: explain of DataWritingCommandExec should not contain duplicate cmd.nodeName") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 0987825c88117..57fc49ddc8131 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -409,7 +409,7 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared val df = spark.range(1000L).select('id, 'id * 2 as "FLD1", 'id * 12 as "FLD2", - lit("aaa") + 'id as "fld3") + lit(null).cast(DoubleType) + 'id as "fld3") df.write .mode(SaveMode.Overwrite) .bucketBy(10, "id", "FLD1", "FLD2") @@ -425,7 +425,7 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared |WHERE t1.fld3 IN (-123.23,321.23) """.stripMargin) df2.createTempView("TBL2") - sql("SELECT * FROM tbl2 WHERE fld3 IN ('qqq', 'qwe') ").queryExecution.executedPlan + sql("SELECT * FROM tbl2 WHERE fld3 IN (0,1) ").queryExecution.executedPlan } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 30a6600c31765..2f118f236e2c4 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -112,9 +112,11 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { val df = Seq[(String, String, String, Int)](("hello", "world", null, 15)) .toDF("a", "b", "c", "d") - checkAnswer( - df.selectExpr("elt(0, a, b, c)", "elt(1, a, b, c)", "elt(4, a, b, c)"), - Row(null, "hello", null)) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkAnswer( + df.selectExpr("elt(0, a, b, c)", "elt(1, a, b, c)", "elt(4, a, b, c)"), + Row(null, "hello", null)) + } // check implicit type cast checkAnswer( @@ -383,9 +385,11 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { Row("host", "/file;param", "query;p2", null, "http", "/file;param?query;p2", "user:pass@host", "user:pass", null)) - testUrl( - "inva lid://user:pass@host/file;param?query;p2", - Row(null, null, null, null, null, null, null, null, null)) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + testUrl( + "inva lid://user:pass@host/file;param?query;p2", + Row(null, null, null, null, null, null, null, null, null)) + } } From 43576430232194bfe436d4b6e2bc59b20f7d5513 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 23 Feb 2022 19:32:38 -1000 Subject: [PATCH 324/513] [SPARK-37923][SQL][FOLLOWUP] Rename MultipleBucketTransformsError in QueryExecutionErrors to multipleBucketTransformsError ### What changes were proposed in this pull request? SPARK-37923 defines `QueryExecutionErrors.MultipleBucketTransformsError`, but the function name should start with lowercase letter, to this method rename `MultipleBucketTransformsError` to `multipleBucketTransformsError`. ### Why are the changes needed? function name should start with lowercase letter. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35623 from LuciferYang/SPARK-37923-FOLLOWUP. 
Authored-by: yangjie01 Signed-off-by: huaxingao --- .../apache/spark/sql/connector/catalog/CatalogV2Implicits.scala | 2 +- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index 07098eed4f9c4..04af7eda6aaa9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -60,7 +60,7 @@ private[sql] object CatalogV2Implicits { identityCols += col case BucketTransform(numBuckets, col, sortCol) => - if (bucketSpec.nonEmpty) throw QueryExecutionErrors.MultipleBucketTransformsError + if (bucketSpec.nonEmpty) throw QueryExecutionErrors.multipleBucketTransformsError if (sortCol.isEmpty) { bucketSpec = Some(BucketSpec(numBuckets, col.map(_.fieldNames.mkString(".")), Nil)) } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index fcf9a6b5171cc..1a7cbac914dfe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1951,7 +1951,7 @@ object QueryExecutionErrors { s"The input string '$input' does not match the given number format: '$format'") } - def MultipleBucketTransformsError(): Throwable = { + def multipleBucketTransformsError(): Throwable = { new UnsupportedOperationException("Multiple bucket transforms are not supported.") } From c4b013f4129969668323d20e40869411b35ed5e7 Mon Sep 17 00:00:00 2001 From: Yikf Date: Thu, 24 Feb 2022 14:00:44 +0800 Subject: [PATCH 325/513] [SPARK-38229][FOLLOWUP][SQL] Clean up unnecessary code for code simplification ### What changes were proposed in this pull request? Clean up unnecessary code for code simplification, see [comment](https://github.com/apache/spark/pull/35541#discussion_r812638358) ### Why are the changes needed? code simplification ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Exist ut Closes #35642 from Yikf/r. Authored-by: Yikf Signed-off-by: Wenchen Fan --- .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 257df58b00e01..604b424ab9b26 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2941,15 +2941,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg (multipartIdentifier, temporary, ifNotExists, ctx.EXTERNAL != null) } - /** - * Validate a replace table statement and return the [[TableIdentifier]]. - */ - override def visitReplaceTableHeader( - ctx: ReplaceTableHeaderContext): Seq[String] = withOrigin(ctx) { - val multipartIdentifier = ctx.multipartIdentifier.parts.asScala.map(_.getText).toSeq - multipartIdentifier - } - /** * Parse a qualified name to a multipart name. 
*/ @@ -3543,7 +3534,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitReplaceTable(ctx: ReplaceTableContext): LogicalPlan = withOrigin(ctx) { - val table = visitReplaceTableHeader(ctx.replaceTableHeader) + val table = visitMultipartIdentifier(ctx.replaceTableHeader.multipartIdentifier()) val orCreate = ctx.replaceTableHeader().CREATE() != null val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo) = visitCreateTableClauses(ctx.createTableClauses()) From 5190048354be0aa7044b583d1eb42f9ec3b54974 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 24 Feb 2022 08:06:24 -0800 Subject: [PATCH 326/513] [SPARK-38300][SQL] Use `ByteStreams.toByteArray` to simplify `fileToString` and `resourceToBytes` in catalyst.util ### What changes were proposed in this pull request? This pr replace the manually `InputStream` to `ByteArray` codes with `com.google.common.io.ByteStreams.toByteArray` to simplify the methods `fileToString` and `resourceToBytes` in `catalyst.util`. At the same time, the new method has better efficiency. ### Why are the changes needed? Simplify code and use more efficient method. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GA - Manual test: we can prove that the `ByteStreams.toByteArray` is more efficient than old code through the following test: ```scala def testToByteArray(fileSie: Int): Unit = { // Prepare data val bytes = RandomUtils.nextBytes(fileSie) val file = File.createTempFile(s"$fileSie-${UUID.randomUUID()}", ".dat") val fileForGuavaApi = File.createTempFile(s"$fileSie-${UUID.randomUUID()}", ".dat") file.deleteOnExit() fileForGuavaApi.deleteOnExit() FileUtils.writeByteArrayToFile(file, bytes) FileUtils.writeByteArrayToFile(fileForGuavaApi, bytes) val benchmark = new Benchmark(s"ToByteArray with fileSize $fileSie bytes ", 1, output = output) benchmark.addCase("toByteArray: byte by byte copy") { _: Int => toByteArray(file) } benchmark.addCase("toByteArray: use Guava api") { _: Int => toByteArrayUseGuava(fileForGuavaApi) } benchmark.run() } private def toByteArrayUseGuava(file: File): Array[Byte] = { val inStream = new FileInputStream(file) try { ByteStreams.toByteArray(inStream) } finally { inStream.close() } } private def toByteArray(file: File): Array[Byte] = { val inStream = new FileInputStream(file) val outStream = new ByteArrayOutputStream try { var reading = true while (reading) { inStream.read() match { case -1 => reading = false case c => outStream.write(c) } } outStream.flush() } finally { inStream.close() } outStream.toByteArray } override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { // Test 1K, 1M, 10M, 100M fileSize Seq(1024, 1024 * 1024, 1024 * 1024 * 10, 1024 * 1024 * 100).foreach { fileSize => testToByteArray(fileSize) } } ``` The results of the above test code using GA are as follows: ``` OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU 2.60GHz ToByteArray with fileSize 1024 bytes : Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ toByteArray: byte by byte copy 1 1 0 0.0 781310.0 1.0X toByteArray: use Guava api 0 0 0 0.2 6100.0 128.1X OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU 2.60GHz ToByteArray with fileSize 1048576 bytes : Best Time(ms) Avg 
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ toByteArray: byte by byte copy 812 817 4 0.0 812133973.0 1.0X toByteArray: use Guava api 1 1 0 0.0 1002514.0 810.1X OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU 2.60GHz ToByteArray with fileSize 10485760 bytes : Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- toByteArray: byte by byte copy 8203 8206 4 0.0 8203332962.0 1.0X toByteArray: use Guava api 18 20 2 0.0 18325138.0 447.7X OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU 2.60GHz ToByteArray with fileSize 104857600 bytes : Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- toByteArray: byte by byte copy 82543 82671 181 0.0 82542615028.0 1.0X toByteArray: use Guava api 182 195 24 0.0 182350535.0 452.7X ``` Closes #35622 from LuciferYang/SPARK-38300. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/util/package.scala | 30 ++++--------------- 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala index d8e895b57ae94..e06072cbed282 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala @@ -22,6 +22,8 @@ import java.nio.charset.Charset import java.nio.charset.StandardCharsets.UTF_8 import java.util.concurrent.atomic.AtomicBoolean +import com.google.common.io.ByteStreams + import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.internal.SQLConf @@ -48,42 +50,22 @@ package object util extends Logging { def fileToString(file: File, encoding: Charset = UTF_8): String = { val inStream = new FileInputStream(file) - val outStream = new ByteArrayOutputStream try { - var reading = true - while ( reading ) { - inStream.read() match { - case -1 => reading = false - case c => outStream.write(c) - } - } - outStream.flush() - } - finally { + new String(ByteStreams.toByteArray(inStream), encoding) + } finally { inStream.close() } - new String(outStream.toByteArray, encoding) } def resourceToBytes( resource: String, classLoader: ClassLoader = Utils.getSparkClassLoader): Array[Byte] = { val inStream = classLoader.getResourceAsStream(resource) - val outStream = new ByteArrayOutputStream try { - var reading = true - while ( reading ) { - inStream.read() match { - case -1 => reading = false - case c => outStream.write(c) - } - } - outStream.flush() - } - finally { + ByteStreams.toByteArray(inStream) + } finally { inStream.close() } - outStream.toByteArray } def resourceToString( From 43c89dca89d1a4c0dc63354f46b5bd4b39cdda65 Mon Sep 17 00:00:00 2001 From: Kevin Sewell Date: Thu, 24 Feb 2022 08:14:07 -0800 Subject: [PATCH 327/513] [SPARK-38273][SQL] `decodeUnsafeRows`'s iterators should close underlying input streams ### What changes were proposed in this pull request? 
Wrapping the DataInputStream in the SparkPlan.decodeUnsafeRows method with a NextIterator as opposed to a plain Iterator, this will allow us to close the DataInputStream properly. This happens in Spark driver only. ### Why are the changes needed? SPARK-34647 replaced the ZstdInputStream with ZstdInputStreamNoFinalizer. This meant that all usages of `CompressionCodec.compressedInputStream` would need to manually close the stream as this would no longer be handled by the finaliser mechanism. In SparkPlan, the result of `CompressionCodec.compressedInputStream` is wrapped in an Iterator which never calls close. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? #### Spark Shell Configuration ```bash $> export SPARK_SUBMIT_OPTS="-XX:+AlwaysPreTouch -Xms1g" $> $SPARK_HOME/bin/spark-shell --conf spark.io.compression.codec=zstd ``` #### Test Script ```scala import java.sql.Timestamp import java.time.Instant import spark.implicits._ case class Record(timestamp: Timestamp, batch: Long, value: Long) (1 to 300).foreach { batch => sc.parallelize(1 to 1000000).map(Record(Timestamp.from(Instant.now()), batch, _)).toDS.write.parquet(s"test_data/batch_$batch") } (1 to 300).foreach(batch => spark.read.parquet(s"test_data/batch_$batch").as[Record].repartition().collect()) ``` #### Memory Monitor ```shell $> while true; do echo \"$(date +%Y-%m-%d' '%H:%M:%S)\",$(pmap -x | grep "total kB" | awk '{print $4}'); sleep 10; done; ``` #### Results ##### Before ``` "2022-02-22 11:55:23",1400016 "2022-02-22 11:55:33",1522024 "2022-02-22 11:55:43",1587812 "2022-02-22 11:55:53",1631868 "2022-02-22 11:56:03",1657252 "2022-02-22 11:56:13",1659728 "2022-02-22 11:56:23",1664640 "2022-02-22 11:56:33",1674152 "2022-02-22 11:56:43",1697320 "2022-02-22 11:56:53",1689636 "2022-02-22 11:57:03",1783888 "2022-02-22 11:57:13",1896920 "2022-02-22 11:57:23",1950492 "2022-02-22 11:57:33",2010968 "2022-02-22 11:57:44",2066560 "2022-02-22 11:57:54",2108232 "2022-02-22 11:58:04",2158188 "2022-02-22 11:58:14",2211344 "2022-02-22 11:58:24",2260180 "2022-02-22 11:58:34",2316352 "2022-02-22 11:58:44",2367412 "2022-02-22 11:58:54",2420916 "2022-02-22 11:59:04",2472132 "2022-02-22 11:59:14",2519888 "2022-02-22 11:59:24",2571372 "2022-02-22 11:59:34",2621992 "2022-02-22 11:59:44",2672400 "2022-02-22 11:59:54",2728924 "2022-02-22 12:00:04",2777712 "2022-02-22 12:00:14",2834272 "2022-02-22 12:00:24",2881344 "2022-02-22 12:00:34",2935552 "2022-02-22 12:00:44",2984896 "2022-02-22 12:00:54",3034116 "2022-02-22 12:01:04",3087092 "2022-02-22 12:01:14",3134432 "2022-02-22 12:01:25",3198316 "2022-02-22 12:01:35",3193484 "2022-02-22 12:01:45",3193212 "2022-02-22 12:01:55",3192872 "2022-02-22 12:02:05",3191772 "2022-02-22 12:02:15",3187780 "2022-02-22 12:02:25",3177084 "2022-02-22 12:02:35",3173292 "2022-02-22 12:02:45",3173292 "2022-02-22 12:02:55",3173292 ``` ##### After ``` "2022-02-22 12:05:03",1377124 "2022-02-22 12:05:13",1425132 "2022-02-22 12:05:23",1564060 "2022-02-22 12:05:33",1616116 "2022-02-22 12:05:43",1637448 "2022-02-22 12:05:53",1637700 "2022-02-22 12:06:03",1653912 "2022-02-22 12:06:13",1659532 "2022-02-22 12:06:23",1673368 "2022-02-22 12:06:33",1687580 "2022-02-22 12:06:43",1711076 "2022-02-22 12:06:53",1849752 "2022-02-22 12:07:03",1861528 "2022-02-22 12:07:13",1871200 "2022-02-22 12:07:24",1878860 "2022-02-22 12:07:34",1879332 "2022-02-22 12:07:44",1886552 "2022-02-22 12:07:54",1884160 "2022-02-22 12:08:04",1880924 "2022-02-22 12:08:14",1876084 "2022-02-22 12:08:24",1878800 "2022-02-22 
12:08:34",1879068 "2022-02-22 12:08:44",1880088 "2022-02-22 12:08:54",1880160 "2022-02-22 12:09:04",1880496 "2022-02-22 12:09:14",1891672 "2022-02-22 12:09:24",1878552 "2022-02-22 12:09:34",1876136 "2022-02-22 12:09:44",1890056 "2022-02-22 12:09:54",1878076 "2022-02-22 12:10:04",1882440 "2022-02-22 12:10:14",1893172 "2022-02-22 12:10:24",1894216 "2022-02-22 12:10:34",1894204 "2022-02-22 12:10:44",1894716 "2022-02-22 12:10:54",1894720 "2022-02-22 12:11:04",1894720 "2022-02-22 12:11:15",1895232 "2022-02-22 12:11:25",1895496 "2022-02-22 12:11:35",1895496 ``` Closes #35613 from kevins-29/spark-38273. Lead-authored-by: Kevin Sewell Co-authored-by: kevins-29 <100220899+kevins-29@users.noreply.github.com> Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/SparkPlan.scala | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index f56beeb79db72..bb1c5c3873cd8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -37,6 +37,7 @@ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.vectorized.ColumnarBatch +import org.apache.spark.util.NextIterator object SparkPlan { /** The original [[LogicalPlan]] from which this [[SparkPlan]] is converted. */ @@ -384,10 +385,9 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ val bis = new ByteArrayInputStream(bytes) val ins = new DataInputStream(codec.compressedInputStream(bis)) - new Iterator[InternalRow] { + new NextIterator[InternalRow] { private var sizeOfNextRow = ins.readInt() - override def hasNext: Boolean = sizeOfNextRow >= 0 - override def next(): InternalRow = { + private def _next(): InternalRow = { val bs = new Array[Byte](sizeOfNextRow) ins.readFully(bs) val row = new UnsafeRow(nFields) @@ -395,6 +395,22 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ sizeOfNextRow = ins.readInt() row } + + override def getNext(): InternalRow = { + if (sizeOfNextRow >= 0) { + try { + _next() + } catch { + case t: Throwable if ins != null => + ins.close() + throw t + } + } else { + finished = true + null + } + } + override def close(): Unit = ins.close() } } From e58872d67b1cd8e0b0fb71aec1ba2023e5a1991f Mon Sep 17 00:00:00 2001 From: weixiuli Date: Thu, 24 Feb 2022 18:49:04 -0600 Subject: [PATCH 328/513] [SPARK-38191][CORE] The staging directory of write job only needs to be initialized once in HadoopMapReduceCommitProtocol ### What changes were proposed in this pull request? Use a stagingDir constant instead of the stagingDir method in HadoopMapReduceCommitProtocol. ### Why are the changes needed? The stagingDir method will be called many times in commitJob, especially in traversing partitionPaths when the dynamicPartitionOverwrite is true. So, we should use a stagingDir constant instead of the stagingDir method to avoid multiple function calls. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the CIs. Closes #35492 from weixiuli/staging-directory-initialize. 
Authored-by: weixiuli Signed-off-by: Sean Owen --- .../spark/internal/io/HadoopMapReduceCommitProtocol.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala index a39e9abd9bdc4..8742901f5716d 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala @@ -104,7 +104,7 @@ class HadoopMapReduceCommitProtocol( * The staging directory of this write job. Spark uses it to deal with files with absolute output * path, or writing data into partitioned directory with dynamicPartitionOverwrite=true. */ - protected def stagingDir = getStagingDir(path, jobId) + protected lazy val stagingDir = getStagingDir(path, jobId) protected def setupCommitter(context: TaskAttemptContext): OutputCommitter = { val format = context.getOutputFormatClass.getConstructor().newInstance() From 9758d55918dfec236e8ac9f1655a9ff0acd7156e Mon Sep 17 00:00:00 2001 From: bjornjorgensen Date: Fri, 25 Feb 2022 11:43:36 +0900 Subject: [PATCH 329/513] [SPARK-38303][BUILD] Upgrade `ansi-regex` from 5.0.0 to 5.0.1 in /dev ### What changes were proposed in this pull request? Upgrade ansi-regex from 5.0.0 to 5.0.1 in /dev ### Why are the changes needed? [CVE-2021-3807](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3807) [Release notes on GitHub](https://github.com/chalk/ansi-regex/releases) By upgrading ansi-regex from 5.0.0 to 5.0.1, we resolve this issue. ### Does this PR introduce _any_ user-facing change? Some users use remote security scanners, and this is one of the issues that comes up. Whether this can do any damage with Spark is highly uncertain, but let's remove the uncertainty that any user may have. ### How was this patch tested? All tests must pass. Closes #35628 from bjornjorgensen/ansi-regex-from-5.0.0-to-5.0.1.
Authored-by: bjornjorgensen Signed-off-by: Kousuke Saruta --- dev/package-lock.json | 3189 ++++++++++++++++++++++++++++------------- dev/package.json | 3 +- 2 files changed, 2229 insertions(+), 963 deletions(-) diff --git a/dev/package-lock.json b/dev/package-lock.json index a57f45bcf7184..c2a61b389ac53 100644 --- a/dev/package-lock.json +++ b/dev/package-lock.json @@ -1,979 +1,2244 @@ { - "requires": true, - "lockfileVersion": 1, - "dependencies": { - "@babel/code-frame": { - "version": "7.12.11", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.11.tgz", - "integrity": "sha512-Zt1yodBx1UcyiePMSkWnU4hPqhwq7hGi2nFL1LeA3EUl+q2LQx16MISgJ0+z7dnmgvP9QtIleuETGOiOH1RcIw==", - "dev": true, - "requires": { - "@babel/highlight": "^7.10.4" - } - }, - "@babel/helper-validator-identifier": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.14.0.tgz", - "integrity": "sha512-V3ts7zMSu5lfiwWDVWzRDGIN+lnCEUdaXgtVHJgLb1rGaA6jMrtB9EmE7L18foXJIE8Un/A/h6NJfGQp/e1J4A==", - "dev": true - }, - "@babel/highlight": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.14.0.tgz", - "integrity": "sha512-YSCOwxvTYEIMSGaBQb5kDDsCopDdiUGsqpatp3fOlI4+2HQSkTmEVWnVuySdAC5EWCqSWWTv0ib63RjR7dTBdg==", - "dev": true, - "requires": { - "@babel/helper-validator-identifier": "^7.14.0", - "chalk": "^2.0.0", - "js-tokens": "^4.0.0" - }, - "dependencies": { - "chalk": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", - "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", - "dev": true, - "requires": { - "ansi-styles": "^3.2.1", - "escape-string-regexp": "^1.0.5", - "supports-color": "^5.3.0" - } - } - } - }, - "@eslint/eslintrc": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-0.4.0.tgz", - "integrity": "sha512-2ZPCc+uNbjV5ERJr+aKSPRwZgKd2z11x0EgLvb1PURmUrn9QNRXFqje0Ldq454PfAVyaJYyrDvvIKSFP4NnBog==", - "dev": true, - "requires": { - "ajv": "^6.12.4", - "debug": "^4.1.1", - "espree": "^7.3.0", - "globals": "^12.1.0", - "ignore": "^4.0.6", - "import-fresh": "^3.2.1", - "js-yaml": "^3.13.1", - "minimatch": "^3.0.4", - "strip-json-comments": "^3.1.1" - }, - "dependencies": { - "globals": { - "version": "12.4.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-12.4.0.tgz", - "integrity": "sha512-BWICuzzDvDoH54NHKCseDanAhE3CeDorgDL5MT6LMXXj2WCnd9UC2szdk4AWLfjdgNBCXLUanXYcpBBKOSWGwg==", - "dev": true, - "requires": { - "type-fest": "^0.8.1" - } - } - } - }, - "acorn": { - "version": "7.4.1", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.4.1.tgz", - "integrity": "sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==", - "dev": true - }, - "acorn-jsx": { - "version": "5.3.1", - "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.1.tgz", - "integrity": "sha512-K0Ptm/47OKfQRpNQ2J/oIN/3QYiK6FwW+eJbILhsdxh2WTLdl+30o8aGdTbm5JbffpFFAg/g+zi1E+jvJha5ng==", - "dev": true - }, - "ajv": { - "version": "6.12.6", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", - "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", - "dev": true, - "requires": { - "fast-deep-equal": "^3.1.1", - "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.4.1", - "uri-js": "^4.2.2" - } - }, - "ansi-colors": { - 
"version": "4.1.1", - "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz", - "integrity": "sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==", - "dev": true - }, - "ansi-regex": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", - "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", - "dev": true - }, - "ansi-styles": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", - "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", - "dev": true, - "requires": { - "color-convert": "^1.9.0" - } - }, - "argparse": { - "version": "1.0.10", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", - "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", - "dev": true, - "requires": { - "sprintf-js": "~1.0.2" - } - }, - "astral-regex": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/astral-regex/-/astral-regex-2.0.0.tgz", - "integrity": "sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==", - "dev": true - }, - "balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "dev": true - }, - "brace-expansion": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", - "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", - "dev": true, - "requires": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, - "callsites": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", - "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", - "dev": true - }, + "name": "dev", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "devDependencies": { + "ansi-regex": "^5.0.1", + "eslint": "^7.25.0" + } + }, + "node_modules/@babel/code-frame": { + "version": "7.12.11", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.11.tgz", + "integrity": "sha512-Zt1yodBx1UcyiePMSkWnU4hPqhwq7hGi2nFL1LeA3EUl+q2LQx16MISgJ0+z7dnmgvP9QtIleuETGOiOH1RcIw==", + "dev": true, + "dependencies": { + "@babel/highlight": "^7.10.4" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.14.0.tgz", + "integrity": "sha512-V3ts7zMSu5lfiwWDVWzRDGIN+lnCEUdaXgtVHJgLb1rGaA6jMrtB9EmE7L18foXJIE8Un/A/h6NJfGQp/e1J4A==", + "dev": true + }, + "node_modules/@babel/highlight": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.14.0.tgz", + "integrity": "sha512-YSCOwxvTYEIMSGaBQb5kDDsCopDdiUGsqpatp3fOlI4+2HQSkTmEVWnVuySdAC5EWCqSWWTv0ib63RjR7dTBdg==", + "dev": true, + "dependencies": { + "@babel/helper-validator-identifier": "^7.14.0", + "chalk": "^2.0.0", + "js-tokens": "^4.0.0" + } + }, + "node_modules/@babel/highlight/node_modules/chalk": { + "version": "2.4.2", + "resolved": 
"https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dev": true, + "dependencies": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/@eslint/eslintrc": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-0.4.0.tgz", + "integrity": "sha512-2ZPCc+uNbjV5ERJr+aKSPRwZgKd2z11x0EgLvb1PURmUrn9QNRXFqje0Ldq454PfAVyaJYyrDvvIKSFP4NnBog==", + "dev": true, + "dependencies": { + "ajv": "^6.12.4", + "debug": "^4.1.1", + "espree": "^7.3.0", + "globals": "^12.1.0", + "ignore": "^4.0.6", + "import-fresh": "^3.2.1", + "js-yaml": "^3.13.1", + "minimatch": "^3.0.4", + "strip-json-comments": "^3.1.1" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + } + }, + "node_modules/@eslint/eslintrc/node_modules/globals": { + "version": "12.4.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-12.4.0.tgz", + "integrity": "sha512-BWICuzzDvDoH54NHKCseDanAhE3CeDorgDL5MT6LMXXj2WCnd9UC2szdk4AWLfjdgNBCXLUanXYcpBBKOSWGwg==", + "dev": true, + "dependencies": { + "type-fest": "^0.8.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/acorn": { + "version": "7.4.1", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.4.1.tgz", + "integrity": "sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==", + "dev": true, + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-jsx": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.1.tgz", + "integrity": "sha512-K0Ptm/47OKfQRpNQ2J/oIN/3QYiK6FwW+eJbILhsdxh2WTLdl+30o8aGdTbm5JbffpFFAg/g+zi1E+jvJha5ng==", + "dev": true, + "peerDependencies": { + "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" + } + }, + "node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dev": true, + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ansi-colors": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz", + "integrity": "sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dev": true, + "dependencies": { + "color-convert": "^1.9.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/argparse": { + "version": "1.0.10", + "resolved": 
"https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "dev": true, + "dependencies": { + "sprintf-js": "~1.0.2" + } + }, + "node_modules/astral-regex": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/astral-regex/-/astral-regex-2.0.0.tgz", + "integrity": "sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true + }, + "node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/callsites": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/chalk": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.1.tgz", + "integrity": "sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==", + "dev": true, + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/chalk/node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/chalk/node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/chalk/node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + }, + "node_modules/chalk/node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/chalk/node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": 
"sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dev": true, + "dependencies": { + "color-name": "1.1.3" + } + }, + "node_modules/color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=", + "dev": true + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", + "dev": true + }, + "node_modules/cross-spawn": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", + "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "dev": true, + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/debug": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz", + "integrity": "sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==", + "dev": true, + "dependencies": { + "ms": "2.1.2" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/deep-is": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.3.tgz", + "integrity": "sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=", + "dev": true + }, + "node_modules/doctrine": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", + "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==", + "dev": true, + "dependencies": { + "esutils": "^2.0.2" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "node_modules/enquirer": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/enquirer/-/enquirer-2.3.6.tgz", + "integrity": "sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg==", + "dev": true, + "dependencies": { + "ansi-colors": "^4.1.1" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=", + "dev": true, + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/eslint": { + "version": "7.25.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-7.25.0.tgz", + "integrity": "sha512-TVpSovpvCNpLURIScDRB6g5CYu/ZFq9GfX2hLNIV4dSBKxIWojeDODvYl3t0k0VtMxYeR8OXPCFE5+oHMlGfhw==", + "dev": true, + "dependencies": { + "@babel/code-frame": "7.12.11", + "@eslint/eslintrc": "^0.4.0", + "ajv": "^6.10.0", + "chalk": "^4.0.0", + 
"cross-spawn": "^7.0.2", + "debug": "^4.0.1", + "doctrine": "^3.0.0", + "enquirer": "^2.3.5", + "eslint-scope": "^5.1.1", + "eslint-utils": "^2.1.0", + "eslint-visitor-keys": "^2.0.0", + "espree": "^7.3.1", + "esquery": "^1.4.0", + "esutils": "^2.0.2", + "file-entry-cache": "^6.0.1", + "functional-red-black-tree": "^1.0.1", + "glob-parent": "^5.0.0", + "globals": "^13.6.0", + "ignore": "^4.0.6", + "import-fresh": "^3.0.0", + "imurmurhash": "^0.1.4", + "is-glob": "^4.0.0", + "js-yaml": "^3.13.1", + "json-stable-stringify-without-jsonify": "^1.0.1", + "levn": "^0.4.1", + "lodash": "^4.17.21", + "minimatch": "^3.0.4", + "natural-compare": "^1.4.0", + "optionator": "^0.9.1", + "progress": "^2.0.0", + "regexpp": "^3.1.0", + "semver": "^7.2.1", + "strip-ansi": "^6.0.0", + "strip-json-comments": "^3.1.0", + "table": "^6.0.4", + "text-table": "^0.2.0", + "v8-compile-cache": "^2.0.3" + }, + "bin": { + "eslint": "bin/eslint.js" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/eslint-scope": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz", + "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==", + "dev": true, + "dependencies": { + "esrecurse": "^4.3.0", + "estraverse": "^4.1.1" + }, + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/eslint-utils": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-2.1.0.tgz", + "integrity": "sha512-w94dQYoauyvlDc43XnGB8lU3Zt713vNChgt4EWwhXAP2XkBvndfxF0AgIqKOOasjPIPzj9JqgwkwbCYD0/V3Zg==", + "dev": true, + "dependencies": { + "eslint-visitor-keys": "^1.1.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/mysticatea" + } + }, + "node_modules/eslint-utils/node_modules/eslint-visitor-keys": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", + "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/eslint-visitor-keys": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz", + "integrity": "sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==", + "dev": true, + "engines": { + "node": ">=10" + } + }, + "node_modules/espree": { + "version": "7.3.1", + "resolved": "https://registry.npmjs.org/espree/-/espree-7.3.1.tgz", + "integrity": "sha512-v3JCNCE64umkFpmkFGqzVKsOT0tN1Zr+ueqLZfpV1Ob8e+CEgPWa+OxCoGH3tnhimMKIaBm4m/vaRpJ/krRz2g==", + "dev": true, + "dependencies": { + "acorn": "^7.4.0", + "acorn-jsx": "^5.3.1", + "eslint-visitor-keys": "^1.3.0" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + } + }, + "node_modules/espree/node_modules/eslint-visitor-keys": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", + "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": 
"sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "dev": true, + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/esquery": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.4.0.tgz", + "integrity": "sha512-cCDispWt5vHHtwMY2YrAQ4ibFkAL8RbH5YGBnZBc90MolvvfkkQcJro/aZiAQUlQ3qgrYS6D6v8Gc5G5CQsc9w==", + "dev": true, + "dependencies": { + "estraverse": "^5.1.0" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/esquery/node_modules/estraverse": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", + "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", + "dev": true, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esrecurse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", + "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", + "dev": true, + "dependencies": { + "estraverse": "^5.2.0" + }, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esrecurse/node_modules/estraverse": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", + "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", + "dev": true, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/estraverse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "dev": true, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true + }, + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true + }, + "node_modules/fast-levenshtein": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", + "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=", + "dev": true + }, + "node_modules/file-entry-cache": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", + "integrity": "sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==", + "dev": true, + "dependencies": { + "flat-cache": "^3.0.4" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + } + }, + "node_modules/flat-cache": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.0.4.tgz", + "integrity": 
"sha512-dm9s5Pw7Jc0GvMYbshN6zchCA9RgQlzzEZX3vylR9IqFfS8XciblUXOKfW6SiuJ0e13eDYZoZV5wdrev7P3Nwg==", + "dev": true, + "dependencies": { + "flatted": "^3.1.0", + "rimraf": "^3.0.2" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + } + }, + "node_modules/flatted": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.1.1.tgz", + "integrity": "sha512-zAoAQiudy+r5SvnSw3KJy5os/oRJYHzrzja/tBDqrZtNhUw8bt6y8OBzMWcjWr+8liV8Eb6yOhw8WZ7VFZ5ZzA==", + "dev": true + }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", + "dev": true + }, + "node_modules/functional-red-black-tree": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/functional-red-black-tree/-/functional-red-black-tree-1.0.1.tgz", + "integrity": "sha1-GwqzvVU7Kg1jmdKcDj6gslIHgyc=", + "dev": true + }, + "node_modules/glob": { + "version": "7.1.6", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz", + "integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==", + "dev": true, + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/globals": { + "version": "13.8.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-13.8.0.tgz", + "integrity": "sha512-rHtdA6+PDBIjeEvA91rpqzEvk/k3/i7EeNQiryiWuJH0Hw9cpyJMAt2jtbAwUaRdhD+573X4vWw6IcjKPasi9Q==", + "dev": true, + "dependencies": { + "type-fest": "^0.20.2" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/globals/node_modules/type-fest": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.20.2.tgz", + "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/ignore": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-4.0.6.tgz", + "integrity": "sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==", + "dev": true, + "engines": { + "node": ">= 4" + } + }, + "node_modules/import-fresh": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", + "integrity": "sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==", + "dev": true, + "dependencies": { + "parent-module": "^1.0.0", + "resolve-from": "^4.0.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + 
"url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha1-khi5srkoojixPcT7a21XbyMUU+o=", + "dev": true, + "engines": { + "node": ">=0.8.19" + } + }, + "node_modules/inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", + "dev": true, + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/is-glob": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.1.tgz", + "integrity": "sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==", + "dev": true, + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", + "dev": true + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true + }, + "node_modules/js-yaml": { + "version": "3.14.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", + "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", + "dev": true, + "dependencies": { + "argparse": "^1.0.7", + "esprima": "^4.0.0" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true + }, + "node_modules/json-stable-stringify-without-jsonify": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", + "integrity": "sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=", + "dev": true + }, + "node_modules/levn": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", + "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", + "dev": true, + "dependencies": { + "prelude-ls": "^1.2.1", + "type-check": "~0.4.0" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/lodash": { + "version": 
"4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", + "dev": true + }, + "node_modules/lodash.clonedeep": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", + "integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=", + "dev": true + }, + "node_modules/lodash.flatten": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz", + "integrity": "sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8=", + "dev": true + }, + "node_modules/lodash.truncate": { + "version": "4.4.2", + "resolved": "https://registry.npmjs.org/lodash.truncate/-/lodash.truncate-4.4.2.tgz", + "integrity": "sha1-WjUNoLERO4N+z//VgSy+WNbq4ZM=", + "dev": true + }, + "node_modules/lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/minimatch": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", + "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "dev": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + }, + "node_modules/natural-compare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=", + "dev": true + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", + "dev": true, + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/optionator": { + "version": "0.9.1", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz", + "integrity": "sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw==", + "dev": true, + "dependencies": { + "deep-is": "^0.1.3", + "fast-levenshtein": "^2.0.6", + "levn": "^0.4.1", + "prelude-ls": "^1.2.1", + "type-check": "^0.4.0", + "word-wrap": "^1.2.3" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/parent-module": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", + "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", + "dev": true, + "dependencies": { + "callsites": "^3.0.0" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": 
"sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/prelude-ls": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", + "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", + "dev": true, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/progress": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", + "dev": true, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/punycode": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/regexpp": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.1.0.tgz", + "integrity": "sha512-ZOIzd8yVsQQA7j8GCSlPGXwg5PfmA1mrq0JP4nGhh54LaKN3xdai/vHUDu74pKwV8OxseMS65u2NImosQcSD0Q==", + "dev": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/mysticatea" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/resolve-from": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", + "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/rimraf": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", + "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", + "dev": true, + "dependencies": { + "glob": "^7.1.3" + }, + "bin": { + "rimraf": "bin.js" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/semver": { + "version": "7.3.5", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.3.5.tgz", + "integrity": "sha512-PoeGJYh8HK4BTO/a9Tf6ZG3veo/A7ZVsYrSA6J8ny9nb3B1VrpkuN+z9OE5wfE5p6H4LchYZsegiQgbJD94ZFQ==", + "dev": true, + "dependencies": { + "lru-cache": "^6.0.0" + }, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/slice-ansi": { + 
"version": "4.0.0", + "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-4.0.0.tgz", + "integrity": "sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==", + "dev": true, + "dependencies": { + "ansi-styles": "^4.0.0", + "astral-regex": "^2.0.0", + "is-fullwidth-code-point": "^3.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/slice-ansi?sponsor=1" + } + }, + "node_modules/slice-ansi/node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/slice-ansi/node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/slice-ansi/node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + }, + "node_modules/sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=", + "dev": true + }, + "node_modules/string-width": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", + "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", + "dev": true, + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", + "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", + "dev": true, + "dependencies": { + "ansi-regex": "^5.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, + "dependencies": { + "has-flag": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/table": { + "version": "6.6.0", + "resolved": "https://registry.npmjs.org/table/-/table-6.6.0.tgz", + "integrity": 
"sha512-iZMtp5tUvcnAdtHpZTWLPF0M7AgiQsURR2DwmxnJwSy8I3+cY+ozzVvYha3BOLG2TB+L0CqjIz+91htuj6yCXg==", + "dev": true, + "dependencies": { + "ajv": "^8.0.1", + "lodash.clonedeep": "^4.5.0", + "lodash.flatten": "^4.4.0", + "lodash.truncate": "^4.4.2", + "slice-ansi": "^4.0.0", + "string-width": "^4.2.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/table/node_modules/ajv": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.2.0.tgz", + "integrity": "sha512-WSNGFuyWd//XO8n/m/EaOlNLtO0yL8EXT/74LqT4khdhpZjP7lkj/kT5uwRmGitKEVp/Oj7ZUHeGfPtgHhQ5CA==", + "dev": true, + "dependencies": { + "fast-deep-equal": "^3.1.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/table/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "dev": true + }, + "node_modules/text-table": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", + "integrity": "sha1-f17oI66AUgfACvLfSoTsP8+lcLQ=", + "dev": true + }, + "node_modules/type-check": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", + "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", + "dev": true, + "dependencies": { + "prelude-ls": "^1.2.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/type-fest": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.8.1.tgz", + "integrity": "sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dev": true, + "dependencies": { + "punycode": "^2.1.0" + } + }, + "node_modules/v8-compile-cache": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/v8-compile-cache/-/v8-compile-cache-2.3.0.tgz", + "integrity": "sha512-l8lCEmLcLYZh4nbunNZvQCJc5pv7+RCwa8q/LdUx8u7lsWvPDKmpodJAJNwkAhJC//dFY48KuIEmjtd4RViDrA==", + "dev": true + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/word-wrap": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz", + "integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", + "dev": true + }, + "node_modules/yallist": { + "version": "4.0.0", + 
"resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true + } + }, + "dependencies": { + "@babel/code-frame": { + "version": "7.12.11", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.11.tgz", + "integrity": "sha512-Zt1yodBx1UcyiePMSkWnU4hPqhwq7hGi2nFL1LeA3EUl+q2LQx16MISgJ0+z7dnmgvP9QtIleuETGOiOH1RcIw==", + "dev": true, + "requires": { + "@babel/highlight": "^7.10.4" + } + }, + "@babel/helper-validator-identifier": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.14.0.tgz", + "integrity": "sha512-V3ts7zMSu5lfiwWDVWzRDGIN+lnCEUdaXgtVHJgLb1rGaA6jMrtB9EmE7L18foXJIE8Un/A/h6NJfGQp/e1J4A==", + "dev": true + }, + "@babel/highlight": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.14.0.tgz", + "integrity": "sha512-YSCOwxvTYEIMSGaBQb5kDDsCopDdiUGsqpatp3fOlI4+2HQSkTmEVWnVuySdAC5EWCqSWWTv0ib63RjR7dTBdg==", + "dev": true, + "requires": { + "@babel/helper-validator-identifier": "^7.14.0", + "chalk": "^2.0.0", + "js-tokens": "^4.0.0" + }, + "dependencies": { "chalk": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.1.tgz", - "integrity": "sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==", - "dev": true, - "requires": { - "ansi-styles": "^4.1.0", - "supports-color": "^7.1.0" - }, - "dependencies": { - "ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "requires": { - "color-convert": "^2.0.1" - } - }, - "color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "requires": { - "color-name": "~1.1.4" - } - }, - "color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true - }, - "has-flag": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", - "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "dev": true - }, - "supports-color": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", - "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "dev": true, - "requires": { - "has-flag": "^4.0.0" - } - } - } + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dev": true, + "requires": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + } + } + } + }, + "@eslint/eslintrc": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-0.4.0.tgz", + "integrity": 
"sha512-2ZPCc+uNbjV5ERJr+aKSPRwZgKd2z11x0EgLvb1PURmUrn9QNRXFqje0Ldq454PfAVyaJYyrDvvIKSFP4NnBog==", + "dev": true, + "requires": { + "ajv": "^6.12.4", + "debug": "^4.1.1", + "espree": "^7.3.0", + "globals": "^12.1.0", + "ignore": "^4.0.6", + "import-fresh": "^3.2.1", + "js-yaml": "^3.13.1", + "minimatch": "^3.0.4", + "strip-json-comments": "^3.1.1" + }, + "dependencies": { + "globals": { + "version": "12.4.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-12.4.0.tgz", + "integrity": "sha512-BWICuzzDvDoH54NHKCseDanAhE3CeDorgDL5MT6LMXXj2WCnd9UC2szdk4AWLfjdgNBCXLUanXYcpBBKOSWGwg==", + "dev": true, + "requires": { + "type-fest": "^0.8.1" + } + } + } + }, + "acorn": { + "version": "7.4.1", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.4.1.tgz", + "integrity": "sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==", + "dev": true + }, + "acorn-jsx": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.1.tgz", + "integrity": "sha512-K0Ptm/47OKfQRpNQ2J/oIN/3QYiK6FwW+eJbILhsdxh2WTLdl+30o8aGdTbm5JbffpFFAg/g+zi1E+jvJha5ng==", + "dev": true, + "requires": {} + }, + "ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dev": true, + "requires": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + } + }, + "ansi-colors": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz", + "integrity": "sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==", + "dev": true + }, + "ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true + }, + "ansi-styles": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dev": true, + "requires": { + "color-convert": "^1.9.0" + } + }, + "argparse": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "dev": true, + "requires": { + "sprintf-js": "~1.0.2" + } + }, + "astral-regex": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/astral-regex/-/astral-regex-2.0.0.tgz", + "integrity": "sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==", + "dev": true + }, + "balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true + }, + "brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "requires": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "callsites": { + "version": 
"3.1.0", + "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true + }, + "chalk": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.1.tgz", + "integrity": "sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==", + "dev": true, + "requires": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "dependencies": { + "ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "requires": { + "color-convert": "^2.0.1" + } }, "color-convert": { - "version": "1.9.3", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", - "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", - "dev": true, - "requires": { - "color-name": "1.1.3" - } + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "requires": { + "color-name": "~1.1.4" + } }, "color-name": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", - "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=", - "dev": true - }, - "concat-map": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", - "dev": true - }, - "cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", - "dev": true, - "requires": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - } - }, - "debug": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz", - "integrity": "sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==", - "dev": true, - "requires": { - "ms": "2.1.2" - } - }, - "deep-is": { - "version": "0.1.3", - "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.3.tgz", - "integrity": "sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=", - "dev": true - }, - "doctrine": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", - "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==", - "dev": true, - "requires": { - "esutils": "^2.0.2" - } - }, - "emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true - }, - "enquirer": { - "version": "2.3.6", - "resolved": "https://registry.npmjs.org/enquirer/-/enquirer-2.3.6.tgz", - "integrity": "sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg==", - "dev": true, - "requires": { - "ansi-colors": "^4.1.1" - } - }, - "escape-string-regexp": { - "version": "1.0.5", - "resolved": 
"https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", - "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=", - "dev": true - }, - "eslint": { - "version": "7.25.0", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-7.25.0.tgz", - "integrity": "sha512-TVpSovpvCNpLURIScDRB6g5CYu/ZFq9GfX2hLNIV4dSBKxIWojeDODvYl3t0k0VtMxYeR8OXPCFE5+oHMlGfhw==", - "dev": true, - "requires": { - "@babel/code-frame": "7.12.11", - "@eslint/eslintrc": "^0.4.0", - "ajv": "^6.10.0", - "chalk": "^4.0.0", - "cross-spawn": "^7.0.2", - "debug": "^4.0.1", - "doctrine": "^3.0.0", - "enquirer": "^2.3.5", - "eslint-scope": "^5.1.1", - "eslint-utils": "^2.1.0", - "eslint-visitor-keys": "^2.0.0", - "espree": "^7.3.1", - "esquery": "^1.4.0", - "esutils": "^2.0.2", - "file-entry-cache": "^6.0.1", - "functional-red-black-tree": "^1.0.1", - "glob-parent": "^5.0.0", - "globals": "^13.6.0", - "ignore": "^4.0.6", - "import-fresh": "^3.0.0", - "imurmurhash": "^0.1.4", - "is-glob": "^4.0.0", - "js-yaml": "^3.13.1", - "json-stable-stringify-without-jsonify": "^1.0.1", - "levn": "^0.4.1", - "lodash": "^4.17.21", - "minimatch": "^3.0.4", - "natural-compare": "^1.4.0", - "optionator": "^0.9.1", - "progress": "^2.0.0", - "regexpp": "^3.1.0", - "semver": "^7.2.1", - "strip-ansi": "^6.0.0", - "strip-json-comments": "^3.1.0", - "table": "^6.0.4", - "text-table": "^0.2.0", - "v8-compile-cache": "^2.0.3" - } - }, - "eslint-scope": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz", - "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==", - "dev": true, - "requires": { - "esrecurse": "^4.3.0", - "estraverse": "^4.1.1" - } - }, - "eslint-utils": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-2.1.0.tgz", - "integrity": "sha512-w94dQYoauyvlDc43XnGB8lU3Zt713vNChgt4EWwhXAP2XkBvndfxF0AgIqKOOasjPIPzj9JqgwkwbCYD0/V3Zg==", - "dev": true, - "requires": { - "eslint-visitor-keys": "^1.1.0" - }, - "dependencies": { - "eslint-visitor-keys": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", - "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", - "dev": true - } - } - }, - "eslint-visitor-keys": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz", - "integrity": "sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==", - "dev": true - }, - "espree": { - "version": "7.3.1", - "resolved": "https://registry.npmjs.org/espree/-/espree-7.3.1.tgz", - "integrity": "sha512-v3JCNCE64umkFpmkFGqzVKsOT0tN1Zr+ueqLZfpV1Ob8e+CEgPWa+OxCoGH3tnhimMKIaBm4m/vaRpJ/krRz2g==", - "dev": true, - "requires": { - "acorn": "^7.4.0", - "acorn-jsx": "^5.3.1", - "eslint-visitor-keys": "^1.3.0" - }, - "dependencies": { - "eslint-visitor-keys": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", - "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", - "dev": true - } - } - }, - "esprima": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", - "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "dev": true - }, - "esquery": { - "version": "1.4.0", 
- "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.4.0.tgz", - "integrity": "sha512-cCDispWt5vHHtwMY2YrAQ4ibFkAL8RbH5YGBnZBc90MolvvfkkQcJro/aZiAQUlQ3qgrYS6D6v8Gc5G5CQsc9w==", - "dev": true, - "requires": { - "estraverse": "^5.1.0" - }, - "dependencies": { - "estraverse": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", - "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", - "dev": true - } - } - }, - "esrecurse": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", - "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", - "dev": true, - "requires": { - "estraverse": "^5.2.0" - }, - "dependencies": { - "estraverse": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", - "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", - "dev": true - } - } - }, - "estraverse": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", - "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", - "dev": true - }, - "esutils": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", - "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - "dev": true - }, - "fast-deep-equal": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", - "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", - "dev": true - }, - "fast-json-stable-stringify": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", - "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", - "dev": true - }, - "fast-levenshtein": { - "version": "2.0.6", - "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", - "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=", - "dev": true - }, - "file-entry-cache": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", - "integrity": "sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==", - "dev": true, - "requires": { - "flat-cache": "^3.0.4" - } - }, - "flat-cache": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.0.4.tgz", - "integrity": "sha512-dm9s5Pw7Jc0GvMYbshN6zchCA9RgQlzzEZX3vylR9IqFfS8XciblUXOKfW6SiuJ0e13eDYZoZV5wdrev7P3Nwg==", - "dev": true, - "requires": { - "flatted": "^3.1.0", - "rimraf": "^3.0.2" - } - }, - "flatted": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.1.1.tgz", - "integrity": "sha512-zAoAQiudy+r5SvnSw3KJy5os/oRJYHzrzja/tBDqrZtNhUw8bt6y8OBzMWcjWr+8liV8Eb6yOhw8WZ7VFZ5ZzA==", - "dev": true - }, - "fs.realpath": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", - "dev": true - }, - "functional-red-black-tree": { - "version": "1.0.1", - "resolved": 
"https://registry.npmjs.org/functional-red-black-tree/-/functional-red-black-tree-1.0.1.tgz", - "integrity": "sha1-GwqzvVU7Kg1jmdKcDj6gslIHgyc=", - "dev": true - }, - "glob": { - "version": "7.1.6", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz", - "integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==", - "dev": true, - "requires": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.0.4", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" - } - }, - "glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "dev": true, - "requires": { - "is-glob": "^4.0.1" - } - }, - "globals": { - "version": "13.8.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-13.8.0.tgz", - "integrity": "sha512-rHtdA6+PDBIjeEvA91rpqzEvk/k3/i7EeNQiryiWuJH0Hw9cpyJMAt2jtbAwUaRdhD+573X4vWw6IcjKPasi9Q==", - "dev": true, - "requires": { - "type-fest": "^0.20.2" - }, - "dependencies": { - "type-fest": { - "version": "0.20.2", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.20.2.tgz", - "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==", - "dev": true - } - } + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true }, "has-flag": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", - "integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=", - "dev": true - }, - "ignore": { - "version": "4.0.6", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-4.0.6.tgz", - "integrity": "sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==", - "dev": true - }, - "import-fresh": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", - "integrity": "sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==", - "dev": true, - "requires": { - "parent-module": "^1.0.0", - "resolve-from": "^4.0.0" - } - }, - "imurmurhash": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", - "integrity": "sha1-khi5srkoojixPcT7a21XbyMUU+o=", - "dev": true - }, - "inflight": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", - "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", - "dev": true, - "requires": { - "once": "^1.3.0", - "wrappy": "1" - } - }, - "inherits": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", - "dev": true - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "dev": 
true - }, - "is-glob": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.1.tgz", - "integrity": "sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==", - "dev": true, - "requires": { - "is-extglob": "^2.1.1" - } - }, - "isexe": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", - "dev": true - }, - "js-tokens": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", - "dev": true - }, - "js-yaml": { - "version": "3.14.1", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", - "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", - "dev": true, - "requires": { - "argparse": "^1.0.7", - "esprima": "^4.0.0" - } - }, - "json-schema-traverse": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", - "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", - "dev": true - }, - "json-stable-stringify-without-jsonify": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", - "integrity": "sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=", - "dev": true - }, - "levn": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", - "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", - "dev": true, - "requires": { - "prelude-ls": "^1.2.1", - "type-check": "~0.4.0" - } - }, - "lodash": { - "version": "4.17.21", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", - "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", - "dev": true - }, - "lodash.clonedeep": { - "version": "4.5.0", - "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", - "integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=", - "dev": true - }, - "lodash.flatten": { - "version": "4.4.0", - "resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz", - "integrity": "sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8=", - "dev": true - }, - "lodash.truncate": { - "version": "4.4.2", - "resolved": "https://registry.npmjs.org/lodash.truncate/-/lodash.truncate-4.4.2.tgz", - "integrity": "sha1-WjUNoLERO4N+z//VgSy+WNbq4ZM=", - "dev": true - }, - "lru-cache": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", - "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", - "dev": true, - "requires": { - "yallist": "^4.0.0" - } - }, - "minimatch": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", - "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", - "dev": true, - "requires": { - "brace-expansion": "^1.1.7" - } - }, - "ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", - "dev": true 
- }, - "natural-compare": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", - "integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=", - "dev": true - }, - "once": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", - "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", - "dev": true, - "requires": { - "wrappy": "1" - } - }, - "optionator": { - "version": "0.9.1", - "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz", - "integrity": "sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw==", - "dev": true, - "requires": { - "deep-is": "^0.1.3", - "fast-levenshtein": "^2.0.6", - "levn": "^0.4.1", - "prelude-ls": "^1.2.1", - "type-check": "^0.4.0", - "word-wrap": "^1.2.3" - } - }, - "parent-module": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", - "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", - "dev": true, - "requires": { - "callsites": "^3.0.0" - } - }, - "path-is-absolute": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", - "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", - "dev": true - }, - "path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "dev": true - }, - "prelude-ls": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", - "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", - "dev": true - }, - "progress": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", - "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", - "dev": true - }, - "punycode": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", - "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", - "dev": true - }, - "regexpp": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.1.0.tgz", - "integrity": "sha512-ZOIzd8yVsQQA7j8GCSlPGXwg5PfmA1mrq0JP4nGhh54LaKN3xdai/vHUDu74pKwV8OxseMS65u2NImosQcSD0Q==", - "dev": true - }, - "require-from-string": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", - "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", - "dev": true - }, - "resolve-from": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", - "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", - "dev": true - }, - "rimraf": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", - "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", - "dev": true, - "requires": { - "glob": "^7.1.3" - } - }, - "semver": { - "version": "7.3.5", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.3.5.tgz", - "integrity": 
"sha512-PoeGJYh8HK4BTO/a9Tf6ZG3veo/A7ZVsYrSA6J8ny9nb3B1VrpkuN+z9OE5wfE5p6H4LchYZsegiQgbJD94ZFQ==", - "dev": true, - "requires": { - "lru-cache": "^6.0.0" - } - }, - "shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dev": true, - "requires": { - "shebang-regex": "^3.0.0" - } - }, - "shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "dev": true - }, - "slice-ansi": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-4.0.0.tgz", - "integrity": "sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==", - "dev": true, - "requires": { - "ansi-styles": "^4.0.0", - "astral-regex": "^2.0.0", - "is-fullwidth-code-point": "^3.0.0" - }, - "dependencies": { - "ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "requires": { - "color-convert": "^2.0.1" - } - }, - "color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "requires": { - "color-name": "~1.1.4" - } - }, - "color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true - } - } - }, - "sprintf-js": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", - "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=", - "dev": true - }, - "string-width": { - "version": "4.2.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", - "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", - "dev": true, - "requires": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.0" - } - }, - "strip-ansi": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", - "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", - "dev": true, - "requires": { - "ansi-regex": "^5.0.0" - } - }, - "strip-json-comments": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", - "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", - "dev": true + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true }, "supports-color": { - "version": "5.5.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", - "integrity": 
"sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", - "dev": true, - "requires": { - "has-flag": "^3.0.0" - } - }, - "table": { - "version": "6.6.0", - "resolved": "https://registry.npmjs.org/table/-/table-6.6.0.tgz", - "integrity": "sha512-iZMtp5tUvcnAdtHpZTWLPF0M7AgiQsURR2DwmxnJwSy8I3+cY+ozzVvYha3BOLG2TB+L0CqjIz+91htuj6yCXg==", - "dev": true, - "requires": { - "ajv": "^8.0.1", - "lodash.clonedeep": "^4.5.0", - "lodash.flatten": "^4.4.0", - "lodash.truncate": "^4.4.2", - "slice-ansi": "^4.0.0", - "string-width": "^4.2.0", - "strip-ansi": "^6.0.0" - }, - "dependencies": { - "ajv": { - "version": "8.2.0", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.2.0.tgz", - "integrity": "sha512-WSNGFuyWd//XO8n/m/EaOlNLtO0yL8EXT/74LqT4khdhpZjP7lkj/kT5uwRmGitKEVp/Oj7ZUHeGfPtgHhQ5CA==", - "dev": true, - "requires": { - "fast-deep-equal": "^3.1.1", - "json-schema-traverse": "^1.0.0", - "require-from-string": "^2.0.2", - "uri-js": "^4.2.2" - } - }, - "json-schema-traverse": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", - "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", - "dev": true - } - } - }, - "text-table": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", - "integrity": "sha1-f17oI66AUgfACvLfSoTsP8+lcLQ=", - "dev": true - }, - "type-check": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", - "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", - "dev": true, - "requires": { - "prelude-ls": "^1.2.1" - } - }, + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "requires": { + "has-flag": "^4.0.0" + } + } + } + }, + "color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dev": true, + "requires": { + "color-name": "1.1.3" + } + }, + "color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=", + "dev": true + }, + "concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", + "dev": true + }, + "cross-spawn": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", + "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "dev": true, + "requires": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + } + }, + "debug": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz", + "integrity": "sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==", + "dev": true, + "requires": { + "ms": "2.1.2" + } + }, + "deep-is": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.3.tgz", + "integrity": "sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=", + "dev": true + }, 
+ "doctrine": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", + "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==", + "dev": true, + "requires": { + "esutils": "^2.0.2" + } + }, + "emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "enquirer": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/enquirer/-/enquirer-2.3.6.tgz", + "integrity": "sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg==", + "dev": true, + "requires": { + "ansi-colors": "^4.1.1" + } + }, + "escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=", + "dev": true + }, + "eslint": { + "version": "7.25.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-7.25.0.tgz", + "integrity": "sha512-TVpSovpvCNpLURIScDRB6g5CYu/ZFq9GfX2hLNIV4dSBKxIWojeDODvYl3t0k0VtMxYeR8OXPCFE5+oHMlGfhw==", + "dev": true, + "requires": { + "@babel/code-frame": "7.12.11", + "@eslint/eslintrc": "^0.4.0", + "ajv": "^6.10.0", + "chalk": "^4.0.0", + "cross-spawn": "^7.0.2", + "debug": "^4.0.1", + "doctrine": "^3.0.0", + "enquirer": "^2.3.5", + "eslint-scope": "^5.1.1", + "eslint-utils": "^2.1.0", + "eslint-visitor-keys": "^2.0.0", + "espree": "^7.3.1", + "esquery": "^1.4.0", + "esutils": "^2.0.2", + "file-entry-cache": "^6.0.1", + "functional-red-black-tree": "^1.0.1", + "glob-parent": "^5.0.0", + "globals": "^13.6.0", + "ignore": "^4.0.6", + "import-fresh": "^3.0.0", + "imurmurhash": "^0.1.4", + "is-glob": "^4.0.0", + "js-yaml": "^3.13.1", + "json-stable-stringify-without-jsonify": "^1.0.1", + "levn": "^0.4.1", + "lodash": "^4.17.21", + "minimatch": "^3.0.4", + "natural-compare": "^1.4.0", + "optionator": "^0.9.1", + "progress": "^2.0.0", + "regexpp": "^3.1.0", + "semver": "^7.2.1", + "strip-ansi": "^6.0.0", + "strip-json-comments": "^3.1.0", + "table": "^6.0.4", + "text-table": "^0.2.0", + "v8-compile-cache": "^2.0.3" + } + }, + "eslint-scope": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz", + "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==", + "dev": true, + "requires": { + "esrecurse": "^4.3.0", + "estraverse": "^4.1.1" + } + }, + "eslint-utils": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-2.1.0.tgz", + "integrity": "sha512-w94dQYoauyvlDc43XnGB8lU3Zt713vNChgt4EWwhXAP2XkBvndfxF0AgIqKOOasjPIPzj9JqgwkwbCYD0/V3Zg==", + "dev": true, + "requires": { + "eslint-visitor-keys": "^1.1.0" + }, + "dependencies": { + "eslint-visitor-keys": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", + "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", + "dev": true + } + } + }, + "eslint-visitor-keys": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz", + "integrity": "sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==", + "dev": true + }, + "espree": { + 
"version": "7.3.1", + "resolved": "https://registry.npmjs.org/espree/-/espree-7.3.1.tgz", + "integrity": "sha512-v3JCNCE64umkFpmkFGqzVKsOT0tN1Zr+ueqLZfpV1Ob8e+CEgPWa+OxCoGH3tnhimMKIaBm4m/vaRpJ/krRz2g==", + "dev": true, + "requires": { + "acorn": "^7.4.0", + "acorn-jsx": "^5.3.1", + "eslint-visitor-keys": "^1.3.0" + }, + "dependencies": { + "eslint-visitor-keys": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", + "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", + "dev": true + } + } + }, + "esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "dev": true + }, + "esquery": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.4.0.tgz", + "integrity": "sha512-cCDispWt5vHHtwMY2YrAQ4ibFkAL8RbH5YGBnZBc90MolvvfkkQcJro/aZiAQUlQ3qgrYS6D6v8Gc5G5CQsc9w==", + "dev": true, + "requires": { + "estraverse": "^5.1.0" + }, + "dependencies": { + "estraverse": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", + "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", + "dev": true + } + } + }, + "esrecurse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", + "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", + "dev": true, + "requires": { + "estraverse": "^5.2.0" + }, + "dependencies": { + "estraverse": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", + "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", + "dev": true + } + } + }, + "estraverse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "dev": true + }, + "esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true + }, + "fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true + }, + "fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true + }, + "fast-levenshtein": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", + "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=", + "dev": true + }, + "file-entry-cache": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", + "integrity": "sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==", + "dev": true, + "requires": { + 
"flat-cache": "^3.0.4" + } + }, + "flat-cache": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.0.4.tgz", + "integrity": "sha512-dm9s5Pw7Jc0GvMYbshN6zchCA9RgQlzzEZX3vylR9IqFfS8XciblUXOKfW6SiuJ0e13eDYZoZV5wdrev7P3Nwg==", + "dev": true, + "requires": { + "flatted": "^3.1.0", + "rimraf": "^3.0.2" + } + }, + "flatted": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.1.1.tgz", + "integrity": "sha512-zAoAQiudy+r5SvnSw3KJy5os/oRJYHzrzja/tBDqrZtNhUw8bt6y8OBzMWcjWr+8liV8Eb6yOhw8WZ7VFZ5ZzA==", + "dev": true + }, + "fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", + "dev": true + }, + "functional-red-black-tree": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/functional-red-black-tree/-/functional-red-black-tree-1.0.1.tgz", + "integrity": "sha1-GwqzvVU7Kg1jmdKcDj6gslIHgyc=", + "dev": true + }, + "glob": { + "version": "7.1.6", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz", + "integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==", + "dev": true, + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + }, + "glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "requires": { + "is-glob": "^4.0.1" + } + }, + "globals": { + "version": "13.8.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-13.8.0.tgz", + "integrity": "sha512-rHtdA6+PDBIjeEvA91rpqzEvk/k3/i7EeNQiryiWuJH0Hw9cpyJMAt2jtbAwUaRdhD+573X4vWw6IcjKPasi9Q==", + "dev": true, + "requires": { + "type-fest": "^0.20.2" + }, + "dependencies": { "type-fest": { - "version": "0.8.1", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.8.1.tgz", - "integrity": "sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA==", - "dev": true - }, - "uri-js": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", - "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", - "dev": true, - "requires": { - "punycode": "^2.1.0" - } - }, - "v8-compile-cache": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/v8-compile-cache/-/v8-compile-cache-2.3.0.tgz", - "integrity": "sha512-l8lCEmLcLYZh4nbunNZvQCJc5pv7+RCwa8q/LdUx8u7lsWvPDKmpodJAJNwkAhJC//dFY48KuIEmjtd4RViDrA==", - "dev": true - }, - "which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dev": true, - "requires": { - "isexe": "^2.0.0" - } + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.20.2.tgz", + "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==", + "dev": true + } + } + }, + "has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=", + "dev": true + }, + "ignore": { + "version": "4.0.6", + 
"resolved": "https://registry.npmjs.org/ignore/-/ignore-4.0.6.tgz", + "integrity": "sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==", + "dev": true + }, + "import-fresh": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", + "integrity": "sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==", + "dev": true, + "requires": { + "parent-module": "^1.0.0", + "resolve-from": "^4.0.0" + } + }, + "imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha1-khi5srkoojixPcT7a21XbyMUU+o=", + "dev": true + }, + "inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", + "dev": true, + "requires": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true + }, + "is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", + "dev": true + }, + "is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true + }, + "is-glob": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.1.tgz", + "integrity": "sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==", + "dev": true, + "requires": { + "is-extglob": "^2.1.1" + } + }, + "isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", + "dev": true + }, + "js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true + }, + "js-yaml": { + "version": "3.14.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", + "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", + "dev": true, + "requires": { + "argparse": "^1.0.7", + "esprima": "^4.0.0" + } + }, + "json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true + }, + "json-stable-stringify-without-jsonify": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", + "integrity": "sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=", + "dev": true + }, + "levn": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", + "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", + "dev": true, + "requires": { + "prelude-ls": "^1.2.1", + "type-check": 
"~0.4.0" + } + }, + "lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", + "dev": true + }, + "lodash.clonedeep": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", + "integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=", + "dev": true + }, + "lodash.flatten": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz", + "integrity": "sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8=", + "dev": true + }, + "lodash.truncate": { + "version": "4.4.2", + "resolved": "https://registry.npmjs.org/lodash.truncate/-/lodash.truncate-4.4.2.tgz", + "integrity": "sha1-WjUNoLERO4N+z//VgSy+WNbq4ZM=", + "dev": true + }, + "lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "requires": { + "yallist": "^4.0.0" + } + }, + "minimatch": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", + "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "dev": true, + "requires": { + "brace-expansion": "^1.1.7" + } + }, + "ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + }, + "natural-compare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=", + "dev": true + }, + "once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", + "dev": true, + "requires": { + "wrappy": "1" + } + }, + "optionator": { + "version": "0.9.1", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz", + "integrity": "sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw==", + "dev": true, + "requires": { + "deep-is": "^0.1.3", + "fast-levenshtein": "^2.0.6", + "levn": "^0.4.1", + "prelude-ls": "^1.2.1", + "type-check": "^0.4.0", + "word-wrap": "^1.2.3" + } + }, + "parent-module": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", + "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", + "dev": true, + "requires": { + "callsites": "^3.0.0" + } + }, + "path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", + "dev": true + }, + "path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true + }, + "prelude-ls": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", + "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", + "dev": true + }, + "progress": { + 
"version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", + "dev": true + }, + "punycode": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", + "dev": true + }, + "regexpp": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.1.0.tgz", + "integrity": "sha512-ZOIzd8yVsQQA7j8GCSlPGXwg5PfmA1mrq0JP4nGhh54LaKN3xdai/vHUDu74pKwV8OxseMS65u2NImosQcSD0Q==", + "dev": true + }, + "require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "dev": true + }, + "resolve-from": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", + "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", + "dev": true + }, + "rimraf": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", + "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", + "dev": true, + "requires": { + "glob": "^7.1.3" + } + }, + "semver": { + "version": "7.3.5", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.3.5.tgz", + "integrity": "sha512-PoeGJYh8HK4BTO/a9Tf6ZG3veo/A7ZVsYrSA6J8ny9nb3B1VrpkuN+z9OE5wfE5p6H4LchYZsegiQgbJD94ZFQ==", + "dev": true, + "requires": { + "lru-cache": "^6.0.0" + } + }, + "shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "requires": { + "shebang-regex": "^3.0.0" + } + }, + "shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true + }, + "slice-ansi": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-4.0.0.tgz", + "integrity": "sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==", + "dev": true, + "requires": { + "ansi-styles": "^4.0.0", + "astral-regex": "^2.0.0", + "is-fullwidth-code-point": "^3.0.0" + }, + "dependencies": { + "ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "requires": { + "color-convert": "^2.0.1" + } }, - "word-wrap": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz", - "integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==", - "dev": true + "color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + 
"dev": true, + "requires": { + "color-name": "~1.1.4" + } }, - "wrappy": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", - "dev": true + "color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + } + } + }, + "sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=", + "dev": true + }, + "string-width": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", + "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", + "dev": true, + "requires": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.0" + } + }, + "strip-ansi": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", + "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", + "dev": true, + "requires": { + "ansi-regex": "^5.0.0" + } + }, + "strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true + }, + "supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, + "requires": { + "has-flag": "^3.0.0" + } + }, + "table": { + "version": "6.6.0", + "resolved": "https://registry.npmjs.org/table/-/table-6.6.0.tgz", + "integrity": "sha512-iZMtp5tUvcnAdtHpZTWLPF0M7AgiQsURR2DwmxnJwSy8I3+cY+ozzVvYha3BOLG2TB+L0CqjIz+91htuj6yCXg==", + "dev": true, + "requires": { + "ajv": "^8.0.1", + "lodash.clonedeep": "^4.5.0", + "lodash.flatten": "^4.4.0", + "lodash.truncate": "^4.4.2", + "slice-ansi": "^4.0.0", + "string-width": "^4.2.0", + "strip-ansi": "^6.0.0" + }, + "dependencies": { + "ajv": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.2.0.tgz", + "integrity": "sha512-WSNGFuyWd//XO8n/m/EaOlNLtO0yL8EXT/74LqT4khdhpZjP7lkj/kT5uwRmGitKEVp/Oj7ZUHeGfPtgHhQ5CA==", + "dev": true, + "requires": { + "fast-deep-equal": "^3.1.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2", + "uri-js": "^4.2.2" + } }, - "yallist": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", - "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", - "dev": true + "json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "dev": true } + } + }, + "text-table": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", + "integrity": "sha1-f17oI66AUgfACvLfSoTsP8+lcLQ=", + "dev": true + }, + "type-check": { + "version": "0.4.0", + "resolved": 
"https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", + "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", + "dev": true, + "requires": { + "prelude-ls": "^1.2.1" + } + }, + "type-fest": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.8.1.tgz", + "integrity": "sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA==", + "dev": true + }, + "uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dev": true, + "requires": { + "punycode": "^2.1.0" + } + }, + "v8-compile-cache": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/v8-compile-cache/-/v8-compile-cache-2.3.0.tgz", + "integrity": "sha512-l8lCEmLcLYZh4nbunNZvQCJc5pv7+RCwa8q/LdUx8u7lsWvPDKmpodJAJNwkAhJC//dFY48KuIEmjtd4RViDrA==", + "dev": true + }, + "which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "requires": { + "isexe": "^2.0.0" + } + }, + "word-wrap": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz", + "integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==", + "dev": true + }, + "wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", + "dev": true + }, + "yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true } + } } diff --git a/dev/package.json b/dev/package.json index 0391a3983f78f..f975bdde8319a 100644 --- a/dev/package.json +++ b/dev/package.json @@ -1,5 +1,6 @@ { "devDependencies": { - "eslint": "^7.25.0" + "eslint": "^7.25.0", + "ansi-regex": "^5.0.1" } } From b8b1fbc21c66348d25be3404d3f61099f2a7a9b5 Mon Sep 17 00:00:00 2001 From: Yun Tang Date: Fri, 25 Feb 2022 12:07:47 +0900 Subject: [PATCH 330/513] [SPARK-38275][SS] Include the writeBatch's memory usage as the total memory usage of RocksDB state store ### What changes were proposed in this pull request? Include the writeBatch's memoy usage as the total memory usage of RocksDB state store. Moreover, this PR also includes a hotfix to clear write batch just after `commit`. ### Why are the changes needed? As the memory used by WriteBatch has no limit, the actual memory usage could be much larger than previously stats without considering the memoy used by write batch. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? Test via running jobs with large micro-batch. Closes #35600 from Myasuka/SPARK-38275. 
Authored-by: Yun Tang Signed-off-by: Jungtaek Lim --- .../spark/sql/execution/streaming/state/RocksDB.scala | 8 ++++++-- .../streaming/state/RocksDBStateStoreProvider.scala | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala index 0ad03169d2053..a5bd489e04fda 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala @@ -371,6 +371,8 @@ class RocksDB( val readerMemUsage = getDBProperty("rocksdb.estimate-table-readers-mem") val memTableMemUsage = getDBProperty("rocksdb.size-all-mem-tables") val blockCacheUsage = getDBProperty("rocksdb.block-cache-usage") + // Get the approximate memory usage of this writeBatchWithIndex + val writeBatchMemUsage = writeBatch.getWriteBatch.getDataSize val nativeOpsHistograms = Seq( "get" -> DB_GET, "put" -> DB_WRITE, @@ -404,7 +406,8 @@ class RocksDB( RocksDBMetrics( numKeysOnLoadedVersion, numKeysOnWritingVersion, - readerMemUsage + memTableMemUsage + blockCacheUsage, + readerMemUsage + memTableMemUsage + blockCacheUsage + writeBatchMemUsage, + writeBatchMemUsage, totalSSTFilesBytes, nativeOpsLatencyMicros.toMap, commitLatencyMs, @@ -617,7 +620,8 @@ object RocksDBConf { case class RocksDBMetrics( numCommittedKeys: Long, numUncommittedKeys: Long, - memUsageBytes: Long, + totalMemUsageBytes: Long, + writeBatchMemUsageBytes: Long, totalSSTFilesBytes: Long, nativeOpsHistograms: Map[String, RocksDBNativeHistogram], lastCommitLatencyMs: Map[String, Long], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala index c88e6ae3f477c..79614df629927 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala @@ -148,7 +148,7 @@ private[sql] class RocksDBStateStoreProvider StateStoreMetrics( rocksDBMetrics.numUncommittedKeys, - rocksDBMetrics.memUsageBytes, + rocksDBMetrics.totalMemUsageBytes, stateStoreCustomMetrics) } From 860f44fe0f3de8a90e63df363ce405f3ce5a1c8f Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 25 Feb 2022 11:12:57 +0800 Subject: [PATCH 331/513] [SPARK-38311][SQL] Fix DynamicPartitionPruning/BucketedReadSuite/ExpressionInfoSuite under ANSI mode ### What changes were proposed in this pull request? Fix the following test suites under ANSI mode: - DynamicPartitionPruningV1SuiteAEOff - DynamicPartitionPruningV1SuiteAEOn - DynamicPartitionPruningV2SuiteAEOff - DynamicPartitionPruningV2SuiteAEOn - ExpressionInfoSuite - BucketedReadSuite To fix ExpressionInfoSuite, this PR changes the invalid examples under ANSI mode and add comments about the behaviors with ANSI on/off. ### Why are the changes needed? To set up a new GA test job with ANSI mode on ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually turn on ANSI mode and test Closes #35644 from gengliangwang/fixMoreAnsi. 
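As a hedged illustration of the ANSI on/off difference that the updated expression documentation describes, consider `make_date`, one of the functions whose doc examples are adjusted below. The snippet is a sketch (it builds its own local session and does not assert a specific error class), not part of the patch.

```scala
import org.apache.spark.sql.SparkSession

// Sketch only: an invalid month yields NULL with ANSI off, but an error with
// ANSI on, which is why such examples were removed from the expression docs
// that ExpressionInfoSuite checks.
val spark = SparkSession.builder().master("local[1]").appName("ansi-demo").getOrCreate()

spark.conf.set("spark.sql.ansi.enabled", "false")
spark.sql("SELECT make_date(2019, 13, 1)").show()   // prints NULL

spark.conf.set("spark.sql.ansi.enabled", "true")
try {
  spark.sql("SELECT make_date(2019, 13, 1)").show() // invalid month -> runtime error
} catch {
  case e: Exception => println(s"ANSI mode error: ${e.getMessage}")
}
spark.stop()
```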
Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../expressions/collectionOperations.scala | 2 -- .../expressions/datetimeExpressions.scala | 19 ++++++------------- .../sql/catalyst/expressions/predicates.scala | 2 +- .../sql/DynamicPartitionPruningSuite.scala | 3 ++- .../spark/sql/sources/BucketedReadSuite.scala | 2 +- 5 files changed, 10 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 0cd8593c766df..e53fc5eef06d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -90,8 +90,6 @@ trait BinaryArrayExpressionWithImplicitCast extends BinaryExpression 4 > SELECT _FUNC_(map('a', 1, 'b', 2)); 2 - > SELECT _FUNC_(NULL); - -1 """, since = "1.5.0", group = "collection_funcs") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 9780b9df0e031..6217035df72c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -2344,8 +2344,9 @@ case class DateDiff(endDate: Expression, startDate: Expression) copy(endDate = newLeft, startDate = newRight) } +// scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(year, month, day) - Create date from year, month and day fields.", + usage = "_FUNC_(year, month, day) - Create date from year, month and day fields. If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs. Otherwise, it will throw an error instead.", arguments = """ Arguments: * year - the year to represent, from 1 to 9999 @@ -2356,15 +2357,12 @@ case class DateDiff(endDate: Expression, startDate: Expression) Examples: > SELECT _FUNC_(2013, 7, 15); 2013-07-15 - > SELECT _FUNC_(2019, 13, 1); - NULL > SELECT _FUNC_(2019, 7, NULL); NULL - > SELECT _FUNC_(2019, 2, 30); - NULL """, group = "datetime_funcs", since = "3.0.0") +// scalastyle:on line.size.limit case class MakeDate( year: Expression, month: Expression, @@ -2418,7 +2416,7 @@ case class MakeDate( // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(year, month, day, hour, min, sec) - Create local date-time from year, month, day, hour, min, sec fields. ", + usage = "_FUNC_(year, month, day, hour, min, sec) - Create local date-time from year, month, day, hour, min, sec fields. If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs. Otherwise, it will throw an error instead.", arguments = """ Arguments: * year - the year to represent, from 1 to 9999 @@ -2462,7 +2460,7 @@ object MakeTimestampNTZExpressionBuilder extends ExpressionBuilder { // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(year, month, day, hour, min, sec[, timezone]) - Create the current timestamp with local time zone from year, month, day, hour, min, sec and timezone fields. ", + usage = "_FUNC_(year, month, day, hour, min, sec[, timezone]) - Create the current timestamp with local time zone from year, month, day, hour, min, sec and timezone fields. 
If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs. Otherwise, it will throw an error instead.", arguments = """ Arguments: * year - the year to represent, from 1 to 9999 @@ -2483,8 +2481,6 @@ object MakeTimestampNTZExpressionBuilder extends ExpressionBuilder { 2014-12-27 21:30:45.887 > SELECT _FUNC_(2019, 6, 30, 23, 59, 60); 2019-07-01 00:00:00 - > SELECT _FUNC_(2019, 13, 1, 10, 11, 12, 'PST'); - NULL > SELECT _FUNC_(null, 7, 22, 15, 30, 0); NULL """, @@ -2512,8 +2508,7 @@ object MakeTimestampLTZExpressionBuilder extends ExpressionBuilder { // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(year, month, day, hour, min, sec[, timezone]) - Create timestamp from year, month, day, hour, min, sec and timezone fields. " + - "The result data type is consistent with the value of configuration `spark.sql.timestampType`", + usage = "_FUNC_(year, month, day, hour, min, sec[, timezone]) - Create timestamp from year, month, day, hour, min, sec and timezone fields. The result data type is consistent with the value of configuration `spark.sql.timestampType`. If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs. Otherwise, it will throw an error instead.", arguments = """ Arguments: * year - the year to represent, from 1 to 9999 @@ -2537,8 +2532,6 @@ object MakeTimestampLTZExpressionBuilder extends ExpressionBuilder { 2019-07-01 00:00:00 > SELECT _FUNC_(2019, 6, 30, 23, 59, 1); 2019-06-30 23:59:01 - > SELECT _FUNC_(2019, 13, 1, 10, 11, 12, 'PST'); - NULL > SELECT _FUNC_(null, 7, 22, 15, 30, 0); NULL """, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index c09d3e47e460a..a2fd668f495e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -1134,7 +1134,7 @@ case class LessThanOrEqual(left: Expression, right: Expression) Examples: > SELECT 2 _FUNC_ 1; true - > SELECT 2 _FUNC_ '1.1'; + > SELECT 2 _FUNC_ 1.1; true > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52'); false diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala index 9cef2553b365b..c67b05a5ca238 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala @@ -1153,7 +1153,8 @@ abstract class DynamicPartitionPruningSuiteBase test("join key with multiple references on the filtering plan") { withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true", - SQLConf.ADAPTIVE_OPTIMIZER_EXCLUDED_RULES.key -> AQEPropagateEmptyRelation.ruleName + SQLConf.ADAPTIVE_OPTIMIZER_EXCLUDED_RULES.key -> AQEPropagateEmptyRelation.ruleName, + SQLConf.ANSI_ENABLED.key -> "false" // ANSI mode doesn't support "String + String" ) { // when enable AQE, the reusedExchange is inserted when executed. 
withTable("fact", "dim") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index d90c8732ea287..f4de713d04fc0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -282,7 +282,7 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti withTable("bucketed_table") { val numBuckets = NumBucketsForPruningNullDf val bucketSpec = BucketSpec(numBuckets, Seq("j"), Nil) - val naNDF = nullDF.selectExpr("i", "cast(if(isnull(j), 'NaN', j) as double) as j", "k") + val naNDF = nullDF.selectExpr("i", "try_cast(if(isnull(j), 'NaN', j) as double) as j", "k") // json does not support predicate push-down, and thus json is used here naNDF.write .format("json") From 6a79539f501ae1bef45feef6615ff60f8947bc54 Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Fri, 25 Feb 2022 11:57:28 +0800 Subject: [PATCH 332/513] [SPARK-38298][SQL][TESTS] Fix DataExpressionSuite, NullExpressionsSuite, StringExpressionsSuite, complexTypesSuite, CastSuite under ANSI mode ### What changes were proposed in this pull request? The PR fixes the following tests under ANSI mode: * DataExpressionSuite, * NullExpressionsSuite, * StringExpressionsSuite, * ComplexTypesSuite, * CastSuite Most of them should only work with ANSI off. The fix wrap the corresponding code with ```scala withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { // code only works under ANSI off } ``` The `CastSuite` intentionally tests the case of ANSI off. The fix explicitly set ANSI off in `beforeAll` and reset it in `afterAll`. ### Why are the changes needed? To set up a new GA job to run tests with ANSI mode before 3.3.0 release. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Test locally with both ANSI on and off, both passed. Closes #35618 from anchovYu/ansi-tests-multiple. Authored-by: Xinyi Yu Signed-off-by: Gengliang Wang --- .../sql/catalyst/expressions/CastSuite.scala | 9 ++++++++ .../expressions/DateExpressionsSuite.scala | 4 +++- .../expressions/NullExpressionsSuite.scala | 21 ++++++++++++------- .../expressions/StringExpressionsSuite.scala | 9 +++++--- .../optimizer/complexTypesSuite.scala | 8 +++++-- 5 files changed, 38 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index ba36fa0314cb8..52e3cb0baf73d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -39,6 +39,15 @@ import org.apache.spark.unsafe.types.UTF8String * in `CastSuiteBase` instead of this file to ensure the test coverage. 
*/ class CastSuite extends CastSuiteBase { + override def beforeAll(): Unit = { + super.beforeAll() + SQLConf.get.setConf(SQLConf.ANSI_ENABLED, false) + } + + override def afterAll(): Unit = { + super.afterAll() + SQLConf.get.unsetConf(SQLConf.ANSI_ENABLED) + } override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = { v match { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 84603ee23a8b6..0837cc6f131d4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1462,7 +1462,9 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { Literal("yyyy-MM-dd'T'HH:mm:ss.SSSz"), TimestampType), 1580184371847000L) } - withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> "corrected") { + withSQLConf( + SQLConf.LEGACY_TIME_PARSER_POLICY.key -> "corrected", + SQLConf.ANSI_ENABLED.key -> "false") { checkEvaluation( GetTimestamp( Literal("2020-01-27T20:06:11.847-0800"), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala index 7abea96915d2f..da8e11c0433eb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ class NullExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -112,25 +113,31 @@ class NullExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val timestampLit = Literal.create(Timestamp.valueOf("2017-04-12 00:00:00"), TimestampType) val decimalLit = Literal.create(BigDecimal.valueOf(10.2), DecimalType(20, 2)) - assert(analyze(new Nvl(decimalLit, stringLit)).dataType == StringType) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + assert(analyze(new Nvl(decimalLit, stringLit)).dataType == StringType) + } assert(analyze(new Nvl(doubleLit, decimalLit)).dataType == DoubleType) assert(analyze(new Nvl(decimalLit, doubleLit)).dataType == DoubleType) assert(analyze(new Nvl(decimalLit, floatLit)).dataType == DoubleType) assert(analyze(new Nvl(floatLit, decimalLit)).dataType == DoubleType) - assert(analyze(new Nvl(timestampLit, stringLit)).dataType == StringType) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + assert(analyze(new Nvl(timestampLit, stringLit)).dataType == StringType) + assert(analyze(new Nvl(intLit, stringLit)).dataType == StringType) + assert(analyze(new Nvl(stringLit, doubleLit)).dataType == StringType) + assert(analyze(new Nvl(doubleLit, stringLit)).dataType == StringType) + } assert(analyze(new Nvl(intLit, doubleLit)).dataType == DoubleType) - assert(analyze(new Nvl(intLit, stringLit)).dataType == StringType) - assert(analyze(new Nvl(stringLit, doubleLit)).dataType == StringType) - 
assert(analyze(new Nvl(doubleLit, stringLit)).dataType == StringType) assert(analyze(new Nvl(nullLit, intLit)).dataType == IntegerType) assert(analyze(new Nvl(doubleLit, nullLit)).dataType == DoubleType) assert(analyze(new Nvl(nullLit, stringLit)).dataType == StringType) - assert(analyze(new Nvl(floatLit, stringLit)).dataType == StringType) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + assert(analyze(new Nvl(floatLit, stringLit)).dataType == StringType) + assert(analyze(new Nvl(floatNullLit, intLit)).dataType == FloatType) + } assert(analyze(new Nvl(floatLit, doubleLit)).dataType == DoubleType) - assert(analyze(new Nvl(floatNullLit, intLit)).dataType == FloatType) } test("AtLeastNNonNulls") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index b54d0a6ef7e3b..b05142add0bab 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -120,9 +120,12 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { testElt(null, null, "hello", "world") // Invalid ranges - testElt(null, 3, "hello", "world") - testElt(null, 0, "hello", "world") - testElt(null, -1, "hello", "world") + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + // ANSI will throw SparkArrayIndexOutOfBoundsException with invalid index + testElt(null, 3, "hello", "world") + testElt(null, 0, "hello", "world") + testElt(null, -1, "hello", "world") + } // type checking assert(Elt(Seq.empty).checkInputDataTypes().isFailure) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index 00a4212f661d9..11d1b30b4f8cc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.GenericArrayData +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** @@ -441,9 +442,12 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { MapType(BinaryType, StringType)) val mb1 = Literal.create(Map[Array[Byte], String](), MapType(BinaryType, StringType)) - checkEvaluation(GetMapValue(mb0, Literal(Array[Byte](1, 2, 3))), null) + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + // ANSI will throw exception + checkEvaluation(GetMapValue(mb0, Literal(Array[Byte](1, 2, 3))), null) + checkEvaluation(GetMapValue(mb1, Literal(Array[Byte](1, 2))), null) + } - checkEvaluation(GetMapValue(mb1, Literal(Array[Byte](1, 2))), null) checkEvaluation(GetMapValue(mb0, Literal(Array[Byte](2, 1), BinaryType)), "2") checkEvaluation(GetMapValue(mb0, Literal(Array[Byte](3, 4))), null) } From 95f06f32cc29b82326cf1c8915494443ee7d853f Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Fri, 25 Feb 2022 12:39:41 +0800 Subject: [PATCH 333/513] [SPARK-37614][SQL] Support ANSI Aggregate Function: regr_avgx & regr_avgy ### What changes were proposed in this pull 
request? `REGR_AVGX` and `REGR_AVGY` are ANSI aggregate functions. `REGR_AVGX` returns the mean of the independent_variable_expression for all non-null data pairs of the dependent and independent variable arguments. **Syntax**: `REGR_AVGX(dependent_variable_expression, independent_variable_expression)` The equation for computing `REGR_AVGX` is: `REGR_AVGX = SUM(x)/n` **Examples**: ``` > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); 2.75 > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x); 3.0 > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x); 3.0 ``` `REGR_AVGY` returns the mean of the dependent_variable_expression for all non-null data pairs of the dependent and independent variable arguments. **Syntax**: `REGR_AVGY(dependent_variable_expression, independent_variable_expression)` The equation for computing `REGR_AVGY ` is: `REGR_AVGY = SUM(y)/n` **Examples**: ``` > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); 1.75 > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x); 1.6666666666666667 > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x); 1.5 ``` **dependent_variable_expression**: the dependent variable for the regression. A dependent variable is something that is measured in response to a treatment. The expression cannot contain any ordered analytical or aggregate functions. **independent_variable_expression**: the independent variable for the regression. An independent variable is a treatment: something that is varied under your control to test the behavior of another variable. The expression cannot contain any ordered analytical or aggregate functions. The mainstream database supports `REGR_AVGX` and `REGR_AVGY` show below: **Teradata** https://docs.teradata.com/r/kmuOwjp1zEYg98JsB8fu_A/KkJgUSq2O6JRU3bCK~0cug **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/regr_avgx.html **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/REGR_-Linear-Regression-Functions.html#GUID-A675B68F-2A88-4843-BE2C-FCDE9C65F9A9 **Vertica** https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/Aggregate/REGR_AVGX.htm?tocpath=SQL%20Reference%20Manual%7CSQL%20Functions%7CAggregate%20Functions%7C_____24 **DB2** https://www.ibm.com/docs/en/db2/11.5?topic=af-regression-functions-regr-avgx-regr-avgy-regr-count **H2** http://www.h2database.com/html/functions-aggregate.html#regr_avgx **Postgresql** https://www.postgresql.org/docs/8.4/functions-aggregate.html **Sybase** https://infocenter.sybase.com/help/index.jsp?topic=/com.sybase.help.sqlanywhere.12.0.0/dbreference/regr-avgx-function.html **Exasol** https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/regr_function.htm ### Why are the changes needed? `REGR_AVGX` and `REGR_AVGY` are very useful. ### Does this PR introduce _any_ user-facing change? 'Yes'. New feature. ### How was this patch tested? New tests. Closes #34868 from beliefer/SPARK-37614. 
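As an illustration only (not part of this patch): because the new aggregates are runtime-replaceable, `regr_avgx(y, x)` should match a plain `avg` over the independent variable restricted to non-null pairs. A minimal sketch, assuming an active `SparkSession` named `spark`:

```scala
// Illustrative sketch, assuming an active SparkSession named `spark`.
// regr_avgx(y, x) averages x over rows where both y and x are non-null, so the
// manual rewrite below should return the same value (3.0 for this input).
val df = spark.sql(
  """SELECT
    |  regr_avgx(y, x) AS avgx,
    |  avg(CASE WHEN y IS NOT NULL AND x IS NOT NULL THEN x END) AS avgx_manual
    |FROM VALUES (1, 2), (2, NULL), (NULL, 3), (2, 4) AS tab(y, x)""".stripMargin)
df.show()
```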
Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../catalyst/analysis/FunctionRegistry.scala | 2 + .../expressions/aggregate/RegrCount.scala | 47 -------- .../aggregate/linearRegression.scala | 107 ++++++++++++++++++ .../sql-functions/sql-expression-schema.md | 14 ++- .../resources/sql-tests/inputs/group-by.sql | 6 + .../inputs/postgreSQL/aggregates_part1.sql | 2 +- .../udf/postgreSQL/udf-aggregates_part1.sql | 2 +- .../sql-tests/results/group-by.sql.out | 36 +++++- .../postgreSQL/aggregates_part1.sql.out | 10 +- .../postgreSQL/udf-aggregates_part1.sql.out | 10 +- 10 files changed, 178 insertions(+), 58 deletions(-) delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/RegrCount.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 6cf0fd11b5488..8fe53de894439 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -493,6 +493,8 @@ object FunctionRegistry { expression[BoolOr]("some", true), expression[BoolOr]("bool_or"), expression[RegrCount]("regr_count"), + expression[RegrAvgX]("regr_avgx"), + expression[RegrAvgY]("regr_avgy"), // string functions expression[Ascii]("ascii"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/RegrCount.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/RegrCount.scala deleted file mode 100644 index 80df0128ccd7a..0000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/RegrCount.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.expressions.aggregate - -import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, ImplicitCastInputTypes, RuntimeReplaceableAggregate} -import org.apache.spark.sql.catalyst.trees.BinaryLike -import org.apache.spark.sql.types.{AbstractDataType, NumericType} - -@ExpressionDescription( - usage = """ - _FUNC_(expr) - Returns the number of non-null number pairs in a group. 
- """, - examples = """ - Examples: - > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); - 4 - > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x); - 3 - > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x); - 2 - """, - group = "agg_funcs", - since = "3.3.0") -case class RegrCount(left: Expression, right: Expression) - extends RuntimeReplaceableAggregate with ImplicitCastInputTypes with BinaryLike[Expression] { - override lazy val replacement: Expression = Count(Seq(left, right)) - override def nodeName: String = "regr_count" - override def inputTypes: Seq[AbstractDataType] = Seq(NumericType, NumericType) - override protected def withNewChildrenInternal( - newLeft: Expression, newRight: Expression): RegrCount = - this.copy(left = newLeft, right = newRight) -} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala new file mode 100644 index 0000000000000..1ad7cbeb2422a --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions.aggregate + +import org.apache.spark.sql.catalyst.expressions.{And, Expression, ExpressionDescription, If, ImplicitCastInputTypes, IsNotNull, Literal, RuntimeReplaceableAggregate} +import org.apache.spark.sql.catalyst.trees.BinaryLike +import org.apache.spark.sql.types.{AbstractDataType, NumericType} + +@ExpressionDescription( + usage = """ + _FUNC_(expr) - Returns the number of non-null number pairs in a group. 
+ """, + examples = """ + Examples: + > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); + 4 + > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x); + 3 + > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x); + 2 + """, + group = "agg_funcs", + since = "3.3.0") +case class RegrCount(left: Expression, right: Expression) + extends RuntimeReplaceableAggregate with ImplicitCastInputTypes with BinaryLike[Expression] { + override lazy val replacement: Expression = Count(Seq(left, right)) + override def nodeName: String = "regr_count" + override def inputTypes: Seq[AbstractDataType] = Seq(NumericType, NumericType) + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): RegrCount = + this.copy(left = newLeft, right = newRight) +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(y, x) - Returns the average of the independent variable for non-null pairs in a group, where `y` is the dependent variable and `x` is the independent variable.", + examples = """ + Examples: + > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); + 2.75 + > SELECT _FUNC_(y, x) FROM VALUES (1, null) AS tab(y, x); + NULL + > SELECT _FUNC_(y, x) FROM VALUES (null, 1) AS tab(y, x); + NULL + > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x); + 3.0 + > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x); + 3.0 + """, + group = "agg_funcs", + since = "3.3.0") +// scalastyle:on line.size.limit +case class RegrAvgX(left: Expression, right: Expression) + extends RuntimeReplaceableAggregate with ImplicitCastInputTypes with BinaryLike[Expression] { + override lazy val replacement: Expression = + Average(If(And(IsNotNull(left), IsNotNull(right)), right, Literal.create(null, right.dataType))) + override def nodeName: String = "regr_avgx" + override def inputTypes: Seq[AbstractDataType] = Seq(NumericType, NumericType) + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): RegrAvgX = + this.copy(left = newLeft, right = newRight) +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(y, x) - Returns the average of the dependent variable for non-null pairs in a group, where `y` is the dependent variable and `x` is the independent variable.", + examples = """ + Examples: + > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); + 1.75 + > SELECT _FUNC_(y, x) FROM VALUES (1, null) AS tab(y, x); + NULL + > SELECT _FUNC_(y, x) FROM VALUES (null, 1) AS tab(y, x); + NULL + > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x); + 1.6666666666666667 + > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x); + 1.5 + """, + group = "agg_funcs", + since = "3.3.0") +// scalastyle:on line.size.limit +case class RegrAvgY(left: Expression, right: Expression) + extends RuntimeReplaceableAggregate with ImplicitCastInputTypes with BinaryLike[Expression] { + override lazy val replacement: Expression = + Average(If(And(IsNotNull(left), IsNotNull(right)), left, Literal.create(null, left.dataType))) + override def nodeName: String = "regr_avgy" + override def inputTypes: Seq[AbstractDataType] = Seq(NumericType, NumericType) + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): RegrAvgY = + this.copy(left = newLeft, right = newRight) +} 
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index a817440fcfe9b..2a50fbcd3f7f7 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 380 + - Number of queries: 382 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -68,8 +68,8 @@ | org.apache.spark.sql.catalyst.expressions.Cast | timestamp | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.Cast | tinyint | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.Cbrt | cbrt | SELECT cbrt(27.0) | struct | -| org.apache.spark.sql.catalyst.expressions.Ceil | ceil | SELECT ceil(-0.1) | struct | -| org.apache.spark.sql.catalyst.expressions.Ceil | ceiling | SELECT ceiling(-0.1) | struct | +| org.apache.spark.sql.catalyst.expressions.CeilExpressionBuilder$ | ceil | SELECT ceil(-0.1) | struct | +| org.apache.spark.sql.catalyst.expressions.CeilExpressionBuilder$ | ceiling | SELECT ceiling(-0.1) | struct | | org.apache.spark.sql.catalyst.expressions.Chr | char | SELECT char(65) | struct | | org.apache.spark.sql.catalyst.expressions.Chr | chr | SELECT chr(65) | struct | | org.apache.spark.sql.catalyst.expressions.Coalesce | coalesce | SELECT coalesce(NULL, 1, NULL) | struct | @@ -120,11 +120,11 @@ | org.apache.spark.sql.catalyst.expressions.Explode | explode | SELECT explode(array(10, 20)) | struct | | org.apache.spark.sql.catalyst.expressions.Explode | explode_outer | SELECT explode_outer(array(10, 20)) | struct | | org.apache.spark.sql.catalyst.expressions.Expm1 | expm1 | SELECT expm1(0) | struct | -| org.apache.spark.sql.catalyst.expressions.ExtractExpressionBuilder$ | extract | SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456') | struct | +| org.apache.spark.sql.catalyst.expressions.Extract | extract | SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456') | struct | | org.apache.spark.sql.catalyst.expressions.Factorial | factorial | SELECT factorial(5) | struct | | org.apache.spark.sql.catalyst.expressions.FindInSet | find_in_set | SELECT find_in_set('ab','abc,b,ab,c,def') | struct | | org.apache.spark.sql.catalyst.expressions.Flatten | flatten | SELECT flatten(array(array(1, 2), array(3, 4))) | struct> | -| org.apache.spark.sql.catalyst.expressions.Floor | floor | SELECT floor(-0.1) | struct | +| org.apache.spark.sql.catalyst.expressions.FloorExpressionBuilder$ | floor | SELECT floor(-0.1) | struct | | org.apache.spark.sql.catalyst.expressions.FormatNumber | format_number | SELECT format_number(12332.123456, 4) | struct | | org.apache.spark.sql.catalyst.expressions.FormatString | format_string | SELECT format_string("Hello World %d %s", 100, "days") | struct | | org.apache.spark.sql.catalyst.expressions.FormatString | printf | SELECT printf("Hello World %d %s", 100, "days") | struct | @@ -217,7 +217,7 @@ | org.apache.spark.sql.catalyst.expressions.Or | or | SELECT true or false | struct<(true OR false):boolean> | | org.apache.spark.sql.catalyst.expressions.Overlay | overlay | SELECT overlay('Spark SQL' PLACING '_' FROM 6) | struct | | org.apache.spark.sql.catalyst.expressions.ParseToDate | to_date | SELECT to_date('2009-07-30 04:17:52') | struct | -| 
org.apache.spark.sql.catalyst.expressions.ParseToTimestampExpressionBuilder$ | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct | +| org.apache.spark.sql.catalyst.expressions.ParseToTimestamp | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct | | org.apache.spark.sql.catalyst.expressions.ParseToTimestampLTZExpressionBuilder$ | to_timestamp_ltz | SELECT to_timestamp_ltz('2016-12-31 00:12:00') | struct | | org.apache.spark.sql.catalyst.expressions.ParseToTimestampNTZExpressionBuilder$ | to_timestamp_ntz | SELECT to_timestamp_ntz('2016-12-31 00:12:00') | struct | | org.apache.spark.sql.catalyst.expressions.ParseUrl | parse_url | SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct | @@ -367,6 +367,8 @@ | org.apache.spark.sql.catalyst.expressions.aggregate.Min | min | SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.Percentile | percentile | SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.RegrAvgX | regr_avgx | SELECT regr_avgx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.RegrAvgY | regr_avgy | SELECT regr_avgy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.RegrCount | regr_count | SELECT regr_count(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.Skewness | skewness | SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop | stddev_pop | SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index cb82bfa310122..75933f86b2ab3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -231,6 +231,12 @@ FROM VALUES (1,4),(2,3),(1,4),(2,4) AS v(a,b) GROUP BY a; +-- SPARK-37614: Support ANSI Aggregate Function: regr_avgx & regr_avgy +SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression; +SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL; +SELECT k, avg(x), avg(y), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k; +SELECT k, avg(x) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), avg(y) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k; + -- SPARK-37676: Support ANSI Aggregation Function: percentile_cont SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY v), diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part1.sql index 4cc24c00435cc..d08037268c95c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part1.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part1.sql @@ -84,7 +84,7 @@ SELECT regr_count(b, a) FROM aggtest; -- SELECT regr_sxx(b, a) FROM aggtest; -- SELECT regr_syy(b, a) FROM 
aggtest; -- SELECT regr_sxy(b, a) FROM aggtest; --- SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest; +SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest; -- SELECT regr_r2(b, a) FROM aggtest; -- SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest; SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest; diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part1.sql index 2b00815bba2e3..6e2ffae48a6ca 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part1.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part1.sql @@ -84,7 +84,7 @@ SELECT regr_count(b, a) FROM aggtest; -- SELECT regr_sxx(b, a) FROM aggtest; -- SELECT regr_syy(b, a) FROM aggtest; -- SELECT regr_sxy(b, a) FROM aggtest; --- SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest; +SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest; -- SELECT regr_r2(b, a) FROM aggtest; -- SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest; SELECT udf(covar_pop(b, udf(a))), covar_samp(udf(b), a) FROM aggtest; diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 400d6c91ba702..6d5ea7d87ed8f 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 76 +-- Number of queries: 80 -- !query @@ -774,6 +774,40 @@ struct,collect_list(b):array> 2 [3,4] [3,4] +-- !query +SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression +-- !query schema +struct +-- !query output +22.666666666666668 20.0 + + +-- !query +SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL +-- !query schema +struct +-- !query output +22.666666666666668 20.0 + + +-- !query +SELECT k, avg(x), avg(y), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k +-- !query schema +struct +-- !query output +1 NULL 10.0 NULL NULL +2 22.666666666666668 21.25 22.666666666666668 20.0 + + +-- !query +SELECT k, avg(x) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), avg(y) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k +-- !query schema +struct +-- !query output +1 NULL NULL NULL NULL +2 22.666666666666668 20.0 22.666666666666668 20.0 + + -- !query SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY v), diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out index 91f8185ff9055..f2c20bced6e1f 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 45 +-- Number of queries: 46 -- !query @@ -296,6 +296,14 @@ struct 4 +-- !query +SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest +-- !query schema +struct +-- !query output +49.5 107.94315227307379 + + -- !query SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out 
b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out index b75bd58d93c9f..09cf6ee218969 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 44 +-- Number of queries: 45 -- !query @@ -287,6 +287,14 @@ struct 4 +-- !query +SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest +-- !query schema +struct +-- !query output +49.5 107.94315227307379 + + -- !query SELECT udf(covar_pop(b, udf(a))), covar_samp(udf(b), a) FROM aggtest -- !query schema From 2dc0527fb6462b6849d3c53c6d83392a8e37cdcc Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Fri, 25 Feb 2022 14:59:10 +0800 Subject: [PATCH 334/513] [SPARK-38322][SQL] Support query stage show runtime statistics in formatted explain mode ### What changes were proposed in this pull request? Add query stage statistics information in formatted explain mode. ### Why are the changes needed? The formatted explalin mode is the powerful explain mode to show the details of query plan. In AQE, the query stage know its statistics if has already materialized. So it can help to quick check the conversion of plan, e.g. join selection. A simple example: ```sql SELECT * FROM t JOIN t2 ON t.c = t2.c; ``` ```sql == Physical Plan == AdaptiveSparkPlan (21) +- == Final Plan == * SortMergeJoin Inner (13) :- * Sort (6) : +- AQEShuffleRead (5) : +- ShuffleQueryStage (4), Statistics(sizeInBytes=16.0 B, rowCount=1) : +- Exchange (3) : +- * Filter (2) : +- Scan hive default.t (1) +- * Sort (12) +- AQEShuffleRead (11) +- ShuffleQueryStage (10), Statistics(sizeInBytes=16.0 B, rowCount=1) +- Exchange (9) +- * Filter (8) +- Scan hive default.t2 (7) +- == Initial Plan == SortMergeJoin Inner (20) :- Sort (16) : +- Exchange (15) : +- Filter (14) : +- Scan hive default.t (1) +- Sort (19) +- Exchange (18) +- Filter (17) +- Scan hive default.t2 (7) ``` ### Does this PR introduce _any_ user-facing change? no, only change the output of explain in AQE ### How was this patch tested? Add test Closes #35658 from ulysses-you/exchange-statistics. 
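Illustrative sketch only (assumes an active `SparkSession` named `spark` with AQE enabled): the statistics are attached to a query stage only once it has materialized, so they appear in the formatted plan after the query has actually run.

```scala
// Illustrative sketch, assuming an active SparkSession `spark` with AQE enabled
// (spark.sql.adaptive.enabled=true). Query stage statistics only show up after the
// stage has materialized, i.e. after the query has been executed.
val df = spark.range(10).distinct()
df.explain("formatted")   // no Statistics(...) on the ShuffleQueryStage yet
df.collect()              // materializes the shuffle query stage
df.explain("formatted")   // ShuffleQueryStage (n), Statistics(sizeInBytes=..., rowCount=...)
```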
Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../sql/execution/adaptive/QueryStageExec.scala | 4 ++++ .../org/apache/spark/sql/ExplainSuite.scala | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala index e2f763eb71502..ac1968dab6998 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala @@ -124,6 +124,10 @@ abstract class QueryStageExec extends LeafExecNode { protected override def stringArgs: Iterator[Any] = Iterator.single(id) + override def simpleStringWithNodeId(): String = { + super.simpleStringWithNodeId() + computeStats().map(", " + _.toString).getOrElse("") + } + override def generateTreeString( depth: Int, lastChildren: Seq[Boolean], diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 67240c5525f34..a5403ec548d7e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -735,6 +735,22 @@ class ExplainSuiteAE extends ExplainSuiteHelper with EnableAdaptiveExecutionSuit } } } + + test("SPARK-38322: Support query stage show runtime statistics in formatted explain mode") { + val df = Seq(1, 2).toDF("c").distinct() + val statistics = "Statistics(sizeInBytes=32.0 B, rowCount=2)" + + checkKeywordsNotExistsInExplain( + df, + FormattedMode, + statistics) + + df.collect() + checkKeywordsExistsInExplain( + df, + FormattedMode, + statistics) + } } case class ExplainSingleData(id: Int) From e56f8651f2b7e8890cac791db6c01b96946b27bf Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 25 Feb 2022 15:07:06 +0800 Subject: [PATCH 335/513] [SPARK-38316][SQL][TESTS] Fix SQLViewSuite/TriggerAvailableNowSuite/Unwrap*Suite under ANSI mode ### What changes were proposed in this pull request? Fix the following test suites under ANSI mode: - TriggerAvailableNowSuite - GlobalTempViewTestSuite - LocalTempViewTestSuite - PersistedViewTestSuite - SimpleSQLViewSuite - UnwrapCastInComparisonEndToEndSuite - UnwrapCastInBinaryComparisonSuite ### Why are the changes needed? To set up a new GA test job with ANSI mode on ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually turn on ANSI mode and test . Also it should pass GA tests. Closes #35652 from gengliangwang/fix. 
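For context only (not part of this patch): the fixed suites exercise behaviors that exist only with ANSI off, e.g. `1/0` evaluating to `NULL`. A minimal sketch, assuming an active `SparkSession` named `spark`:

```scala
// Illustrative sketch, assuming an active SparkSession named `spark`.
spark.conf.set("spark.sql.ansi.enabled", "false")
spark.sql("SELECT 1/0 AS invalid").show()    // NULL under legacy (non-ANSI) semantics

spark.conf.set("spark.sql.ansi.enabled", "true")
// spark.sql("SELECT 1/0 AS invalid").show() // would fail with a divide-by-zero error
```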
Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../UnwrapCastInBinaryComparisonSuite.scala | 4 ++- .../UnwrapCastInComparisonEndToEndSuite.scala | 27 ++++++++++--------- .../spark/sql/execution/SQLViewSuite.scala | 8 ++++-- .../sql/execution/SQLViewTestSuite.scala | 12 +++++---- .../streaming/TriggerAvailableNowSuite.scala | 2 +- 5 files changed, 32 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala index 8da6d373eb3bc..a51be57db6fa7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala @@ -212,7 +212,9 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp } test("unwrap cast should skip if cannot coerce type") { - assertEquivalent(Cast(f, ByteType) > 100.toByte, Cast(f, ByteType) > 100.toByte) + if (!conf.ansiEnabled) { + assertEquivalent(Cast(f, ByteType) > 100.toByte, Cast(f, ByteType) > 100.toByte) + } } test("test getRange()") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala index 26ec6eeb6c2dc..2c361299b173d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala @@ -191,18 +191,21 @@ class UnwrapCastInComparisonEndToEndSuite extends QueryTest with SharedSparkSess } test("SPARK-36607: Support BooleanType in UnwrapCastInBinaryComparison") { - withTable(t) { - Seq(Some(true), Some(false), None).toDF().write.saveAsTable(t) - val df = spark.table(t) - - checkAnswer(df.where("value = -1"), Seq.empty) - checkAnswer(df.where("value = 0"), Row(false)) - checkAnswer(df.where("value = 1"), Row(true)) - checkAnswer(df.where("value = 2"), Seq.empty) - checkAnswer(df.where("value <=> -1"), Seq.empty) - checkAnswer(df.where("value <=> 0"), Row(false)) - checkAnswer(df.where("value <=> 1"), Row(true)) - checkAnswer(df.where("value <=> 2"), Seq.empty) + // If ANSI mode is on, Spark disallows comparing Int with Boolean. 
+ if (!conf.ansiEnabled) { + withTable(t) { + Seq(Some(true), Some(false), None).toDF().write.saveAsTable(t) + val df = spark.table(t) + + checkAnswer(df.where("value = -1"), Seq.empty) + checkAnswer(df.where("value = 0"), Row(false)) + checkAnswer(df.where("value = 1"), Row(true)) + checkAnswer(df.where("value = 2"), Seq.empty) + checkAnswer(df.where("value <=> -1"), Seq.empty) + checkAnswer(df.where("value <=> 0"), Row(false)) + checkAnswer(df.where("value <=> 1"), Row(true)) + checkAnswer(df.where("value <=> 2"), Seq.empty) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 1861d9cf045a1..ee6d3525b6f1a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -765,7 +765,9 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { withTable("t") { Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") withTempView("v1") { - sql("CREATE TEMPORARY VIEW v1 AS SELECT 1/0") + withSQLConf(ANSI_ENABLED.key -> "false") { + sql("CREATE TEMPORARY VIEW v1 AS SELECT 1/0") + } withSQLConf( USE_CURRENT_SQL_CONFIGS_FOR_VIEW.key -> "true", ANSI_ENABLED.key -> "true") { @@ -838,7 +840,9 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { sql("CREATE VIEW v2 (c1) AS SELECT c1 FROM t ORDER BY 1 ASC, c1 DESC") sql("CREATE VIEW v3 (c1, count) AS SELECT c1, count(c1) AS cnt FROM t GROUP BY 1") sql("CREATE VIEW v4 (a, count) AS SELECT c1 as a, count(c1) AS cnt FROM t GROUP BY a") - sql("CREATE VIEW v5 (c1) AS SELECT 1/0 AS invalid") + withSQLConf(ANSI_ENABLED.key -> "false") { + sql("CREATE VIEW v5 (c1) AS SELECT 1/0 AS invalid") + } withSQLConf(CASE_SENSITIVE.key -> "true") { checkAnswer(sql("SELECT * FROM v1"), Seq(Row(2), Row(3), Row(1))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index 94855f2c42143..7e773fa1ac565 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -119,11 +119,13 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { test("change SQLConf should not change view behavior - ansiEnabled") { withTable("t") { Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") - val viewName = createView("v1", "SELECT 1/0 AS invalid", Seq("c1")) - withView(viewName) { - Seq("true", "false").foreach { flag => - withSQLConf(ANSI_ENABLED.key -> flag) { - checkViewOutput(viewName, Seq(Row(null))) + withSQLConf(ANSI_ENABLED.key -> "false") { + val viewName = createView("v1", "SELECT 1/0 AS invalid", Seq("c1")) + withView(viewName) { + Seq("true", "false").foreach { flag => + withSQLConf(ANSI_ENABLED.key -> flag) { + checkViewOutput(viewName, Seq(Row(null))) + } } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TriggerAvailableNowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TriggerAvailableNowSuite.scala index 0c7348b91c0a7..cb4410d9da92d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TriggerAvailableNowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TriggerAvailableNowSuite.scala @@ -128,7 +128,7 @@ class TriggerAvailableNowSuite extends FileStreamSourceTest { 
.option("maxFilesPerTrigger", 1) .text(src.getCanonicalPath) - val df2 = testSource.toDF + val df2 = testSource.toDF.selectExpr("cast(value as string)") def startQuery(): StreamingQuery = { df1.union(df2).writeStream From 29eca8c87f4e8c19c0380f7c30668fd88edee573 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 25 Feb 2022 17:11:15 +0800 Subject: [PATCH 336/513] [SPARK-38325][SQL] ANSI mode: avoid potential runtime error in HashJoin.extractKeyExprAt() ### What changes were proposed in this pull request? SubqueryBroadcastExec retrieves the partition key from the broadcast results based on the type of HashedRelation returned. If the key is packed inside a Long, we extract it through bitwise operations and cast it as Byte/Short/Int if necessary. The casting here can cause a potential runtime error. This PR is to fix it. ### Why are the changes needed? Bug fix ### Does this PR introduce _any_ user-facing change? Yes, avoid potential runtime error in dynamic pruning under ANSI mode ### How was this patch tested? UT Closes #35659 from gengliangwang/fixHashJoin. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../spark/sql/execution/joins/HashJoin.scala | 27 ++++++++++++++----- .../execution/joins/HashedRelationSuite.scala | 22 +++++++++------ 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala index 0e8bb84ee5d81..4595ea049ef70 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala @@ -705,6 +705,13 @@ trait HashJoin extends JoinCodegenSupport { } object HashJoin extends CastSupport with SQLConfHelper { + + private def canRewriteAsLongType(keys: Seq[Expression]): Boolean = { + // TODO: support BooleanType, DateType and TimestampType + keys.forall(_.dataType.isInstanceOf[IntegralType]) && + keys.map(_.dataType.defaultSize).sum <= 8 + } + /** * Try to rewrite the key as LongType so we can use getLong(), if they key can fit with a long. 
* @@ -712,9 +719,7 @@ object HashJoin extends CastSupport with SQLConfHelper { */ def rewriteKeyExpr(keys: Seq[Expression]): Seq[Expression] = { assert(keys.nonEmpty) - // TODO: support BooleanType, DateType and TimestampType - if (keys.exists(!_.dataType.isInstanceOf[IntegralType]) - || keys.map(_.dataType.defaultSize).sum > 8) { + if (!canRewriteAsLongType(keys)) { return keys } @@ -736,18 +741,28 @@ object HashJoin extends CastSupport with SQLConfHelper { * determine the number of bits to shift */ def extractKeyExprAt(keys: Seq[Expression], index: Int): Expression = { + assert(canRewriteAsLongType(keys)) // jump over keys that have a higher index value than the required key if (keys.size == 1) { assert(index == 0) - cast(BoundReference(0, LongType, nullable = false), keys(index).dataType) + Cast( + child = BoundReference(0, LongType, nullable = false), + dataType = keys(index).dataType, + timeZoneId = Option(conf.sessionLocalTimeZone), + ansiEnabled = false) } else { val shiftedBits = keys.slice(index + 1, keys.size).map(_.dataType.defaultSize * 8).sum val mask = (1L << (keys(index).dataType.defaultSize * 8)) - 1 // build the schema for unpacking the required key - cast(BitwiseAnd( + val castChild = BitwiseAnd( ShiftRightUnsigned(BoundReference(0, LongType, nullable = false), Literal(shiftedBits)), - Literal(mask)), keys(index).dataType) + Literal(mask)) + Cast( + child = castChild, + dataType = keys(index).dataType, + timeZoneId = Option(conf.sessionLocalTimeZone), + ansiEnabled = false) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala index b8ffc47d6ec3c..d5b7ed6c275f4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala @@ -30,6 +30,7 @@ import org.apache.spark.memory.{TaskMemoryManager, UnifiedMemoryManager} import org.apache.spark.serializer.KryoSerializer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.unsafe.map.BytesToBytesMap @@ -610,14 +611,19 @@ class HashedRelationSuite extends SharedSparkSession { val keys = Seq(BoundReference(0, ByteType, false), BoundReference(1, IntegerType, false), BoundReference(2, ShortType, false)) - val packed = HashJoin.rewriteKeyExpr(keys) - val unsafeProj = UnsafeProjection.create(packed) - val packedKeys = unsafeProj(row) - - Seq((0, ByteType), (1, IntegerType), (2, ShortType)).foreach { case (i, dt) => - val key = HashJoin.extractKeyExprAt(keys, i) - val proj = UnsafeProjection.create(key) - assert(proj(packedKeys).get(0, dt) == -i - 1) + // Rewrite and exacting key expressions should not cause exception when ANSI mode is on. 
+ Seq("false", "true").foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled) { + val packed = HashJoin.rewriteKeyExpr(keys) + val unsafeProj = UnsafeProjection.create(packed) + val packedKeys = unsafeProj(row) + + Seq((0, ByteType), (1, IntegerType), (2, ShortType)).foreach { case (i, dt) => + val key = HashJoin.extractKeyExprAt(keys, i) + val proj = UnsafeProjection.create(key) + assert(proj(packedKeys).get(0, dt) == -i - 1) + } + } } } From 64e1f28f1626247cc1361dcb395288227454ca8f Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Fri, 25 Feb 2022 08:34:04 -0600 Subject: [PATCH 337/513] [SPARK-38305][CORE] Explicitly check if source exists in unpack() before calling FileUtil methods ### What changes were proposed in this pull request? Explicitly check existence of source file in Utils.unpack before calling Hadoop FileUtil methods ### Why are the changes needed? A discussion from the Hadoop community raised a potential issue in calling these methods when a file doesn't exist. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #35632 from srowen/SPARK-38305. Authored-by: Sean Owen Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/util/Utils.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index a9d6180d2fd7b..17bec9f666aef 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -593,6 +593,9 @@ private[spark] object Utils extends Logging { * basically copied from `org.apache.hadoop.yarn.util.FSDownload.unpack`. */ def unpack(source: File, dest: File): Unit = { + if (!source.exists()) { + throw new FileNotFoundException(source.getAbsolutePath) + } val lowerSrc = StringUtils.toLowerCase(source.getName) if (lowerSrc.endsWith(".jar")) { RunJar.unJar(source, dest, RunJar.MATCH_ANY) From daa5f9df4a1c8b3cf5db7142e54b765272c1f24c Mon Sep 17 00:00:00 2001 From: Alfonso Date: Fri, 25 Feb 2022 08:38:51 -0600 Subject: [PATCH 338/513] [MINOR][DOCS] Fix missing field in query ### What changes were proposed in this pull request? This PR fixes sql query in doc, let the query confrom to the query result in the following ### Why are the changes needed? Just a fix to doc ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? use project test Closes #35624 from redsnow1992/patch-1. 
Authored-by: Alfonso Signed-off-by: Sean Owen --- docs/sql-ref-syntax-qry-select-window.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sql-ref-syntax-qry-select-window.md b/docs/sql-ref-syntax-qry-select-window.md index 6e65778559385..9fbebcf407933 100644 --- a/docs/sql-ref-syntax-qry-select-window.md +++ b/docs/sql-ref-syntax-qry-select-window.md @@ -109,7 +109,7 @@ SELECT * FROM employees; | Alex| Sales| 30000| 33| +-----+-----------+------+-----+ -SELECT name, dept, RANK() OVER (PARTITION BY dept ORDER BY salary) AS rank FROM employees; +SELECT name, dept, salary, RANK() OVER (PARTITION BY dept ORDER BY salary) AS rank FROM employees; +-----+-----------+------+----+ | name| dept|salary|rank| +-----+-----------+------+----+ @@ -125,7 +125,7 @@ SELECT name, dept, RANK() OVER (PARTITION BY dept ORDER BY salary) AS rank FROM | Jeff| Marketing| 35000| 3| +-----+-----------+------+----+ -SELECT name, dept, DENSE_RANK() OVER (PARTITION BY dept ORDER BY salary ROWS BETWEEN +SELECT name, dept, salary, DENSE_RANK() OVER (PARTITION BY dept ORDER BY salary ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS dense_rank FROM employees; +-----+-----------+------+----------+ | name| dept|salary|dense_rank| From b204710fdba026aeae12efef398ade55efc1b113 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Fri, 25 Feb 2022 08:44:14 -0600 Subject: [PATCH 339/513] [MINOR] Add git ignores for vscode and metals ### What changes were proposed in this pull request? Add new git ignore entries for a Visual Studio Code with Metals Scala plugin setup. ### Why are the changes needed? To not have to constantly stash and pop my gitignore changes when changing branches. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? It ignores all the VSCode/Metals files I have locally. Closes #35610 from Kimahriman/vscode-metals-gitignores. Authored-by: Adam Binford Signed-off-by: Sean Owen --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 560265e4f4cf1..b75878189a975 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,8 @@ *~ .java-version .DS_Store +.ammonite +.bloop .bsp/ .cache .classpath @@ -21,10 +23,12 @@ # SPARK-35223: Add IssueNavigationLink to make IDEA support hyperlink on JIRA Ticket and GitHub PR on Git plugin. !.idea/vcs.xml .idea_modules/ +.metals .project .pydevproject .scala_dependencies .settings +.vscode /lib/ R-unit-tests.log R/unit-tests.out @@ -59,6 +63,7 @@ lint-r-report.log lint-js-report.log log/ logs/ +metals.sbt out/ project/boot/ project/build/target/ From dc153f525c8c895b9ceac8dfb3516b601c86a462 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Fri, 25 Feb 2022 14:48:27 -0800 Subject: [PATCH 340/513] [SPARK-38237][SQL][SS] Allow `ClusteredDistribution` to require full clustering keys ### What changes were proposed in this pull request? This PR is to allow`ClusteredDistribution` (such as operator with window, aggregate, etc) to require full clustering keys. Traditionally operator with `ClusteredDistribution` can be satisfied with `HashPartitioning` on subset of clustering keys. This behavior could potentially lead to data skewness (comments raised from https://github.com/apache/spark/pull/35552). Although we have various way to deal with the data skewness in this case, such as adding `repartition()`, disabling bucketing, adding custom AQE rule etc. There's still case we cannot handle e.g. data skewness in the same stage - (`join(t1.x = t2.x)` followed by `window(t1.x, t1.y)`). 
With the newly introduced config `spark.sql.requireAllClusterKeysForDistribution`. ### Why are the changes needed? Allow users to work around data skewness issue when partitioned on subset of keys. ### Does this PR introduce _any_ user-facing change? Yes, the added config, but disable by default. ### How was this patch tested? Added unit test in `DataFrameWindowFunctionsSuite.scala` and `DistributionSuite.scala` Closes #35574 from c21/exact-partition. Authored-by: Cheng Su Signed-off-by: Chao Sun --- .../plans/physical/partitioning.scala | 44 ++++++++++++++--- .../apache/spark/sql/internal/SQLConf.scala | 12 +++++ .../sql/catalyst/DistributionSuite.scala | 42 ++++++++++++++-- .../sql/execution/adaptive/AQEUtils.scala | 2 +- .../FlatMapGroupsWithStateExec.scala | 6 ++- .../execution/streaming/StreamExecution.scala | 5 ++ .../streaming/statefulOperators.scala | 15 ++++-- .../sql/DataFrameWindowFunctionsSuite.scala | 49 ++++++++++++++++++- .../spark/sql/execution/PlannerSuite.scala | 2 +- 9 files changed, 156 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index 040f1bfab65b7..78d153c5a0e83 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -72,9 +72,14 @@ case object AllTuples extends Distribution { /** * Represents data where tuples that share the same values for the `clustering` * [[Expression Expressions]] will be co-located in the same partition. + * + * @param requireAllClusterKeys When true, `Partitioning` which satisfies this distribution, + * must match all `clustering` expressions in the same ordering. */ case class ClusteredDistribution( clustering: Seq[Expression], + requireAllClusterKeys: Boolean = SQLConf.get.getConf( + SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_DISTRIBUTION), requiredNumPartitions: Option[Int] = None) extends Distribution { require( clustering != Nil, @@ -88,6 +93,19 @@ case class ClusteredDistribution( s"the actual number of partitions is $numPartitions.") HashPartitioning(clustering, numPartitions) } + + /** + * Checks if `expressions` match all `clustering` expressions in the same ordering. + * + * `Partitioning` should call this to check its expressions when `requireAllClusterKeys` + * is set to true. + */ + def areAllClusterKeysMatched(expressions: Seq[Expression]): Boolean = { + expressions.length == clustering.length && + expressions.zip(clustering).forall { + case (l, r) => l.semanticEquals(r) + } + } } /** @@ -261,8 +279,14 @@ case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int) expressions.length == h.expressions.length && expressions.zip(h.expressions).forall { case (l, r) => l.semanticEquals(r) } - case ClusteredDistribution(requiredClustering, _) => - expressions.forall(x => requiredClustering.exists(_.semanticEquals(x))) + case c @ ClusteredDistribution(requiredClustering, requireAllClusterKeys, _) => + if (requireAllClusterKeys) { + // Checks `HashPartitioning` is partitioned on exactly same clustering keys of + // `ClusteredDistribution`. 
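Illustrative sketch only (assumes an active `SparkSession` named `spark` and `import spark.implicits._`): with the new config enabled, repartitioning on a subset of the window partition keys no longer satisfies the window's required distribution, so an extra shuffle on the full key set shows up in the plan.

```scala
// Illustrative sketch, assuming an active SparkSession `spark` and
// `import spark.implicits._`. The config is internal and defaults to false.
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.lead

spark.conf.set("spark.sql.requireAllClusterKeysForDistribution", "true")

val df = Seq(("a", 1, 1), ("a", 2, 2), ("b", 1, 3), ("b", 1, 4)).toDF("key1", "key2", "value")
val windowSpec = Window.partitionBy("key1", "key2").orderBy("value")

df.repartition($"key1")                          // subset of the window keys
  .select(lead($"value", 1).over(windowSpec))
  .explain()                                     // expect Exchange hashpartitioning(key1, key2, ...)
```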
+ c.areAllClusterKeysMatched(expressions) + } else { + expressions.forall(x => requiredClustering.exists(_.semanticEquals(x))) + } case _ => false } } @@ -322,8 +346,15 @@ case class RangePartitioning(ordering: Seq[SortOrder], numPartitions: Int) // `RangePartitioning(a, b, c)` satisfies `OrderedDistribution(a, b)`. val minSize = Seq(requiredOrdering.size, ordering.size).min requiredOrdering.take(minSize) == ordering.take(minSize) - case ClusteredDistribution(requiredClustering, _) => - ordering.map(_.child).forall(x => requiredClustering.exists(_.semanticEquals(x))) + case c @ ClusteredDistribution(requiredClustering, requireAllClusterKeys, _) => + val expressions = ordering.map(_.child) + if (requireAllClusterKeys) { + // Checks `RangePartitioning` is partitioned on exactly same clustering keys of + // `ClusteredDistribution`. + c.areAllClusterKeysMatched(expressions) + } else { + expressions.forall(x => requiredClustering.exists(_.semanticEquals(x))) + } case _ => false } } @@ -524,10 +555,7 @@ case class HashShuffleSpec( // will add shuffles with the default partitioning of `ClusteredDistribution`, which uses all // the join keys. if (SQLConf.get.getConf(SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION)) { - partitioning.expressions.length == distribution.clustering.length && - partitioning.expressions.zip(distribution.clustering).forall { - case (l, r) => l.semanticEquals(r) - } + distribution.areAllClusterKeysMatched(partitioning.expressions) } else { true } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 3a7ce650ea633..a050156518c2c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -407,6 +407,18 @@ object SQLConf { .booleanConf .createWithDefault(true) + val REQUIRE_ALL_CLUSTER_KEYS_FOR_DISTRIBUTION = + buildConf("spark.sql.requireAllClusterKeysForDistribution") + .internal() + .doc("When true, the planner requires all the clustering keys as the partition keys " + + "(with same ordering) of the children, to eliminate the shuffle for the operator that " + + "requires its children be clustered distributed, such as AGGREGATE and WINDOW node. " + + "This is to avoid data skews which can lead to significant performance regression if " + + "shuffle is eliminated.") + .version("3.3.0") + .booleanConf + .createWithDefault(false) + val RADIX_SORT_ENABLED = buildConf("spark.sql.sort.enableRadixSort") .internal() .doc("When true, enable use of radix sort when possible. Radix sort is much faster but " + diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/DistributionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/DistributionSuite.scala index e047d4c070bec..a924a9ed02e5d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/DistributionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/DistributionSuite.scala @@ -169,6 +169,24 @@ class DistributionSuite extends SparkFunSuite { ClusteredDistribution(Seq($"d", $"e")), false) + // When ClusteredDistribution.requireAllClusterKeys is set to true, + // HashPartitioning can only satisfy ClusteredDistribution iff its hash expressions are + // exactly same as the required clustering expressions. 
+ checkSatisfied( + HashPartitioning(Seq($"a", $"b", $"c"), 10), + ClusteredDistribution(Seq($"a", $"b", $"c"), requireAllClusterKeys = true), + true) + + checkSatisfied( + HashPartitioning(Seq($"b", $"c"), 10), + ClusteredDistribution(Seq($"a", $"b", $"c"), requireAllClusterKeys = true), + false) + + checkSatisfied( + HashPartitioning(Seq($"b", $"a", $"c"), 10), + ClusteredDistribution(Seq($"a", $"b", $"c"), requireAllClusterKeys = true), + false) + // HashPartitioning cannot satisfy OrderedDistribution checkSatisfied( HashPartitioning(Seq($"a", $"b", $"c"), 10), @@ -249,22 +267,40 @@ class DistributionSuite extends SparkFunSuite { RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10), ClusteredDistribution(Seq($"c", $"d")), false) + + // When ClusteredDistribution.requireAllClusterKeys is set to true, + // RangePartitioning can only satisfy ClusteredDistribution iff its ordering expressions are + // exactly same as the required clustering expressions. + checkSatisfied( + RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10), + ClusteredDistribution(Seq($"a", $"b", $"c"), requireAllClusterKeys = true), + true) + + checkSatisfied( + RangePartitioning(Seq($"a".asc, $"b".asc), 10), + ClusteredDistribution(Seq($"a", $"b", $"c"), requireAllClusterKeys = true), + false) + + checkSatisfied( + RangePartitioning(Seq($"b".asc, $"a".asc, $"c".asc), 10), + ClusteredDistribution(Seq($"a", $"b", $"c"), requireAllClusterKeys = true), + false) } test("Partitioning.numPartitions must match Distribution.requiredNumPartitions to satisfy it") { checkSatisfied( SinglePartition, - ClusteredDistribution(Seq($"a", $"b", $"c"), Some(10)), + ClusteredDistribution(Seq($"a", $"b", $"c"), requiredNumPartitions = Some(10)), false) checkSatisfied( HashPartitioning(Seq($"a", $"b", $"c"), 10), - ClusteredDistribution(Seq($"a", $"b", $"c"), Some(5)), + ClusteredDistribution(Seq($"a", $"b", $"c"), requiredNumPartitions = Some(5)), false) checkSatisfied( RangePartitioning(Seq($"a".asc, $"b".asc, $"c".asc), 10), - ClusteredDistribution(Seq($"a", $"b", $"c"), Some(5)), + ClusteredDistribution(Seq($"a", $"b", $"c"), requiredNumPartitions = Some(5)), false) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEUtils.scala index cbd4ee698df28..51833012a128e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEUtils.scala @@ -37,7 +37,7 @@ object AQEUtils { } else { None } - Some(ClusteredDistribution(h.expressions, numPartitions)) + Some(ClusteredDistribution(h.expressions, requiredNumPartitions = numPartitions)) case f: FilterExec => getRequiredDistribution(f.child) case s: SortExec if !s.global => getRequiredDistribution(s.child) case c: CollectMetricsExec => getRequiredDistribution(c.child) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala index 93ed5916bfb2c..dfcb707376663 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala @@ -96,8 +96,10 @@ case class FlatMapGroupsWithStateExec( // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution // before making 
any changes. // TODO(SPARK-38204) - ClusteredDistribution(groupingAttributes, stateInfo.map(_.numPartitions)) :: - ClusteredDistribution(initialStateGroupAttrs, stateInfo.map(_.numPartitions)) :: + ClusteredDistribution( + groupingAttributes, requiredNumPartitions = stateInfo.map(_.numPartitions)) :: + ClusteredDistribution( + initialStateGroupAttrs, requiredNumPartitions = stateInfo.map(_.numPartitions)) :: Nil } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index bbc6fa05d514b..f9ae65cdc47d5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -287,6 +287,11 @@ abstract class StreamExecution( // Disable cost-based join optimization as we do not want stateful operations // to be rearranged sparkSessionForStream.conf.set(SQLConf.CBO_ENABLED.key, "false") + // Disable any config affecting the required child distribution of stateful operators. + // Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution for + // details. + sparkSessionForStream.conf.set(SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_DISTRIBUTION.key, + "false") updateStatusMessage("Initializing sources") // force initialization of the logical plan so that the sources can be created diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index 3ab2ad47e98c4..45c6430f96423 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -340,7 +340,8 @@ case class StateStoreRestoreExec( if (keyExpressions.isEmpty) { AllTuples :: Nil } else { - ClusteredDistribution(keyExpressions, stateInfo.map(_.numPartitions)) :: Nil + ClusteredDistribution(keyExpressions, + requiredNumPartitions = stateInfo.map(_.numPartitions)) :: Nil } } @@ -502,7 +503,8 @@ case class StateStoreSaveExec( if (keyExpressions.isEmpty) { AllTuples :: Nil } else { - ClusteredDistribution(keyExpressions, stateInfo.map(_.numPartitions)) :: Nil + ClusteredDistribution(keyExpressions, + requiredNumPartitions = stateInfo.map(_.numPartitions)) :: Nil } } @@ -582,7 +584,8 @@ case class SessionWindowStateStoreRestoreExec( // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution // before making any changes. // TODO(SPARK-38204) - ClusteredDistribution(keyWithoutSessionExpressions, stateInfo.map(_.numPartitions)) :: Nil + ClusteredDistribution(keyWithoutSessionExpressions, + requiredNumPartitions = stateInfo.map(_.numPartitions)) :: Nil } override def requiredChildOrdering: Seq[Seq[SortOrder]] = { @@ -696,7 +699,8 @@ case class SessionWindowStateStoreSaveExec( // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution // before making any changes. 
// TODO(SPARK-38204) - ClusteredDistribution(keyExpressions, stateInfo.map(_.numPartitions)) :: Nil + ClusteredDistribution(keyExpressions, + requiredNumPartitions = stateInfo.map(_.numPartitions)) :: Nil } override def shouldRunAnotherBatch(newMetadata: OffsetSeqMetadata): Boolean = { @@ -757,7 +761,8 @@ case class StreamingDeduplicateExec( // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution // before making any changes. // TODO(SPARK-38204) - ClusteredDistribution(keyExpressions, stateInfo.map(_.numPartitions)) :: Nil + ClusteredDistribution(keyExpressions, + requiredNumPartitions = stateInfo.map(_.numPartitions)) :: Nil } override protected def doExecute(): RDD[InternalRow] = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 1491c5a4f26b1..3cf61c3402bc8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -20,9 +20,12 @@ package org.apache.spark.sql import org.scalatest.matchers.must.Matchers.the import org.apache.spark.TestUtils.{assertNotSpilled, assertSpilled} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} import org.apache.spark.sql.catalyst.optimizer.TransposeWindow +import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper -import org.apache.spark.sql.execution.exchange.Exchange +import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, Exchange, ShuffleExchangeExec} +import org.apache.spark.sql.execution.window.WindowExec import org.apache.spark.sql.expressions.{Aggregator, MutableAggregationBuffer, UserDefinedAggregateFunction, Window} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -1071,4 +1074,48 @@ class DataFrameWindowFunctionsSuite extends QueryTest Row("a", 1, "x", "x"), Row("b", 0, null, null))) } + + test("SPARK-38237: require all cluster keys for child required distribution for window query") { + def partitionExpressionsColumns(expressions: Seq[Expression]): Seq[String] = { + expressions.flatMap { + case ref: AttributeReference => Some(ref.name) + } + } + + def isShuffleExecByRequirement( + plan: ShuffleExchangeExec, + desiredClusterColumns: Seq[String]): Boolean = plan match { + case ShuffleExchangeExec(op: HashPartitioning, _, ENSURE_REQUIREMENTS) => + partitionExpressionsColumns(op.expressions) === desiredClusterColumns + case _ => false + } + + val df = Seq(("a", 1, 1), ("a", 2, 2), ("b", 1, 3), ("b", 1, 4)).toDF("key1", "key2", "value") + val windowSpec = Window.partitionBy("key1", "key2").orderBy("value") + + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", + SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_DISTRIBUTION.key -> "true") { + + val windowed = df + // repartition by subset of window partitionBy keys which satisfies ClusteredDistribution + .repartition($"key1") + .select( + lead($"key1", 1).over(windowSpec), + lead($"value", 1).over(windowSpec)) + + checkAnswer(windowed, Seq(Row("b", 4), Row(null, null), Row(null, null), Row(null, null))) + + val shuffleByRequirement = windowed.queryExecution.executedPlan.find { + case w: WindowExec => + w.child.find { + case s: ShuffleExchangeExec => isShuffleExecByRequirement(s, Seq("key1", "key2")) + case _ => false + }.nonEmpty + case _ => false + 
} + + assert(shuffleByRequirement.nonEmpty, "Can't find desired shuffle node from the query plan") + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 383b84dc0d8f1..2ab1b6d4963a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -432,7 +432,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("EnsureRequirements should respect ClusteredDistribution's num partitioning") { - val distribution = ClusteredDistribution(Literal(1) :: Nil, Some(13)) + val distribution = ClusteredDistribution(Literal(1) :: Nil, requiredNumPartitions = Some(13)) // Number of partitions differ val finalPartitioning = HashPartitioning(Literal(1) :: Nil, 13) val childPartitioning = HashPartitioning(Literal(1) :: Nil, 5) From 9eab255ecc1b2a854fc5f054969a3aa8263604a2 Mon Sep 17 00:00:00 2001 From: Martin Tzvetanov Grigorov Date: Sat, 26 Feb 2022 09:16:47 -0600 Subject: [PATCH 341/513] [SPARK-38242][CORE] Sort the SparkSubmit debug output ### What changes were proposed in this pull request? Sort the debug info printed by SparkSubmit when `-verbose` is enabled. ### Why are the changes needed? This way it is easier to find settings/properties and their values. ### Does this PR introduce _any_ user-facing change? Yes. The log output changes - some data is sorted. ### How was this patch tested? Old unit tests still pass & manually check the sorted output. ``` Main class: org.apache.spark.deploy.k8s.submit.KubernetesClientApplication Arguments: --main-class --primary-java-resource local:///opt/spark/examples/jars/spark-examples_2.13-3.3.0-SNAPSHOT.jar org.apache.spark.examples.SparkPi Spark config: (spark.app.name,spark-on-k8s-app) (spark.app.submitTime,1645106476125) (spark.driver.cores,1) (spark.driver.extraJavaOptions,-Dio.netty.tryReflectionSetAccessible=true) (spark.driver.memory,2048m) (spark.dynamicAllocation.enabled,true) (spark.dynamicAllocation.shuffleTracking.enabled,true) (spark.executor.cores,2) (spark.executor.extraJavaOptions,-Dio.netty.tryReflectionSetAccessible=true) (spark.executor.instances,3) (spark.executor.memory,2048m) (spark.jars,local:///opt/spark/examples/jars/spark-examples_2.13-3.3.0-SNAPSHOT.jar) (spark.kubernetes.allocation.batch.delay,1) (spark.kubernetes.allocation.batch.size,3) (spark.kubernetes.authenticate.driver.serviceAccountName,spark-account-name) (spark.kubernetes.driver.container.image,spark/spark:3.3.0-SNAPSHOT-scala_2.13-11-jre-slim) (spark.kubernetes.executor.container.image,spark/spark:3.3.0-SNAPSHOT-scala_2.13-11-jre-slim) (spark.kubernetes.namespace,spark-on-k8s) (spark.master,k8s://https://192.168.49.2:8443) (spark.network.timeout,300) (spark.submit.deployMode,cluster) (spark.submit.pyFiles,) Classpath elements: ``` Closes #35556 from martin-g/sort-debug-info-SparkSubmit. 
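For illustration only (a minimal sketch, not part of the patch): `Utils.redact` yields key/value pairs, and the `.sorted` call added in the diff below relies on Scala's default ordering for `(String, String)` tuples, which compares keys before values and therefore produces the alphabetized `Spark config:` listing shown above.

```scala
// Hypothetical redacted config pairs, taken from the sample output above.
val redacted = Seq(
  ("spark.master", "k8s://https://192.168.49.2:8443"),
  ("spark.app.name", "spark-on-k8s-app"),
  ("spark.driver.memory", "2048m"))

// Tuple ordering sorts by the config key first, so the verbose log is easy to scan.
println(redacted.sorted.mkString("\n"))
// (spark.app.name,spark-on-k8s-app)
// (spark.driver.memory,2048m)
// (spark.master,k8s://https://192.168.49.2:8443)
```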
Lead-authored-by: Martin Tzvetanov Grigorov Co-authored-by: Hyukjin Kwon Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 2 +- .../scala/org/apache/spark/deploy/SparkSubmitArguments.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index d3c5f0eaf0341..bf972b9dd9be6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -908,7 +908,7 @@ private[spark] class SparkSubmit extends Logging { logInfo(s"Main class:\n$childMainClass") logInfo(s"Arguments:\n${childArgs.mkString("\n")}") // sysProps may contain sensitive information, so redact before printing - logInfo(s"Spark config:\n${Utils.redact(sparkConf.getAll.toMap).mkString("\n")}") + logInfo(s"Spark config:\n${Utils.redact(sparkConf.getAll.toMap).sorted.mkString("\n")}") logInfo(s"Classpath elements:\n${childClasspath.mkString("\n")}") logInfo("\n") } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 47fbab52659a4..9a5123f218a63 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -327,7 +327,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S | |Spark properties used, including those specified through | --conf and those from the properties file $propertiesFile: - |${Utils.redact(sparkProperties).mkString(" ", "\n ", "\n")} + |${Utils.redact(sparkProperties).sorted.mkString(" ", "\n ", "\n")} """.stripMargin } From 3aa0cd4cd3ffbfa68e26d5d3128bda3cd4c2bc7d Mon Sep 17 00:00:00 2001 From: "Qian.Sun" Date: Sat, 26 Feb 2022 15:58:11 -0800 Subject: [PATCH 342/513] [SPARK-38302][K8S][TESTS] Use `Java 17` in K8S IT in case of `spark-tgz` option ### What changes were proposed in this pull request? This PR aims to use Java 17 in K8s integration tests by default when setting spark-tgz. ### Why are the changes needed? When setting parameters `spark-tgz` during integration tests, the error that `resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17` cannot be found occurs. This is due to the default value of `spark.kubernetes.test.dockerFile` is a [relative path](https://github.com/apache/spark/blob/master/resource-managers/kubernetes/integration-tests/pom.xml#L46). When using the tgz, the working directory is [`$UNPACKED_SPARK_TGZ`](https://github.com/apache/spark/blob/master/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh#L90), and the relative path is invalid. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Runing k8s integration test manaully: #### sbt ```shell $ build/sbt -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube,r "kubernetes-integration-tests/test" KubernetesSuite: - Run SparkPi with no resources - Run SparkPi with no resources & statefulset allocation - Run SparkPi with a very long application name. - Use SparkLauncher.NO_RESOURCE - Run SparkPi with a master URL without a scheme. - Run SparkPi with an argument. - Run SparkPi with custom labels, annotations, and environment variables. 
- All pods have the same service account by default - Run extraJVMOptions check on driver - Run SparkRemoteFileTest using a remote data file - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties - Run SparkPi with env and mount secrets. - Run PySpark on simple pi.py example - Run PySpark to test a pyfiles example - Run PySpark with memory customization - Run in client mode. - Start pod creation from template - PVs with local hostpath storage on statefulsets - PVs with local hostpath and storageClass on statefulsets - PVs with local storage - Launcher client dependencies - SPARK-33615: Launcher client archives - SPARK-33748: Launcher python client respecting PYSPARK_PYTHON - SPARK-33748: Launcher python client respecting spark.pyspark.python and spark.pyspark.driver.python - Launcher python client dependencies using a zip file - Test basic decommissioning - Test basic decommissioning with shuffle cleanup - Test decommissioning with dynamic allocation & shuffle cleanups - Test decommissioning timeouts - SPARK-37576: Rolling decommissioning Run completed in 27 minutes, 8 seconds. Total number of tests run: 30 Suites: completed 2, aborted 0 Tests: succeeded 30, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` #### maven with spark-tgz ```shell $ bash resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh --spark-tgz $TARBALL_TO_TEST --exclude-tags r KubernetesSuite: - Run SparkPi with no resources - Run SparkPi with no resources & statefulset allocation - Run SparkPi with a very long application name. - Use SparkLauncher.NO_RESOURCE - Run SparkPi with a master URL without a scheme. - Run SparkPi with an argument. - Run SparkPi with custom labels, annotations, and environment variables. - All pods have the same service account by default - Run extraJVMOptions check on driver - Run SparkRemoteFileTest using a remote data file - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties - Run SparkPi with env and mount secrets. - Run PySpark on simple pi.py example - Run PySpark to test a pyfiles example - Run PySpark with memory customization - Run in client mode. - Start pod creation from template - PVs with local hostpath storage on statefulsets - PVs with local hostpath and storageClass on statefulsets - PVs with local storage - Launcher client dependencies - SPARK-33615: Launcher client archives - SPARK-33748: Launcher python client respecting PYSPARK_PYTHON - SPARK-33748: Launcher python client respecting spark.pyspark.python and spark.pyspark.driver.python - Launcher python client dependencies using a zip file - Test basic decommissioning - Test basic decommissioning with shuffle cleanup - Test decommissioning with dynamic allocation & shuffle cleanups - Test decommissioning timeouts - SPARK-37576: Rolling decommissioning Run completed in 30 minutes, 6 seconds. Total number of tests run: 30 Suites: completed 2, aborted 0 Tests: succeeded 30, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` #### maven without spark-tgz ```shell $ bash resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh --exclude-tags r KubernetesSuite: - Run SparkPi with no resources - Run SparkPi with no resources & statefulset allocation - Run SparkPi with a very long application name. - Use SparkLauncher.NO_RESOURCE - Run SparkPi with a master URL without a scheme. - Run SparkPi with an argument. - Run SparkPi with custom labels, annotations, and environment variables. 
- All pods have the same service account by default - Run extraJVMOptions check on driver - Run SparkRemoteFileTest using a remote data file - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties - Run SparkPi with env and mount secrets. - Run PySpark on simple pi.py example - Run PySpark to test a pyfiles example - Run PySpark with memory customization - Run in client mode. - Start pod creation from template - PVs with local hostpath storage on statefulsets - PVs with local hostpath and storageClass on statefulsets - PVs with local storage - Launcher client dependencies - SPARK-33615: Launcher client archives - SPARK-33748: Launcher python client respecting PYSPARK_PYTHON - SPARK-33748: Launcher python client respecting spark.pyspark.python and spark.pyspark.driver.python - Launcher python client dependencies using a zip file - Test basic decommissioning - Test basic decommissioning with shuffle cleanup - Test decommissioning with dynamic allocation & shuffle cleanups - Test decommissioning timeouts - SPARK-37576: Rolling decommissioning Run completed in 35 minutes, 0 seconds. Total number of tests run: 30 Suites: completed 2, aborted 0 Tests: succeeded 30, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` Closes #35627 from dcoliversun/SPARK-38302. Authored-by: Qian.Sun Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- .../integration-tests/scripts/setup-integration-test-env.sh | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index e9ef514c4e331..0f06e6bcb0897 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -645,7 +645,7 @@ object KubernetesIntegrationTests { val bindingsDir = s"$sparkHome/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings" val javaImageTag = sys.props.get("spark.kubernetes.test.javaImageTag") val dockerFile = sys.props.getOrElse("spark.kubernetes.test.dockerFile", - "resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17") + s"$sparkHome/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17") val extraOptions = if (javaImageTag.isDefined) { Seq("-b", s"java_image_tag=$javaImageTag") } else { diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 0bc8508cbf86c..318a903c14215 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -43,7 +43,7 @@ - resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 + Dockerfile.java17 diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh index e4a92b60c981d..d8960349f0080 100755 --- a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh +++ b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh @@ -106,7 +106,11 @@ then # OpenJDK base-image tag (e.g. 
8-jre-slim, 11-jre-slim) JAVA_IMAGE_TAG_BUILD_ARG="-b java_image_tag=$JAVA_IMAGE_TAG" else - JAVA_IMAGE_TAG_BUILD_ARG="-f $DOCKER_FILE" + if [[ $DOCKER_FILE = /* ]]; then + JAVA_IMAGE_TAG_BUILD_ARG="-f $DOCKER_FILE" + else + JAVA_IMAGE_TAG_BUILD_ARG="-f $DOCKER_FILE_BASE_PATH/$DOCKER_FILE" + fi fi # Build PySpark image From 89464bf92978accf7a74bb325a62349944c2e672 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sat, 26 Feb 2022 21:55:46 -0800 Subject: [PATCH 343/513] [SPARK-36488][SQL][FOLLOWUP] Simplify the implementation of ResolveReferences#extractStar ### What changes were proposed in this pull request? SPARK-36488 defines the `ResolveReferences#extractStar` and it use `map + flatten` code pattern, this or simplifies it to `flatMap` and reduces one collection conversion while maintaining the same semantics. ### Why are the changes needed? Code simplification ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35508 from LuciferYang/SPARK-36488-FOLLOWUP. Authored-by: yangjie01 Signed-off-by: huaxingao --- .../scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 5cb5f21e9f710..1cb35fdf53616 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1593,7 +1593,7 @@ class Analyzer(override val catalogManager: CatalogManager) exprs.exists(_.collect { case _: Star => true }.nonEmpty) private def extractStar(exprs: Seq[Expression]): Seq[Star] = - exprs.map(_.collect { case s: Star => s }).flatten + exprs.flatMap(_.collect { case s: Star => s }) /** * Expands the matching attribute.*'s in `child`'s output. From cfd66cfc1f955e218b3f53fd56ca423578f39638 Mon Sep 17 00:00:00 2001 From: dch nguyen Date: Mon, 28 Feb 2022 09:24:59 +0900 Subject: [PATCH 344/513] [MINOR][PYTHON] Remove unnecessary quotes in pyspark ### What changes were proposed in this pull request? Remove unnecessary quotes in pyspark ### Why are the changes needed? To make the code clean ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #35664 from dchvn/remove_unused_quote. Authored-by: dch nguyen Signed-off-by: Hyukjin Kwon --- python/pyspark/context.py | 32 ++++++++++------------ python/pyspark/mllib/stat/KernelDensity.py | 2 +- python/pyspark/sql/context.py | 2 +- python/pyspark/sql/dataframe.py | 2 +- python/pyspark/sql/readwriter.py | 2 +- python/pyspark/sql/session.py | 12 ++++---- 6 files changed, 25 insertions(+), 27 deletions(-) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 2f1746b0a4346..3beebb08cde7d 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -575,7 +575,7 @@ def stop(self) -> None: with SparkContext._lock: SparkContext._active_spark_context = None # type: ignore[assignment] - def emptyRDD(self) -> "RDD[Any]": + def emptyRDD(self) -> RDD[Any]: """ Create an RDD that has no partitions or elements. 
""" @@ -583,7 +583,7 @@ def emptyRDD(self) -> "RDD[Any]": def range( self, start: int, end: Optional[int] = None, step: int = 1, numSlices: Optional[int] = None - ) -> "RDD[int]": + ) -> RDD[int]: """ Create a new RDD of int containing elements from `start` to `end` (exclusive), increased by `step` every element. Can be called the same @@ -621,7 +621,7 @@ def range( return self.parallelize(range(start, end, step), numSlices) - def parallelize(self, c: Iterable[T], numSlices: Optional[int] = None) -> "RDD[T]": + def parallelize(self, c: Iterable[T], numSlices: Optional[int] = None) -> RDD[T]: """ Distribute a local Python collection to form an RDD. Using range is recommended if the input represents a range for performance. @@ -725,7 +725,7 @@ def _serialize_to_jvm( # we eagerly reads the file so we can delete right after. os.unlink(tempFile.name) - def pickleFile(self, name: str, minPartitions: Optional[int] = None) -> "RDD[Any]": + def pickleFile(self, name: str, minPartitions: Optional[int] = None) -> RDD[Any]: """ Load an RDD previously saved using :meth:`RDD.saveAsPickleFile` method. @@ -742,7 +742,7 @@ def pickleFile(self, name: str, minPartitions: Optional[int] = None) -> "RDD[Any def textFile( self, name: str, minPartitions: Optional[int] = None, use_unicode: bool = True - ) -> "RDD[str]": + ) -> RDD[str]: """ Read a text file from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI, and return it as an @@ -767,7 +767,7 @@ def textFile( def wholeTextFiles( self, path: str, minPartitions: Optional[int] = None, use_unicode: bool = True - ) -> "RDD[Tuple[str, str]]": + ) -> RDD[Tuple[str, str]]: """ Read a directory of text files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system @@ -822,9 +822,7 @@ def wholeTextFiles( PairDeserializer(UTF8Deserializer(use_unicode), UTF8Deserializer(use_unicode)), ) - def binaryFiles( - self, path: str, minPartitions: Optional[int] = None - ) -> "RDD[Tuple[str, bytes]]": + def binaryFiles(self, path: str, minPartitions: Optional[int] = None) -> RDD[Tuple[str, bytes]]: """ Read a directory of binary files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI @@ -843,7 +841,7 @@ def binaryFiles( PairDeserializer(UTF8Deserializer(), NoOpSerializer()), ) - def binaryRecords(self, path: str, recordLength: int) -> "RDD[bytes]": + def binaryRecords(self, path: str, recordLength: int) -> RDD[bytes]: """ Load data from a flat binary file, assuming each record is a set of numbers with the specified numerical format (see ByteBuffer), and the number of @@ -876,7 +874,7 @@ def sequenceFile( valueConverter: Optional[str] = None, minSplits: Optional[int] = None, batchSize: int = 0, - ) -> "RDD[Tuple[T, U]]": + ) -> RDD[Tuple[T, U]]: """ Read a Hadoop SequenceFile with arbitrary key and value Writable class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. @@ -931,7 +929,7 @@ def newAPIHadoopFile( valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0, - ) -> "RDD[Tuple[T, U]]": + ) -> RDD[Tuple[T, U]]: """ Read a 'new API' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. 
@@ -990,7 +988,7 @@ def newAPIHadoopRDD( valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0, - ) -> "RDD[Tuple[T, U]]": + ) -> RDD[Tuple[T, U]]: """ Read a 'new API' Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict. @@ -1043,7 +1041,7 @@ def hadoopFile( valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0, - ) -> "RDD[Tuple[T, U]]": + ) -> RDD[Tuple[T, U]]: """ Read an 'old' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. @@ -1098,7 +1096,7 @@ def hadoopRDD( valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0, - ) -> "RDD[Tuple[T, U]]": + ) -> RDD[Tuple[T, U]]: """ Read an 'old' Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict. @@ -1145,7 +1143,7 @@ def _checkpointFile(self, name: str, input_deserializer: PairDeserializer) -> RD jrdd = self._jsc.checkpointFile(name) return RDD(jrdd, self, input_deserializer) - def union(self, rdds: List["RDD[T]"]) -> "RDD[T]": + def union(self, rdds: List[RDD[T]]) -> RDD[T]: """ Build the union of a list of RDDs. @@ -1464,7 +1462,7 @@ def statusTracker(self) -> StatusTracker: def runJob( self, - rdd: "RDD[T]", + rdd: RDD[T], partitionFunc: Callable[[Iterable[T]], Iterable[U]], partitions: Optional[Sequence[int]] = None, allowLocal: bool = False, diff --git a/python/pyspark/mllib/stat/KernelDensity.py b/python/pyspark/mllib/stat/KernelDensity.py index 103c955df9bae..febf4fd53fd2f 100644 --- a/python/pyspark/mllib/stat/KernelDensity.py +++ b/python/pyspark/mllib/stat/KernelDensity.py @@ -46,7 +46,7 @@ def setBandwidth(self, bandwidth: float) -> None: """Set bandwidth of each sample. Defaults to 1.0""" self._bandwidth = bandwidth - def setSample(self, sample: "RDD[float]") -> None: + def setSample(self, sample: RDD[float]) -> None: """Set sample points from the population. Should be a RDD""" if not isinstance(sample, RDD): raise TypeError("samples should be a RDD, received %s" % type(sample)) diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index c6eb6c326fd28..18816d3fd2414 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -364,7 +364,7 @@ def createDataFrame( def createDataFrame( # type: ignore[misc] self, - data: Union["RDD[Any]", Iterable[Any], "PandasDataFrameLike"], + data: Union[RDD[Any], Iterable[Any], "PandasDataFrameLike"], schema: Optional[Union[AtomicType, StructType, str]] = None, samplingRatio: Optional[float] = None, verifySchema: bool = True, diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 76c407624ac7d..37e184b23d26c 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -190,7 +190,7 @@ def stat(self) -> "DataFrameStatFunctions": """Returns a :class:`DataFrameStatFunctions` for statistic functions.""" return DataFrameStatFunctions(self) - def toJSON(self, use_unicode: bool = True) -> "RDD[str]": + def toJSON(self, use_unicode: bool = True) -> RDD[str]: """Converts a :class:`DataFrame` into a :class:`RDD` of string. Each row is turned into a JSON document as one element in the returned RDD. 
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 8c729c6635814..8c8756d8a8624 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -187,7 +187,7 @@ def load( def json( self, - path: Union[str, List[str], "RDD[str]"], + path: Union[str, List[str], RDD[str]], schema: Optional[Union[StructType, str]] = None, primitivesAsString: Optional[Union[bool, str]] = None, prefersDecimal: Optional[Union[bool, str]] = None, diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index a41ad156a8596..759859ab33912 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -524,7 +524,7 @@ def _inferSchemaFromList( def _inferSchema( self, - rdd: "RDD[Any]", + rdd: RDD[Any], samplingRatio: Optional[float] = None, names: Optional[List[str]] = None, ) -> StructType: @@ -589,10 +589,10 @@ def _inferSchema( def _createFromRDD( self, - rdd: "RDD[Any]", + rdd: RDD[Any], schema: Optional[Union[DataType, List[str]]], samplingRatio: Optional[float], - ) -> Tuple["RDD[Tuple]", StructType]: + ) -> Tuple[RDD[Tuple], StructType]: """ Create an RDD for DataFrame from an existing RDD, returns the RDD and schema. """ @@ -618,7 +618,7 @@ def _createFromRDD( def _createFromLocal( self, data: Iterable[Any], schema: Optional[Union[DataType, List[str]]] - ) -> Tuple["RDD[Tuple]", StructType]: + ) -> Tuple[RDD[Tuple], StructType]: """ Create an RDD for DataFrame from a list or pandas.DataFrame, returns the RDD and schema. @@ -766,7 +766,7 @@ def createDataFrame( def createDataFrame( # type: ignore[misc] self, - data: Union["RDD[Any]", Iterable[Any], "PandasDataFrameLike"], + data: Union[RDD[Any], Iterable[Any], "PandasDataFrameLike"], schema: Optional[Union[AtomicType, StructType, str]] = None, samplingRatio: Optional[float] = None, verifySchema: bool = True, @@ -897,7 +897,7 @@ def createDataFrame( # type: ignore[misc] def _create_dataframe( self, - data: Union["RDD[Any]", Iterable[Any]], + data: Union[RDD[Any], Iterable[Any]], schema: Optional[Union[DataType, List[str]]], samplingRatio: Optional[float], verifySchema: bool, From 588064f408c8ab68effa615a87653e9d57d9d269 Mon Sep 17 00:00:00 2001 From: YangJie Date: Mon, 28 Feb 2022 09:56:14 +0900 Subject: [PATCH 345/513] [SPARK-38339][BUILD] Upgrade `RoaringBitmap` to 0.9.25 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This pr aims upgrade `RoaringBitmap` from 0.9.23 to 0.9.25. ### Why are the changes needed? The changes from 0.9.23 to version 0.9.25. are as follows: - [change list](https://github.com/RoaringBitmap/RoaringBitmap/compare/0.9.23...0.9.25) [ior fission + benchmark to show iand fission is unprofitable ](https://github.com/RoaringBitmap/RoaringBitmap/pull/543) optimizes the performance of `RoaringBitmap.or` api, and this api is used by Spark code , for example `PushBasedFetchHelper.initiateFallbackFetchForPushMergedBlock`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA Closes #35668 from LuciferYang/upgrade-RoaringBitmap. 
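As a brief aside (assumed usage, not taken from this patch): the in-place `or` that the linked change optimizes is the kind of bitmap union Spark performs when tracking which chunks of a push-merged shuffle block have been fetched; the variable names below are illustrative only.

```scala
import org.roaringbitmap.RoaringBitmap

// Two hypothetical chunk-tracking bitmaps; `or` unions the second into the
// first in place, which is the code path sped up in RoaringBitmap 0.9.25.
val fetchedChunks = RoaringBitmap.bitmapOf(0, 1, 2)
val pendingChunks = RoaringBitmap.bitmapOf(2, 3, 4)
fetchedChunks.or(pendingChunks)
assert(fetchedChunks.getCardinality == 5) // chunks 0..4 are now marked fetched
```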
Lead-authored-by: YangJie Co-authored-by: yangjie01 Signed-off-by: Hyukjin Kwon --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 4 ++-- dev/deps/spark-deps-hadoop-3-hive-2.3 | 4 ++-- pom.xml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index efd380c89d456..5eda2b466cdfd 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -1,7 +1,7 @@ HikariCP/2.5.1//HikariCP-2.5.1.jar JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar -RoaringBitmap/0.9.23//RoaringBitmap-0.9.23.jar +RoaringBitmap/0.9.25//RoaringBitmap-0.9.25.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.21//aircompressor-0.21.jar @@ -243,7 +243,7 @@ scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar scala-reflect/2.12.15//scala-reflect-2.12.15.jar scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar shapeless_2.12/2.3.7//shapeless_2.12-2.3.7.jar -shims/0.9.23//shims-0.9.23.jar +shims/0.9.25//shims-0.9.25.jar slf4j-api/1.7.32//slf4j-api-1.7.32.jar snakeyaml/1.28//snakeyaml-1.28.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 2de677e53fe2f..e140f6696f415 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -1,7 +1,7 @@ HikariCP/2.5.1//HikariCP-2.5.1.jar JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar -RoaringBitmap/0.9.23//RoaringBitmap-0.9.23.jar +RoaringBitmap/0.9.25//RoaringBitmap-0.9.25.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.21//aircompressor-0.21.jar @@ -228,7 +228,7 @@ scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar scala-reflect/2.12.15//scala-reflect-2.12.15.jar scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar shapeless_2.12/2.3.7//shapeless_2.12-2.3.7.jar -shims/0.9.23//shims-0.9.23.jar +shims/0.9.25//shims-0.9.25.jar slf4j-api/1.7.32//slf4j-api-1.7.32.jar snakeyaml/1.28//snakeyaml-1.28.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar diff --git a/pom.xml b/pom.xml index 4e9198c71289b..80a31afe0333a 100644 --- a/pom.xml +++ b/pom.xml @@ -801,7 +801,7 @@ org.roaringbitmap RoaringBitmap - 0.9.23 + 0.9.25 io.netty From 0c74bff56fb342fbb4037122ae9c626c17df1853 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 28 Feb 2022 09:57:46 +0900 Subject: [PATCH 346/513] [SPARK-38338][BUILD][CORE] Remove test dependency on `hamcrest` ### What changes were proposed in this pull request? [SPARK-7081](https://issues.apache.org/jira/browse/SPARK-7081) introduces test dependency on `hamcrest` to extend assertion syntax. But `hamcrest` is not widely used in Spark code, so this pr use `JUnit4` api to replace the corresponding assertion and remove the dependence on `hamcrest`. ### Why are the changes needed? Clean up weak dependencies. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA Closes #35666 from LuciferYang/remove-hamcrest. 
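For illustration (a sketch of the substitution pattern, not code from the patch): each Hamcrest matcher assertion is rewritten as an equivalent boolean check with plain JUnit 4 `assertTrue`, trading a slightly richer failure message for one fewer test-scope dependency.

```scala
import org.junit.Assert.assertTrue

// Hypothetical metric values, for illustration only.
val diskBytesSpilled = 4096L
val mergedFileLength = 65536L

// Before (Hamcrest, now removed):
//   assertThat(taskMetrics.diskBytesSpilled(), greaterThan(0L))
//   assertThat(taskMetrics.diskBytesSpilled(), lessThan(mergedOutputFile.length()))
// After (plain JUnit 4), as applied throughout the diff below:
assertTrue(diskBytesSpilled > 0L)
assertTrue(diskBytesSpilled < mergedFileLength)
```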
Authored-by: yangjie01 Signed-off-by: Hyukjin Kwon --- .../network/crypto/TransportCipherSuite.java | 4 +--- core/pom.xml | 10 --------- .../sort/UnsafeShuffleWriterSuite.java | 21 ++++++++----------- .../map/AbstractBytesToBytesMapSuite.java | 7 ++----- .../sort/UnsafeExternalSorterSuite.java | 9 +++----- .../sort/UnsafeInMemorySorterSuite.java | 8 +++---- pom.xml | 12 ----------- 7 files changed, 18 insertions(+), 53 deletions(-) diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/TransportCipherSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/TransportCipherSuite.java index e62b8cb24e0ed..cff115d12b5fe 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/crypto/TransportCipherSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/TransportCipherSuite.java @@ -28,8 +28,6 @@ import org.apache.commons.crypto.stream.CryptoOutputStream; import org.apache.spark.network.util.MapConfigProvider; import org.apache.spark.network.util.TransportConf; -import org.hamcrest.CoreMatchers; -import org.hamcrest.MatcherAssert; import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -81,7 +79,7 @@ CryptoInputStream createInputStream(ReadableByteChannel ch) throws IOException { channel.writeInbound(buffer2); fail("Should have raised an exception"); } catch (Throwable expected) { - MatcherAssert.assertThat(expected, CoreMatchers.instanceOf(IOException.class)); + assertEquals(expected.getClass(), IOException.class); assertEquals(0, buffer2.refCnt()); } diff --git a/core/pom.xml b/core/pom.xml index 3d095914e5caf..9d3b1709af2ac 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -395,16 +395,6 @@ xml-apis test - - org.hamcrest - hamcrest-core - test - - - org.hamcrest - hamcrest-library - test - org.mockito mockito-core diff --git a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java index 87f9ab32eb585..cd25f32cca89c 100644 --- a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java +++ b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java @@ -54,9 +54,6 @@ import org.apache.spark.storage.*; import org.apache.spark.util.Utils; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.lessThan; import static org.junit.Assert.*; import static org.mockito.Answers.RETURNS_SMART_NULLS; import static org.mockito.Mockito.*; @@ -418,9 +415,9 @@ private void testMergingSpills( assertSpillFilesWereCleanedUp(); ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics(); assertEquals(dataToWrite.size(), shuffleWriteMetrics.recordsWritten()); - assertThat(taskMetrics.diskBytesSpilled(), greaterThan(0L)); - assertThat(taskMetrics.diskBytesSpilled(), lessThan(mergedOutputFile.length())); - assertThat(taskMetrics.memoryBytesSpilled(), greaterThan(0L)); + assertTrue(taskMetrics.diskBytesSpilled() > 0L); + assertTrue(taskMetrics.diskBytesSpilled() < mergedOutputFile.length()); + assertTrue(taskMetrics.memoryBytesSpilled() > 0L); assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.bytesWritten()); } @@ -510,9 +507,9 @@ public void writeEnoughDataToTriggerSpill() throws Exception { assertSpillFilesWereCleanedUp(); ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics(); assertEquals(dataToWrite.size(), 
shuffleWriteMetrics.recordsWritten()); - assertThat(taskMetrics.diskBytesSpilled(), greaterThan(0L)); - assertThat(taskMetrics.diskBytesSpilled(), lessThan(mergedOutputFile.length())); - assertThat(taskMetrics.memoryBytesSpilled(), greaterThan(0L)); + assertTrue(taskMetrics.diskBytesSpilled() > 0L); + assertTrue(taskMetrics.diskBytesSpilled() < mergedOutputFile.length()); + assertTrue(taskMetrics.memoryBytesSpilled()> 0L); assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.bytesWritten()); } @@ -543,9 +540,9 @@ private void writeEnoughRecordsToTriggerSortBufferExpansionAndSpill() throws Exc assertSpillFilesWereCleanedUp(); ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics(); assertEquals(dataToWrite.size(), shuffleWriteMetrics.recordsWritten()); - assertThat(taskMetrics.diskBytesSpilled(), greaterThan(0L)); - assertThat(taskMetrics.diskBytesSpilled(), lessThan(mergedOutputFile.length())); - assertThat(taskMetrics.memoryBytesSpilled(), greaterThan(0L)); + assertTrue(taskMetrics.diskBytesSpilled() > 0L); + assertTrue(taskMetrics.diskBytesSpilled() < mergedOutputFile.length()); + assertTrue(taskMetrics.memoryBytesSpilled()> 0L); assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.bytesWritten()); } diff --git a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java index 3685f6826752d..277c8ffa99a8f 100644 --- a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java +++ b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java @@ -24,7 +24,6 @@ import scala.Tuple2$; -import org.hamcrest.MatcherAssert; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -49,9 +48,7 @@ import org.apache.spark.util.Utils; import org.apache.spark.internal.config.package$; -import static org.hamcrest.Matchers.greaterThan; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; +import static org.junit.Assert.*; import static org.mockito.Answers.RETURNS_SMART_NULLS; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; @@ -527,7 +524,7 @@ public void failureToGrow() { break; } } - MatcherAssert.assertThat(i, greaterThan(0)); + assertTrue(i > 0); Assert.assertFalse(success); } finally { map.free(); diff --git a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java index 025bb47cbff78..04316a62f4f8c 100644 --- a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java +++ b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java @@ -25,7 +25,6 @@ import scala.Tuple2$; -import org.hamcrest.MatcherAssert; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -46,8 +45,6 @@ import org.apache.spark.unsafe.Platform; import org.apache.spark.util.Utils; -import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.junit.Assert.*; import static org.mockito.Answers.RETURNS_SMART_NULLS; import static org.mockito.Mockito.*; @@ -225,7 +222,7 @@ public void testSortTimeMetric() throws Exception { sorter.insertRecord(null, 0, 0, 0, false); sorter.spill(); - MatcherAssert.assertThat(sorter.getSortTimeNanos(), greaterThan(prevSortTime)); 
+ assertTrue(sorter.getSortTimeNanos() > prevSortTime); prevSortTime = sorter.getSortTimeNanos(); sorter.spill(); // no sort needed @@ -233,7 +230,7 @@ public void testSortTimeMetric() throws Exception { sorter.insertRecord(null, 0, 0, 0, false); UnsafeSorterIterator iter = sorter.getSortedIterator(); - MatcherAssert.assertThat(sorter.getSortTimeNanos(), greaterThan(prevSortTime)); + assertTrue(sorter.getSortTimeNanos() > prevSortTime); sorter.cleanupResources(); assertSpillFilesWereCleanedUp(); @@ -252,7 +249,7 @@ public void spillingOccursInResponseToMemoryPressure() throws Exception { // The insertion of this record should trigger a spill: insertNumber(sorter, 0); // Ensure that spill files were created - MatcherAssert.assertThat(tempDir.listFiles().length, greaterThanOrEqualTo(1)); + assertTrue(tempDir.listFiles().length >= 1); // Read back the sorted data: UnsafeSorterIterator iter = sorter.getSortedIterator(); diff --git a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorterSuite.java b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorterSuite.java index 9d4909ddce792..ea1dc9957f466 100644 --- a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorterSuite.java +++ b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorterSuite.java @@ -33,10 +33,8 @@ import org.apache.spark.unsafe.memory.MemoryBlock; import org.apache.spark.internal.config.package$; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.greaterThanOrEqualTo; -import static org.hamcrest.Matchers.isIn; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; public class UnsafeInMemorySorterSuite { @@ -137,8 +135,8 @@ public int compare( final String str = getStringFromDataPage(iter.getBaseObject(), iter.getBaseOffset(), iter.getRecordLength()); final long keyPrefix = iter.getKeyPrefix(); - assertThat(str, isIn(Arrays.asList(dataToSort))); - assertThat(keyPrefix, greaterThanOrEqualTo(prevPrefix)); + assertTrue(Arrays.asList(dataToSort).contains(str)); + assertTrue(keyPrefix >= prevPrefix); prevPrefix = keyPrefix; iterLength++; } diff --git a/pom.xml b/pom.xml index 80a31afe0333a..532d7d5529842 100644 --- a/pom.xml +++ b/pom.xml @@ -1121,18 +1121,6 @@ 4.13.2 test - - org.hamcrest - hamcrest-core - 1.3 - test - - - org.hamcrest - hamcrest-library - 1.3 - test - com.github.sbt junit-interface From 309c65a353489b70bf5135a01d1f159328eaa3f3 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 28 Feb 2022 09:59:05 +0900 Subject: [PATCH 347/513] [SPARK-38337][CORE][SQL][DSTREAM][MLLIB] Replace `toIterator` with `iterator` for `IterableLike`/`IterableOnce` to cleanup deprecated api usage ### What changes were proposed in this pull request? In Scala 2.12, `IterableLike.toIterator` identified as `deprecatedOverriding`: ```scala deprecatedOverriding("toIterator should stay consistent with iterator for all Iterables: override iterator instead.", "2.11.0") override def toIterator: Iterator[A] = iterator ``` In Scala 2.13, `IterableOnce.toIterator` identified as `deprecated`: ```scala deprecated("Use .iterator instead of .toIterator", "2.13.0") `inline` final def toIterator: Iterator[A] = iterator ``` This PR replaces `toIterator` with `iterator` as recommended by scaladoc as above. ### Why are the changes needed? Cleanup deprecated api usage ### Does this PR introduce _any_ user-facing change? No. 
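A minimal sketch of the rewrite itself (illustration only, not part of the patch): `toIterator` is a deprecated alias for `iterator`, so the replacement is purely mechanical and behaviour-preserving.

```scala
val data = Seq("a", "b", "c")
// Deprecated alias: calling it warns under Scala 2.13; overriding it is deprecated in 2.12.
val before = data.toIterator
// Preferred form used throughout this patch; yields exactly the same elements.
val after = data.iterator
assert(before.sameElements(after))
```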
### How was this patch tested? Pass GA Closes #35665 from LuciferYang/toIterator-is-deprecated. Authored-by: yangjie01 Signed-off-by: Hyukjin Kwon --- core/src/main/scala/org/apache/spark/MapOutputTracker.scala | 4 ++-- .../main/scala/org/apache/spark/api/python/PythonRDD.scala | 2 +- .../org/apache/spark/deploy/master/ui/MasterPage.scala | 4 ++-- .../org/apache/spark/scheduler/TaskSetExcludeList.scala | 2 +- .../main/scala/org/apache/spark/storage/BlockManager.scala | 2 +- .../scala/org/apache/spark/storage/memory/MemoryStore.scala | 2 +- .../scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala | 2 +- .../apache/spark/shuffle/BlockStoreShuffleReaderSuite.scala | 2 +- .../apache/spark/shuffle/sort/SortShuffleWriterSuite.scala | 4 ++-- .../spark/storage/ShuffleBlockFetcherIteratorSuite.scala | 2 +- core/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 2 +- .../org/apache/spark/ml/source/image/ImageFileFormat.scala | 2 +- .../org/apache/spark/mllib/evaluation/AreaUnderCurve.scala | 2 +- .../scala/org/apache/spark/mllib/fpm/LocalPrefixSpan.scala | 2 +- .../main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala | 4 ++-- .../org/apache/spark/sql/catalyst/analysis/unresolved.scala | 2 +- .../spark/sql/catalyst/expressions/objects/objects.scala | 2 +- .../apache/spark/sql/catalyst/json/JsonInferSchema.scala | 2 +- .../org/apache/spark/sql/catalyst/optimizer/objects.scala | 2 +- .../apache/spark/sql/catalyst/util/FailureSafeParser.scala | 2 +- .../apache/spark/sql/catalyst/util/StringKeyHashMap.scala | 2 +- .../org/apache/spark/sql/catalyst/util/StringUtils.scala | 2 +- .../org/apache/spark/sql/catalyst/SQLKeywordSuite.scala | 2 +- .../expressions/codegen/GenerateUnsafeRowJoinerSuite.scala | 2 +- .../org/apache/spark/sql/execution/command/commands.scala | 4 ++-- .../scala/org/apache/spark/sql/execution/command/ddl.scala | 4 ++-- .../spark/sql/execution/datasources/FileScanRDD.scala | 2 +- .../spark/sql/execution/datasources/orc/OrcUtils.scala | 2 +- .../datasources/v2/FilePartitionReaderFactory.scala | 4 ++-- .../spark/sql/execution/datasources/v2/V2CommandExec.scala | 2 +- .../sql/execution/joins/BroadcastNestedLoopJoinExec.scala | 2 +- .../spark/sql/execution/arrow/ArrowConvertersSuite.scala | 4 ++-- .../spark/sql/execution/datasources/FileIndexSuite.scala | 2 +- .../spark/sql/execution/joins/HashedRelationSuite.scala | 2 +- .../org/apache/spark/sql/hive/orc/OrcFileOperator.scala | 2 +- .../org/apache/spark/streaming/util/RawTextHelper.scala | 6 +++--- .../org/apache/spark/streaming/DStreamClosureSuite.scala | 4 ++-- .../org/apache/spark/streaming/InputStreamsSuite.scala | 4 ++-- .../apache/spark/streaming/ReceivedBlockHandlerSuite.scala | 4 ++-- 39 files changed, 52 insertions(+), 52 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 9835695794d83..e6ed469250b47 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -1177,7 +1177,7 @@ private[spark] class MapOutputTrackerMaster( override def getMapSizesForMergeResult( shuffleId: Int, partitionId: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = { - Seq.empty.toIterator + Seq.empty.iterator } // This method is only called in local-mode. 
Since push based shuffle won't be @@ -1186,7 +1186,7 @@ private[spark] class MapOutputTrackerMaster( shuffleId: Int, partitionId: Int, chunkTracker: RoaringBitmap): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = { - Seq.empty.toIterator + Seq.empty.iterator } // This method is only called in local-mode. diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 6d4dc3d3dfe92..6dc9e71a00848 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -245,7 +245,7 @@ private[spark] object PythonRDD extends Logging { out.writeInt(1) // Write the next object and signal end of data for this iteration - writeIteratorToStream(partitionArray.toIterator, out) + writeIteratorToStream(partitionArray.iterator, out) out.writeInt(SpecialLengths.END_OF_DATA_SECTION) out.flush() } else { diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index 6143321427d4c..2b4d860b92804 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala @@ -76,13 +76,13 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { private def formatMasterResourcesInUse(aliveWorkers: Array[WorkerInfo]): String = { val totalInfo = aliveWorkers.map(_.resourcesInfo) - .flatMap(_.toIterator) + .flatMap(_.iterator) .groupBy(_._1) // group by resource name .map { case (rName, rInfoArr) => rName -> rInfoArr.map(_._2.addresses.size).sum } val usedInfo = aliveWorkers.map(_.resourcesInfoUsed) - .flatMap(_.toIterator) + .flatMap(_.iterator) .groupBy(_._1) // group by resource name .map { case (rName, rInfoArr) => rName -> rInfoArr.map(_._2.addresses.size).sum diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala index d20f3ed65472e..f479e5e32bc2f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala @@ -117,7 +117,7 @@ private[scheduler] class TaskSetExcludelist( // over the limit, exclude this task from the entire host. val execsWithFailuresOnNode = nodeToExecsWithFailures.getOrElseUpdate(host, new HashSet()) execsWithFailuresOnNode += exec - val failuresOnHost = execsWithFailuresOnNode.toIterator.flatMap { exec => + val failuresOnHost = execsWithFailuresOnNode.iterator.flatMap { exec => execToFailures.get(exec).map { failures => // We count task attempts here, not the number of unique executors with failures. 
This is // because jobs are aborted based on the number task attempts; if we counted unique diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 7ae57f71a129d..abd0beb458b78 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -1872,7 +1872,7 @@ private[spark] class BlockManager( serializerManager.dataSerializeStream( blockId, out, - elements.toIterator)(info.classTag.asInstanceOf[ClassTag[T]]) + elements.iterator)(info.classTag.asInstanceOf[ClassTag[T]]) } case Right(bytes) => diskStore.putBytes(blockId, bytes) diff --git a/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala index 1d3543ed8b23c..144d8cff7d4fa 100644 --- a/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala @@ -305,7 +305,7 @@ private[spark] class MemoryStore( val unrolledIterator = if (valuesHolder.vector != null) { valuesHolder.vector.iterator } else { - valuesHolder.arrayValues.toIterator + valuesHolder.arrayValues.iterator } Left(new PartiallyUnrolledIterator( diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala index 16a92f54f9368..7875cbcc0dfae 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala @@ -138,5 +138,5 @@ class RDDOperationScopeSuite extends SparkFunSuite with BeforeAndAfter { private class MyCoolRDD(sc: SparkContext) extends RDD[Int](sc, Nil) { override def getPartitions: Array[Partition] = Array.empty - override def compute(p: Partition, context: TaskContext): Iterator[Int] = { Nil.toIterator } + override def compute(p: Partition, context: TaskContext): Iterator[Int] = { Nil.iterator } } diff --git a/core/src/test/scala/org/apache/spark/shuffle/BlockStoreShuffleReaderSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/BlockStoreShuffleReaderSuite.scala index d964b28df2983..56b8e0b6df3fd 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/BlockStoreShuffleReaderSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/BlockStoreShuffleReaderSuite.scala @@ -111,7 +111,7 @@ class BlockStoreShuffleReaderSuite extends SparkFunSuite with LocalSparkContext val shuffleBlockId = ShuffleBlockId(shuffleId, mapId, reduceId) (shuffleBlockId, byteOutputStream.size().toLong, mapId) } - Seq((localBlockManagerId, shuffleBlockIdsAndSizes)).toIterator + Seq((localBlockManagerId, shuffleBlockIdsAndSizes)).iterator } // Create a mocked shuffle handle to pass into HashShuffleReader. 
diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleWriterSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleWriterSuite.scala index 6c13c7c8c3c61..9e52b5e15143b 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleWriterSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleWriterSuite.scala @@ -103,7 +103,7 @@ class SortShuffleWriterSuite mapId = 2, context, shuffleExecutorComponents) - writer.write(records.toIterator) + writer.write(records.iterator) writer.stop(success = true) val dataFile = shuffleBlockResolver.getDataFile(shuffleId, 2) val writeMetrics = context.taskMetrics().shuffleWriteMetrics @@ -160,7 +160,7 @@ class SortShuffleWriterSuite context, new LocalDiskShuffleExecutorComponents( conf, shuffleBlockResolver._blockManager, shuffleBlockResolver)) - writer.write(records.toIterator) + writer.write(records.iterator) val sorterMethod = PrivateMethod[ExternalSorter[_, _, _]](Symbol("sorter")) val sorter = writer.invokePrivate(sorterMethod()) val expectSpillSize = if (doSpill) records.size else 0 diff --git a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala index fdaf1f8e3ca96..e6f052510462d 100644 --- a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala @@ -201,7 +201,7 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT transfer, blockManager.getOrElse(createMockBlockManager()), mapOutputTracker, - blocksByAddress.toIterator, + blocksByAddress.iterator, (_, in) => streamWrapperLimitSize.map(new LimitedInputStream(in, _)).getOrElse(in), maxBytesInFlight, maxReqsInFlight, diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 62cd819177663..973c09884a6d4 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -464,7 +464,7 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { test("get iterator size") { val empty = Seq[Int]() - assert(Utils.getIteratorSize(empty.toIterator) === 0L) + assert(Utils.getIteratorSize(empty.iterator) === 0L) val iterator = Iterator.range(0, 5) assert(Utils.getIteratorSize(iterator) === 5L) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/image/ImageFileFormat.scala b/mllib/src/main/scala/org/apache/spark/ml/source/image/ImageFileFormat.scala index 868056bd3cdd9..0995df51c6422 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/source/image/ImageFileFormat.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/source/image/ImageFileFormat.scala @@ -82,7 +82,7 @@ private[image] class ImageFileFormat extends FileFormat with DataSourceRegister } val resultOpt = ImageSchema.decode(origin, bytes) val filteredResult = if (imageSourceOptions.dropInvalid) { - resultOpt.toIterator + resultOpt.iterator } else { Iterator(resultOpt.getOrElse(ImageSchema.invalidImageRow(origin))) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala index cdb84318833f8..cbe2776f6646c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala +++ 
b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala @@ -78,7 +78,7 @@ private[evaluation] object AreaUnderCurve { * @param curve an iterator over ordered 2D points stored in pairs representing a curve */ def of(curve: Iterable[(Double, Double)]): Double = { - curve.toIterator.sliding(2).withPartial(false).aggregate(0.0)( + curve.iterator.sliding(2).withPartial(false).aggregate(0.0)( seqop = (auc: Double, points: Seq[(Double, Double)]) => auc + trapezoid(points), combop = _ + _ ) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/LocalPrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/LocalPrefixSpan.scala index 659f875a6dc98..30c1a89c590d9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/LocalPrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/LocalPrefixSpan.scala @@ -67,7 +67,7 @@ private[fpm] class LocalPrefixSpan( count >= minCount }.sorted // project and recursively call genFreqPatterns - freqItems.toIterator.flatMap { case (item, count) => + freqItems.iterator.flatMap { case (item, count) => val newPrefix = prefix :+ item Iterator.single((newPrefix, count)) ++ { val projected = postfixes.map(_.project(item)).filter(_.nonEmpty) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala index cd71aac34c268..6f71801814398 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala @@ -220,7 +220,7 @@ object PrefixSpan extends Logging { data.flatMap { itemsets => val uniqItems = mutable.Set.empty[Item] itemsets.foreach(set => uniqItems ++= set) - uniqItems.toIterator.map((_, 1L)) + uniqItems.iterator.map((_, 1L)) }.reduceByKey(_ + _).filter { case (_, count) => count >= minCount }.sortBy(-_._2).map(_._1).collect() @@ -478,7 +478,7 @@ object PrefixSpan extends Logging { } i += 1 } - prefixes.toIterator + prefixes.iterator } /** Tests whether this postfix is non-empty. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index c8ef71eb8b89a..9d24ae4a15950 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -591,7 +591,7 @@ case class GetViewColumnByNameAndOrdinal( override def dataType: DataType = throw new UnresolvedException("dataType") override def nullable: Boolean = throw new UnresolvedException("nullable") override lazy val resolved = false - override def stringArgs: Iterator[Any] = super.stringArgs.toSeq.dropRight(1).toIterator + override def stringArgs: Iterator[Any] = super.stringArgs.toSeq.dropRight(1).iterator } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 68a55f7f11696..4599c2a2d3055 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -828,7 +828,7 @@ case class MapObjects private( private def executeFuncOnCollection(inputCollection: Seq[_]): Iterator[_] = { val row = new GenericInternalRow(1) - inputCollection.toIterator.map { element => + inputCollection.iterator.map { element => row.update(0, element) lambdaFunction.eval(row) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index 6a63118698106..d08773d846960 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -100,7 +100,7 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { wrappedCharException.initCause(e) handleJsonErrorsByParseMode(parseMode, columnNameOfCorruptRecord, wrappedCharException) } - }.reduceOption(typeMerger).toIterator + }.reduceOption(typeMerger).iterator } // Here we manually submit a fold-like Spark job, so that we can set the SQLConf when running diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala index c347a2e807ef2..82aef32c5a22f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala @@ -186,7 +186,7 @@ object ObjectSerializerPruning extends Rule[LogicalPlan] { serializer: NamedExpression, prunedDataType: DataType): NamedExpression = { val prunedStructTypes = collectStructType(prunedDataType, ArrayBuffer.empty[StructType]) - .toIterator + .iterator def transformer: PartialFunction[Expression, Expression] = { case m: ExternalMapToCatalyst => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala index ab7c9310bf844..5a9e52a51a27f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala @@ -57,7 +57,7 @@ class FailureSafeParser[IN]( def parse(input: IN): Iterator[InternalRow] = { try { - rawParser.apply(input).toIterator.map(row => toResultRow(Some(row), () => null)) + rawParser.apply(input).iterator.map(row => toResultRow(Some(row), () => null)) } catch { case e: BadRecordException => mode match { case PermissiveMode => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringKeyHashMap.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringKeyHashMap.scala index 812d5ded4bf0f..57fecb774bd20 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringKeyHashMap.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringKeyHashMap.scala @@ -43,7 +43,7 @@ class StringKeyHashMap[T](normalizer: (String) => String) { def remove(key: String): Option[T] = base.remove(normalizer(key)) - def iterator: Iterator[(String, T)] = base.toIterator + def iterator: Iterator[(String, T)] = base.iterator def clear(): Unit = base.clear() } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index 0d0f7a07bb478..4ad0337abc45e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -44,7 +44,7 @@ object StringUtils extends Logging { * @return the equivalent Java regular expression of the pattern */ def escapeLikeRegex(pattern: String, escapeChar: Char): String = { - val in = pattern.toIterator + val in = pattern.iterator val out = new StringBuilder() def fail(message: String) = throw QueryCompilationErrors.invalidPatternError(pattern, message) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala index 45f88628f3ab3..e9d30156951d2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala @@ -54,7 +54,7 @@ trait SQLKeywordUtils extends SparkFunSuite with SQLHelper { val default = (_: String) => Nil var startTagFound = false var parseFinished = false - val lineIter = sqlSyntaxDefs.toIterator + val lineIter = sqlSyntaxDefs.iterator while (!parseFinished && lineIter.hasNext) { val line = lineIter.next() if (line.trim.startsWith(startTag)) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoinerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoinerSuite.scala index dd67a61015e72..c13cb33201ad7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoinerSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoinerSuite.scala @@ -206,7 +206,7 @@ class GenerateUnsafeRowJoinerSuite extends SparkFunSuite { if (actualFixedLength !== expectedFixedLength) { actualFixedLength.grouped(8) .zip(expectedFixedLength.grouped(8)) - .zip(mergedSchema.fields.toIterator) + .zip(mergedSchema.fields.iterator) .foreach { case ((actual, expected), field) => assert(actual === expected, s"Fixed length sections are not equal for 
field $field") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index eed8e039eddd0..c21f330be0647 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -83,7 +83,7 @@ case class ExecutedCommandExec(cmd: RunnableCommand) extends LeafExecNode { override def executeCollect(): Array[InternalRow] = sideEffectResult.toArray - override def executeToIterator(): Iterator[InternalRow] = sideEffectResult.toIterator + override def executeToIterator(): Iterator[InternalRow] = sideEffectResult.iterator override def executeTake(limit: Int): Array[InternalRow] = sideEffectResult.take(limit).toArray @@ -124,7 +124,7 @@ case class DataWritingCommandExec(cmd: DataWritingCommand, child: SparkPlan) override def executeCollect(): Array[InternalRow] = sideEffectResult.toArray - override def executeToIterator(): Iterator[InternalRow] = sideEffectResult.toIterator + override def executeToIterator(): Iterator[InternalRow] = sideEffectResult.iterator override def executeTake(limit: Int): Array[InternalRow] = sideEffectResult.take(limit).toArray diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 295838eda5a72..5e2a8c1e9ddbd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -469,7 +469,7 @@ case class AlterTableAddPartitionCommand( // Also the request to metastore times out when adding lot of partitions in one shot. // we should split them into smaller batches val batchSize = conf.getConf(SQLConf.ADD_PARTITION_BATCH_SIZE) - parts.toIterator.grouped(batchSize).foreach { batch => + parts.iterator.grouped(batchSize).foreach { batch => catalog.createPartitions(table.identifier, batch, ignoreIfExists = ifNotExists) } @@ -772,7 +772,7 @@ case class RepairTableCommand( // we should split them into smaller batches. Since Hive client is not thread safe, we cannot // do this in parallel. 
val batchSize = spark.conf.get(SQLConf.ADD_PARTITION_BATCH_SIZE) - partitionSpecsAndLocs.toIterator.grouped(batchSize).foreach { batch => + partitionSpecsAndLocs.iterator.grouped(batchSize).foreach { batch => val now = MILLISECONDS.toSeconds(System.currentTimeMillis()) val parts = batch.map { case (spec, location) => val params = partitionStats.get(location.toString).map { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index ccef75c2ec46a..20c393a5c0e60 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -93,7 +93,7 @@ class FileScanRDD( inputMetrics.setBytesRead(existingBytesRead + getBytesReadCallback()) } - private[this] val files = split.asInstanceOf[FilePartition].files.toIterator + private[this] val files = split.asInstanceOf[FilePartition].files.iterator private[this] var currentFile: PartitionedFile = null private[this] var currentIterator: Iterator[Object] = null diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala index 684bab5883394..a96f77b9acbeb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala @@ -147,7 +147,7 @@ object OrcUtils extends Logging { : Option[StructType] = { val ignoreCorruptFiles = sparkSession.sessionState.conf.ignoreCorruptFiles val conf = sparkSession.sessionState.newHadoopConfWithOptions(options) - files.toIterator.map(file => readSchema(file.getPath, conf, ignoreCorruptFiles)).collectFirst { + files.iterator.map(file => readSchema(file.getPath, conf, ignoreCorruptFiles)).collectFirst { case Some(schema) => logDebug(s"Reading schema from file $files, got Hive schema string: $schema") toCatalystSchema(schema) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReaderFactory.scala index 5e160228c60e3..da4f9e89fde8a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReaderFactory.scala @@ -26,7 +26,7 @@ abstract class FilePartitionReaderFactory extends PartitionReaderFactory { override def createReader(partition: InputPartition): PartitionReader[InternalRow] = { assert(partition.isInstanceOf[FilePartition]) val filePartition = partition.asInstanceOf[FilePartition] - val iter = filePartition.files.toIterator.map { file => + val iter = filePartition.files.iterator.map { file => PartitionedFileReader(file, buildReader(file)) } new FilePartitionReader[InternalRow](iter) @@ -35,7 +35,7 @@ abstract class FilePartitionReaderFactory extends PartitionReaderFactory { override def createColumnarReader(partition: InputPartition): PartitionReader[ColumnarBatch] = { assert(partition.isInstanceOf[FilePartition]) val filePartition = partition.asInstanceOf[FilePartition] - val iter = filePartition.files.toIterator.map { file => + val iter = filePartition.files.iterator.map { file => PartitionedFileReader(file, buildColumnarReader(file)) } 
new FilePartitionReader[ColumnarBatch](iter) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala index fee9137c6ba1d..31e4a772dc1a6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala @@ -48,7 +48,7 @@ abstract class V2CommandExec extends SparkPlan { */ override def executeCollect(): Array[InternalRow] = result.toArray - override def executeToIterator(): Iterator[InternalRow] = result.toIterator + override def executeToIterator(): Iterator[InternalRow] = result.iterator override def executeTake(limit: Int): Array[InternalRow] = result.take(limit).toArray diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoinExec.scala index 4de35b9e06c5d..23b5b614369fd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoinExec.scala @@ -356,7 +356,7 @@ case class BroadcastNestedLoopJoinExec( i += 1 } } - Seq(matched).toIterator + Seq(matched).iterator } matchedBuildRows.fold(new BitSet(relation.value.length))(_ | _) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala index a5ac2d5aa70c9..e876e9d6ff20c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala @@ -1377,7 +1377,7 @@ class ArrowConvertersSuite extends SharedSparkSession { val schema = StructType(Seq(StructField("int", IntegerType, nullable = true))) val ctx = TaskContext.empty() - val batchIter = ArrowConverters.toBatchIterator(inputRows.toIterator, schema, 5, null, ctx) + val batchIter = ArrowConverters.toBatchIterator(inputRows.iterator, schema, 5, null, ctx) val outputRowIter = ArrowConverters.fromBatchIterator(batchIter, schema, null, ctx) var count = 0 @@ -1398,7 +1398,7 @@ class ArrowConvertersSuite extends SharedSparkSession { val schema = StructType(Seq(StructField("int", IntegerType, nullable = true))) val ctx = TaskContext.empty() - val batchIter = ArrowConverters.toBatchIterator(inputRows.toIterator, schema, 5, null, ctx) + val batchIter = ArrowConverters.toBatchIterator(inputRows.iterator, schema, 5, null, ctx) // Write batches to Arrow stream format as a byte array val out = new ByteArrayOutputStream() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala index 690623e72c994..08ddc67cd6553 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala @@ -488,7 +488,7 @@ class FileIndexSuite extends SharedSparkSession { new Path("file")), Array(new BlockLocation())) ) when(dfs.listLocatedStatus(path)).thenReturn(new RemoteIterator[LocatedFileStatus] { - val iter = statuses.toIterator + val iter = statuses.iterator override def 
hasNext: Boolean = iter.hasNext override def next(): LocatedFileStatus = iter.next }) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala index d5b7ed6c275f4..2462fe31a9b66 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala @@ -629,7 +629,7 @@ class HashedRelationSuite extends SharedSparkSession { test("EmptyHashedRelation override methods behavior test") { val buildKey = Seq(BoundReference(0, LongType, false)) - val hashed = HashedRelation(Seq.empty[InternalRow].toIterator, buildKey, 1, mm) + val hashed = HashedRelation(Seq.empty[InternalRow].iterator, buildKey, 1, mm) assert(hashed == EmptyHashedRelation) val key = InternalRow(1L) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala index 1a5f47bf5aa7d..f6a85c4778bd1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala @@ -93,7 +93,7 @@ private[hive] object OrcFileOperator extends Logging { : Option[StructType] = { // Take the first file where we can open a valid reader if we can find one. Otherwise just // return None to indicate we can't infer the schema. - paths.toIterator.map(getFileReader(_, conf, ignoreCorruptFiles)).collectFirst { + paths.iterator.map(getFileReader(_, conf, ignoreCorruptFiles)).collectFirst { case Some(reader) => val readerInspector = reader.getObjectInspector.asInstanceOf[StructObjectInspector] val schema = readerInspector.getTypeName diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala index e207dab7de068..3263f12a4e1ea 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala @@ -47,11 +47,11 @@ object RawTextHelper { i += 1 } } - map.toIterator.map { + map.iterator.map { case (k, v) => (k, v) } } - map.toIterator.map{case (k, v) => (k, v)} + map.iterator.map{case (k, v) => (k, v)} } /** @@ -89,7 +89,7 @@ object RawTextHelper { } } } - taken.toIterator + taken.iterator } /** diff --git a/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala index 0576bf560f30e..dad324b53dd04 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala @@ -90,7 +90,7 @@ class DStreamClosureSuite extends SparkFunSuite with LocalStreamingContext with ds.filter { _ => return; true } } private def testMapPartitions(ds: DStream[Int]): Unit = expectCorrectException { - ds.mapPartitions { _ => return; Seq.empty.toIterator } + ds.mapPartitions { _ => return; Seq.empty.iterator } } private def testReduce(ds: DStream[Int]): Unit = expectCorrectException { ds.reduce { case (_, _) => return; 1 } @@ -153,7 +153,7 @@ class DStreamClosureSuite extends SparkFunSuite with LocalStreamingContext with } private def testUpdateStateByKey(ds: DStream[(Int, Int)]): Unit = { val updateF1 = (_: Seq[Int], _: 
Option[Int]) => { return; Some(1) } - val updateF2 = (_: Iterator[(Int, Seq[Int], Option[Int])]) => { return; Seq((1, 1)).toIterator } + val updateF2 = (_: Iterator[(Int, Seq[Int], Option[Int])]) => { return; Seq((1, 1)).iterator } val updateF3 = (_: Time, _: Int, _: Seq[Int], _: Option[Int]) => { return Option(1) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala index 03182ae64db3d..174c3ca379363 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala @@ -365,7 +365,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { // Setup data queued into the stream val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] - val inputIterator = input.toIterator + val inputIterator = input.iterator for (i <- input.indices) { // Enqueue more than 1 item per tick but they should dequeue one at a time inputIterator.take(2).foreach { i => @@ -411,7 +411,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] // Enqueue the first 3 items (one by one), they should be merged in the next batch - val inputIterator = input.toIterator + val inputIterator = input.iterator inputIterator.take(3).foreach { i => queue.synchronized { queue += ssc.sparkContext.makeRDD(Seq(i)) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala index 3bcea1ab2c680..a3b5b38904a2e 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala @@ -363,7 +363,7 @@ abstract class BaseReceivedBlockHandlerSuite(enableEncryption: Boolean) val blocks = data.grouped(10).toSeq - storeAndVerify(blocks.map { b => IteratorBlock(b.toIterator) }) + storeAndVerify(blocks.map { b => IteratorBlock(b.iterator) }) storeAndVerify(blocks.map { b => ArrayBufferBlock(new ArrayBuffer ++= b) }) storeAndVerify(blocks.map { b => ByteBufferBlock(dataToByteBuffer(b).toByteBuffer) }) } @@ -372,7 +372,7 @@ abstract class BaseReceivedBlockHandlerSuite(enableEncryption: Boolean) private def testErrorHandling(receivedBlockHandler: ReceivedBlockHandler): Unit = { // Handle error in iterator (e.g. divide-by-zero error) intercept[Exception] { - val iterator = (10 to (-10, -1)).toIterator.map { _ / 0 } + val iterator = (10 to (-10, -1)).iterator.map { _ / 0 } receivedBlockHandler.storeBlock(StreamBlockId(1, 1), IteratorBlock(iterator)) } From 244716f8ef15357468375cc6356f92b5511f633d Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Mon, 28 Feb 2022 14:17:30 +0800 Subject: [PATCH 348/513] [SPARK-38321][SQL][TESTS] Fix BooleanSimplificationSuite under ANSI ### What changes were proposed in this pull request? This PR fixes BooleanSimplificationSuite under ANSI mode. Change the test case to be a long type to avoid casting difference in ANSI on/off. ### Why are the changes needed? To set up a new GA job to run tests with ANSI mode before 3.3.0 release. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Test locally with both ANSI on and off, both passed. Closes #35654 from anchovYu/ansi-tests-boolean-simplification. 
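For reference, a minimal sketch (not part of this patch) of how the adjusted assertion could be exercised under both settings; it assumes the suite's existing `checkCondition` helper, its symbol-based test attributes, and the `withSQLConf` helper that `PlanTest` inherits:

```scala
import org.apache.spark.sql.internal.SQLConf

// Run the rewritten assertion with ANSI mode both on and off; comparing 'd
// against a Long literal avoids the casting difference between the two modes,
// so the optimized and expected plans still match.
Seq("true", "false").foreach { ansi =>
  withSQLConf(SQLConf.ANSI_ENABLED.key -> ansi) {
    checkCondition(
      ('a > 1 || 'b > 3) && (('a > 1 || 'b > 3) && 'd > 0L && (('a > 1 || 'b > 3) && 'c > 1)),
      ('a > 1 || 'b > 3) && 'd > 0L && 'c > 1)
  }
}
```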
Authored-by: Xinyi Yu Signed-off-by: Gengliang Wang --- .../sql/catalyst/optimizer/BooleanSimplificationSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala index 07f16f438cc56..41fc6e93cab4f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -138,8 +138,8 @@ class BooleanSimplificationSuite extends PlanTest with ExpressionEvalHelper with 'a > 1 && 'b > 3 && 'c > 1) checkCondition( - ('a > 1 || 'b > 3) && (('a > 1 || 'b > 3) && 'd > 0 && (('a > 1 || 'b > 3) && 'c > 1)), - ('a > 1 || 'b > 3) && 'd > 0 && 'c > 1) + ('a > 1 || 'b > 3) && (('a > 1 || 'b > 3) && 'd > 0L && (('a > 1 || 'b > 3) && 'c > 1)), + ('a > 1 || 'b > 3) && 'd > 0L && 'c > 1) checkCondition( 'a > 1 && 'b > 2 && 'a > 1 && 'c > 3, From c7e363fcd2d386daec2b6090d26c98c9bca38595 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Mon, 28 Feb 2022 01:00:54 -0800 Subject: [PATCH 349/513] [SPARK-38244][K8S][BUILD] Upgrade kubernetes-client to 5.12.1 ### What changes were proposed in this pull request? Upgrade kubernetes-client to 5.12.1: Changes list: - https://github.com/fabric8io/kubernetes-client/releases/tag/v5.12.1 - https://github.com/fabric8io/kubernetes-client/commit/81c11ca5c2044eac2cbebe5107945ec95011e8f4 ### Why are the changes needed? The next kubernetes client version will be 6.x with breaking changes: https://github.com/fabric8io/kubernetes-client/blob/master/CHANGELOG.md#note-breaking-changes-in-the-api . We should upgrade to the latest 5.x now to reduce the cost of the eventual 6.x upgrade. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - CI - integration test Closes #35596 from Yikun/SPARK-38244.
Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 42 +++++++++++++-------------- dev/deps/spark-deps-hadoop-3-hive-2.3 | 42 +++++++++++++-------------- pom.xml | 2 +- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 5eda2b466cdfd..d9589da60da19 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -162,27 +162,27 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/5.12.0//kubernetes-client-5.12.0.jar -kubernetes-model-admissionregistration/5.12.0//kubernetes-model-admissionregistration-5.12.0.jar -kubernetes-model-apiextensions/5.12.0//kubernetes-model-apiextensions-5.12.0.jar -kubernetes-model-apps/5.12.0//kubernetes-model-apps-5.12.0.jar -kubernetes-model-autoscaling/5.12.0//kubernetes-model-autoscaling-5.12.0.jar -kubernetes-model-batch/5.12.0//kubernetes-model-batch-5.12.0.jar -kubernetes-model-certificates/5.12.0//kubernetes-model-certificates-5.12.0.jar -kubernetes-model-common/5.12.0//kubernetes-model-common-5.12.0.jar -kubernetes-model-coordination/5.12.0//kubernetes-model-coordination-5.12.0.jar -kubernetes-model-core/5.12.0//kubernetes-model-core-5.12.0.jar -kubernetes-model-discovery/5.12.0//kubernetes-model-discovery-5.12.0.jar -kubernetes-model-events/5.12.0//kubernetes-model-events-5.12.0.jar -kubernetes-model-extensions/5.12.0//kubernetes-model-extensions-5.12.0.jar -kubernetes-model-flowcontrol/5.12.0//kubernetes-model-flowcontrol-5.12.0.jar -kubernetes-model-metrics/5.12.0//kubernetes-model-metrics-5.12.0.jar -kubernetes-model-networking/5.12.0//kubernetes-model-networking-5.12.0.jar -kubernetes-model-node/5.12.0//kubernetes-model-node-5.12.0.jar -kubernetes-model-policy/5.12.0//kubernetes-model-policy-5.12.0.jar -kubernetes-model-rbac/5.12.0//kubernetes-model-rbac-5.12.0.jar -kubernetes-model-scheduling/5.12.0//kubernetes-model-scheduling-5.12.0.jar -kubernetes-model-storageclass/5.12.0//kubernetes-model-storageclass-5.12.0.jar +kubernetes-client/5.12.1//kubernetes-client-5.12.1.jar +kubernetes-model-admissionregistration/5.12.1//kubernetes-model-admissionregistration-5.12.1.jar +kubernetes-model-apiextensions/5.12.1//kubernetes-model-apiextensions-5.12.1.jar +kubernetes-model-apps/5.12.1//kubernetes-model-apps-5.12.1.jar +kubernetes-model-autoscaling/5.12.1//kubernetes-model-autoscaling-5.12.1.jar +kubernetes-model-batch/5.12.1//kubernetes-model-batch-5.12.1.jar +kubernetes-model-certificates/5.12.1//kubernetes-model-certificates-5.12.1.jar +kubernetes-model-common/5.12.1//kubernetes-model-common-5.12.1.jar +kubernetes-model-coordination/5.12.1//kubernetes-model-coordination-5.12.1.jar +kubernetes-model-core/5.12.1//kubernetes-model-core-5.12.1.jar +kubernetes-model-discovery/5.12.1//kubernetes-model-discovery-5.12.1.jar +kubernetes-model-events/5.12.1//kubernetes-model-events-5.12.1.jar +kubernetes-model-extensions/5.12.1//kubernetes-model-extensions-5.12.1.jar +kubernetes-model-flowcontrol/5.12.1//kubernetes-model-flowcontrol-5.12.1.jar +kubernetes-model-metrics/5.12.1//kubernetes-model-metrics-5.12.1.jar +kubernetes-model-networking/5.12.1//kubernetes-model-networking-5.12.1.jar +kubernetes-model-node/5.12.1//kubernetes-model-node-5.12.1.jar +kubernetes-model-policy/5.12.1//kubernetes-model-policy-5.12.1.jar +kubernetes-model-rbac/5.12.1//kubernetes-model-rbac-5.12.1.jar 
+kubernetes-model-scheduling/5.12.1//kubernetes-model-scheduling-5.12.1.jar +kubernetes-model-storageclass/5.12.1//kubernetes-model-storageclass-5.12.1.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index e140f6696f415..f01e4b5ffdaed 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -147,27 +147,27 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/5.12.0//kubernetes-client-5.12.0.jar -kubernetes-model-admissionregistration/5.12.0//kubernetes-model-admissionregistration-5.12.0.jar -kubernetes-model-apiextensions/5.12.0//kubernetes-model-apiextensions-5.12.0.jar -kubernetes-model-apps/5.12.0//kubernetes-model-apps-5.12.0.jar -kubernetes-model-autoscaling/5.12.0//kubernetes-model-autoscaling-5.12.0.jar -kubernetes-model-batch/5.12.0//kubernetes-model-batch-5.12.0.jar -kubernetes-model-certificates/5.12.0//kubernetes-model-certificates-5.12.0.jar -kubernetes-model-common/5.12.0//kubernetes-model-common-5.12.0.jar -kubernetes-model-coordination/5.12.0//kubernetes-model-coordination-5.12.0.jar -kubernetes-model-core/5.12.0//kubernetes-model-core-5.12.0.jar -kubernetes-model-discovery/5.12.0//kubernetes-model-discovery-5.12.0.jar -kubernetes-model-events/5.12.0//kubernetes-model-events-5.12.0.jar -kubernetes-model-extensions/5.12.0//kubernetes-model-extensions-5.12.0.jar -kubernetes-model-flowcontrol/5.12.0//kubernetes-model-flowcontrol-5.12.0.jar -kubernetes-model-metrics/5.12.0//kubernetes-model-metrics-5.12.0.jar -kubernetes-model-networking/5.12.0//kubernetes-model-networking-5.12.0.jar -kubernetes-model-node/5.12.0//kubernetes-model-node-5.12.0.jar -kubernetes-model-policy/5.12.0//kubernetes-model-policy-5.12.0.jar -kubernetes-model-rbac/5.12.0//kubernetes-model-rbac-5.12.0.jar -kubernetes-model-scheduling/5.12.0//kubernetes-model-scheduling-5.12.0.jar -kubernetes-model-storageclass/5.12.0//kubernetes-model-storageclass-5.12.0.jar +kubernetes-client/5.12.1//kubernetes-client-5.12.1.jar +kubernetes-model-admissionregistration/5.12.1//kubernetes-model-admissionregistration-5.12.1.jar +kubernetes-model-apiextensions/5.12.1//kubernetes-model-apiextensions-5.12.1.jar +kubernetes-model-apps/5.12.1//kubernetes-model-apps-5.12.1.jar +kubernetes-model-autoscaling/5.12.1//kubernetes-model-autoscaling-5.12.1.jar +kubernetes-model-batch/5.12.1//kubernetes-model-batch-5.12.1.jar +kubernetes-model-certificates/5.12.1//kubernetes-model-certificates-5.12.1.jar +kubernetes-model-common/5.12.1//kubernetes-model-common-5.12.1.jar +kubernetes-model-coordination/5.12.1//kubernetes-model-coordination-5.12.1.jar +kubernetes-model-core/5.12.1//kubernetes-model-core-5.12.1.jar +kubernetes-model-discovery/5.12.1//kubernetes-model-discovery-5.12.1.jar +kubernetes-model-events/5.12.1//kubernetes-model-events-5.12.1.jar +kubernetes-model-extensions/5.12.1//kubernetes-model-extensions-5.12.1.jar +kubernetes-model-flowcontrol/5.12.1//kubernetes-model-flowcontrol-5.12.1.jar +kubernetes-model-metrics/5.12.1//kubernetes-model-metrics-5.12.1.jar +kubernetes-model-networking/5.12.1//kubernetes-model-networking-5.12.1.jar +kubernetes-model-node/5.12.1//kubernetes-model-node-5.12.1.jar +kubernetes-model-policy/5.12.1//kubernetes-model-policy-5.12.1.jar +kubernetes-model-rbac/5.12.1//kubernetes-model-rbac-5.12.1.jar 
+kubernetes-model-scheduling/5.12.1//kubernetes-model-scheduling-5.12.1.jar +kubernetes-model-storageclass/5.12.1//kubernetes-model-storageclass-5.12.1.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar diff --git a/pom.xml b/pom.xml index 532d7d5529842..dfa34f44ce184 100644 --- a/pom.xml +++ b/pom.xml @@ -204,7 +204,7 @@ 7.0.0 org.fusesource.leveldbjni - 5.12.0 + 5.12.1 ${java.home} From 89799b867216ba2eb71e47049bbd6c92f5ee694e Mon Sep 17 00:00:00 2001 From: Johan Nystrom Date: Mon, 28 Feb 2022 19:33:04 +0800 Subject: [PATCH 350/513] [SPARK-38042][SQL] Ensure that ScalaReflection.dataTypeFor works on aliased array types An aliased array type in a product, in a Dataset or Dataframe, causes an exception: ``` type Data = Array[Long] val xs:List[(Data,Int)] = List((Array(1),1), (Array(2),2)) sc.parallelize(xs).toDF("a", "b") ``` Causing ``` scala.MatchError: Data (of class scala.reflect.internal.Types$AliasNoArgsTypeRef) at org.apache.spark.sql.catalyst.ScalaReflection$.$anonfun$dataTypeFor$1(ScalaReflection.scala:104) at scala.reflect.internal.tpe.TypeConstraints$UndoLog.undo(TypeConstraints.scala:69) at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects(ScalaReflection.scala:904) at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects$(ScalaReflection.scala:903) at org.apache.spark.sql.catalyst.ScalaReflection$.cleanUpReflectionObjects(ScalaReflection.scala:49) at org.apache.spark.sql.catalyst.ScalaReflection$.dataTypeFor(ScalaReflection.scala:88) at org.apache.spark.sql.catalyst.ScalaReflection$.$anonfun$serializerFor$6(ScalaReflection.scala:573) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238) at scala.collection.immutable.List.foreach(List.scala:392) at scala.collection.TraversableLike.map(TraversableLike.scala:238) at scala.collection.TraversableLike.map$(TraversableLike.scala:231) at scala.collection.immutable.List.map(List.scala:298) at org.apache.spark.sql.catalyst.ScalaReflection$.$anonfun$serializerFor$1(ScalaReflection.scala:562) at scala.reflect.internal.tpe.TypeConstraints$UndoLog.undo(TypeConstraints.scala:69) at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects(ScalaReflection.scala:904) at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects$(ScalaReflection.scala:903) at org.apache.spark.sql.catalyst.ScalaReflection$.cleanUpReflectionObjects(ScalaReflection.scala:49) at org.apache.spark.sql.catalyst.ScalaReflection$.serializerFor(ScalaReflection.scala:432) at org.apache.spark.sql.catalyst.ScalaReflection$.$anonfun$serializerForType$1(ScalaReflection.scala:421) at scala.reflect.internal.tpe.TypeConstraints$UndoLog.undo(TypeConstraints.scala:69) at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects(ScalaReflection.scala:904) at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects$(ScalaReflection.scala:903) at org.apache.spark.sql.catalyst.ScalaReflection$.cleanUpReflectionObjects(ScalaReflection.scala:49) at org.apache.spark.sql.catalyst.ScalaReflection$.serializerForType(ScalaReflection.scala:413) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$.apply(ExpressionEncoder.scala:55) at org.apache.spark.sql.Encoders$.product(Encoders.scala:285) at org.apache.spark.sql.LowPrioritySQLImplicits.newProductEncoder(SQLImplicits.scala:251) at org.apache.spark.sql.LowPrioritySQLImplicits.newProductEncoder$(SQLImplicits.scala:251) at 
org.apache.spark.sql.SQLImplicits.newProductEncoder(SQLImplicits.scala:32) ... 48 elided ``` It seems that this can be fixed by changing, in ScalaReflection.dataTypeFor: ``` val TypeRef(_, _, Seq(elementType)) = tpe ``` to ``` val TypeRef(_, _, Seq(elementType)) = tpe.dealias ``` ### Why are the changes needed? Without this change, any attempt to create datasets or dataframes using such types throws the exception above. ### Does this PR introduce _any_ user-facing change? No, except for preventing this exception from being thrown. ### How was this patch tested? Added a test to DatasetSuite Closes #35370 from jtnystrom/spark-38042. Lead-authored-by: Johan Nystrom Co-authored-by: Johan Nystrom-Persson Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/catalyst/ScalaReflection.scala | 2 +- .../test/scala/org/apache/spark/sql/DatasetSuite.scala | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 3fcbf2155f456..fced82c97b445 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -102,7 +102,7 @@ object ScalaReflection extends ScalaReflection { val className = getClassNameFromType(tpe) className match { case "scala.Array" => - val TypeRef(_, _, Seq(elementType)) = tpe + val TypeRef(_, _, Seq(elementType)) = tpe.dealias arrayClassFor(elementType) case other => val clazz = getClassFromType(tpe) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 2ce0754a5d1e7..c846441e9e009 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -48,10 +48,12 @@ object TestForTypeAlias { type TwoInt = (Int, Int) type ThreeInt = (TwoInt, Int) type SeqOfTwoInt = Seq[TwoInt] + type IntArray = Array[Int] def tupleTypeAlias: TwoInt = (1, 1) def nestedTupleTypeAlias: ThreeInt = ((1, 1), 2) def seqOfTupleTypeAlias: SeqOfTwoInt = Seq((1, 1), (2, 2)) + def aliasedArrayInTuple: (Int, IntArray) = (1, Array(1)) } class DatasetSuite extends QueryTest @@ -1647,6 +1649,12 @@ class DatasetSuite extends QueryTest ("", Seq((1, 1), (2, 2)))) } + test("SPARK-38042: Dataset should work with a product containing an aliased array type") { + checkDataset( + Seq(1).toDS().map(_ => ("", TestForTypeAlias.aliasedArrayInTuple)), + ("", (1, Array(1)))) + } + test("Check RelationalGroupedDataset toString: Single data") { val kvDataset = (1 to 3).toDF("id").groupBy("id") val expected = "RelationalGroupedDataset: [" + From 50520fe3eb8237adefdb30938736536148110be1 Mon Sep 17 00:00:00 2001 From: yaohua Date: Mon, 28 Feb 2022 19:47:17 +0800 Subject: [PATCH 351/513] [SPARK-38314][SQL] Fix of failing to read parquet files after writing the hidden file metadata in ### What changes were proposed in this pull request? Selecting and then writing df containing hidden file metadata column `_metadata` into a file format like `parquet`, `delta` will still keep the internal `Attribute` metadata information. Then when reading those `parquet`, `delta` files again, it will actually break the code, because it wrongly thinks user data schema`_metadata` is a hidden file source metadata column. 
``` // prepare a file source df df.select("*", "_metadata").write.format("parquet").save(path) spark.read.format("parquet").load(path).select("*").show() ``` This PR fixes this by cleaning up any remaining metadata information of output columns. ### Why are the changes needed? Bugfix ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A new UT Closes #35650 from Yaohua628/spark-38314. Authored-by: yaohua Signed-off-by: Wenchen Fan --- .../expressions/namedExpressions.scala | 15 ++++++++ .../datasources/FileFormatWriter.scala | 14 +++++--- .../datasources/FileMetadataStructSuite.scala | 36 +++++++++++++++++++ 3 files changed, 60 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 248584b3d9dcd..d5df6a12aa45b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -475,4 +475,19 @@ object FileSourceMetadataAttribute { && attr.metadata.getBoolean(FILE_SOURCE_METADATA_COL_ATTR_KEY) => Some(attr) case _ => None } + + /** + * Cleanup the internal metadata information of an attribute if it is + * a [[FileSourceMetadataAttribute]], it will remove both [[METADATA_COL_ATTR_KEY]] and + * [[FILE_SOURCE_METADATA_COL_ATTR_KEY]] from the attribute [[Metadata]] + */ + def cleanupFileSourceMetadataInformation(attr: Attribute): Attribute = attr match { + case FileSourceMetadataAttribute(attr) => attr.withMetadata( + new MetadataBuilder().withMetadata(attr.metadata) + .remove(METADATA_COL_ATTR_KEY) + .remove(FILE_SOURCE_METADATA_COL_ATTR_KEY) + .build() + ) + case attr => attr + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index 409e33448acf8..fe48ddcd1668e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -111,7 +111,11 @@ object FileFormatWriter extends Logging { FileOutputFormat.setOutputPath(job, new Path(outputSpec.outputPath)) val partitionSet = AttributeSet(partitionColumns) - val dataColumns = outputSpec.outputColumns.filterNot(partitionSet.contains) + // cleanup the internal metadata information of + // the file source metadata attribute if any before write out + val finalOutputSpec = outputSpec.copy(outputColumns = outputSpec.outputColumns + .map(FileSourceMetadataAttribute.cleanupFileSourceMetadataInformation)) + val dataColumns = finalOutputSpec.outputColumns.filterNot(partitionSet.contains) var needConvert = false val projectList: Seq[NamedExpression] = plan.output.map { @@ -167,12 +171,12 @@ object FileFormatWriter extends Logging { uuid = UUID.randomUUID.toString, serializableHadoopConf = new SerializableConfiguration(job.getConfiguration), outputWriterFactory = outputWriterFactory, - allColumns = outputSpec.outputColumns, + allColumns = finalOutputSpec.outputColumns, dataColumns = dataColumns, partitionColumns = partitionColumns, bucketSpec = writerBucketSpec, - path = outputSpec.outputPath, - customPartitionLocations = outputSpec.customPartitionLocations, + path = finalOutputSpec.outputPath, + customPartitionLocations = 
finalOutputSpec.customPartitionLocations, maxRecordsPerFile = caseInsensitiveOptions.get("maxRecordsPerFile").map(_.toLong) .getOrElse(sparkSession.sessionState.conf.maxRecordsPerFile), timeZoneId = caseInsensitiveOptions.get(DateTimeUtils.TIMEZONE_OPTION) @@ -212,7 +216,7 @@ object FileFormatWriter extends Logging { // the physical plan may have different attribute ids due to optimizer removing some // aliases. Here we bind the expression ahead to avoid potential attribute ids mismatch. val orderingExpr = bindReferences( - requiredOrdering.map(SortOrder(_, Ascending)), outputSpec.outputColumns) + requiredOrdering.map(SortOrder(_, Ascending)), finalOutputSpec.outputColumns) val sortPlan = SortExec( orderingExpr, global = false, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala index 0d391e0dcd5ff..175b42083f26a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala @@ -474,4 +474,40 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession { Seq(Row("jack", 24, 12345L, f0(METADATA_FILE_SIZE))) ) } + + metadataColumnsTest("write _metadata in parquet and read back", schema) { (df, f0, f1) => + // SPARK-38314: Selecting and then writing df containing hidden file + // metadata column `_metadata` into parquet files will still keep the internal `Attribute` + // metadata information of the column. It will then fail when read again. + withTempDir { dir => + df.select("*", "_metadata") + .write.format("parquet").save(dir.getCanonicalPath + "/new-data") + + val newDF = spark.read.format("parquet").load(dir.getCanonicalPath + "/new-data") + + // SELECT * will have: name, age, info, _metadata of f0 and f1 + checkAnswer( + newDF.select("*"), + Seq( + Row("jack", 24, Row(12345L, "uom"), + Row(f0(METADATA_FILE_PATH), f0(METADATA_FILE_NAME), + f0(METADATA_FILE_SIZE), f0(METADATA_FILE_MODIFICATION_TIME))), + Row("lily", 31, Row(54321L, "ucb"), + Row(f1(METADATA_FILE_PATH), f1(METADATA_FILE_NAME), + f1(METADATA_FILE_SIZE), f1(METADATA_FILE_MODIFICATION_TIME))) + ) + ) + + // SELECT _metadata won't override the existing user data (_metadata of f0 and f1) + checkAnswer( + newDF.select("_metadata"), + Seq( + Row(Row(f0(METADATA_FILE_PATH), f0(METADATA_FILE_NAME), + f0(METADATA_FILE_SIZE), f0(METADATA_FILE_MODIFICATION_TIME))), + Row(Row(f1(METADATA_FILE_PATH), f1(METADATA_FILE_NAME), + f1(METADATA_FILE_SIZE), f1(METADATA_FILE_MODIFICATION_TIME))) + ) + ) + } + } } From 744a223164c2364b884e6f0df6a41cd8fc318bdc Mon Sep 17 00:00:00 2001 From: Yingyi Bu Date: Mon, 28 Feb 2022 21:14:52 +0800 Subject: [PATCH 352/513] [SPARK-38347][SQL] Fix nullability propagation in transformUpWithNewOutput ### What changes were proposed in this pull request? In `updateAttr`, let the new Attribute have the same nullability as the Attribute to be replaced. ### Why are the changes needed? `attrMap` can possibly be populated below an outer join and the outer join changes nullability. ### How was this patch tested? New unit test - verified that it fails without the fix. Closes #35677 from sigmod/nullability. 
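As an illustration only (a sketch along the lines of the new unit test, assuming the Catalyst test DSL imports already used in `QueryPlanSuite`), the following shows why the nullability of a replacement attribute cannot simply be taken from the bottom-up map:

```scala
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.LeftOuter
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation

val t1 = LocalRelation('a.int.withNullability(false))
val t2 = LocalRelation('c.int.withNullability(false), 'd.int.withNullability(false))
// `d` is declared non-nullable in t2, but above the left outer join its output
// attribute must be nullable, so an attribute rebuilt purely from the
// bottom-up attribute map would report the wrong nullability.
val joined = t1.join(t2.select($"c", $"d"), LeftOuter, Some($"a" === $"c")).analyze
assert(joined.output.last.nullable)
```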
Authored-by: Yingyi Bu Signed-off-by: Gengliang Wang --- .../spark/sql/catalyst/plans/QueryPlan.scala | 8 ++++- .../sql/catalyst/plans/QueryPlanSuite.scala | 31 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 58f2425e53706..5d749b8fc4b53 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -354,7 +354,13 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] private def updateAttr(a: Attribute, attrMap: AttributeMap[Attribute]): Attribute = { attrMap.get(a) match { case Some(b) => - AttributeReference(a.name, b.dataType, b.nullable, a.metadata)(b.exprId, a.qualifier) + // The new Attribute has to + // - use a.nullable, because nullability cannot be propagated bottom-up without considering + // enclosed operators, e.g., operators such as Filters and Outer Joins can change + // nullability; + // - use b.dataType because transformUpWithNewOutput is used in the Analyzer for resolution, + // e.g., WidenSetOperationTypes uses it to propagate types bottom-up. + AttributeReference(a.name, b.dataType, a.nullable, a.metadata)(b.exprId, a.qualifier) case None => a } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/QueryPlanSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/QueryPlanSuite.scala index fb014bb8391f3..0839092119da3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/QueryPlanSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/QueryPlanSuite.scala @@ -129,4 +129,35 @@ class QueryPlanSuite extends SparkFunSuite { ) assert(!nonDeterministicPlan.deterministic) } + + test("SPARK-38347: Nullability propagation in transformUpWithNewOutput") { + // A test rule that replaces Attributes in Project's project list. + val testRule = new Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUpWithNewOutput { + case p @ Project(projectList, _) => + val newProjectList = projectList.map { + case a: AttributeReference => a.newInstance() + case ne => ne + } + val newProject = p.copy(projectList = newProjectList) + newProject -> p.output.zip(newProject.output) + } + } + + // Test a Left Outer Join plan in which right-hand-side input attributes are not nullable. + // Those attributes should be nullable after join even with a `transformUpWithNewOutput` + // started below the Left Outer join. + val t1 = LocalRelation('a.int.withNullability(false), + 'b.int.withNullability(false), 'c.int.withNullability(false)) + val t2 = LocalRelation('c.int.withNullability(false), + 'd.int.withNullability(false), 'e.int.withNullability(false)) + val plan = t1.select($"a", $"b") + .join(t2.select($"c", $"d"), LeftOuter, Some($"a" === $"c")) + .select($"a" + $"d").analyze + // The output Attribute of `plan` is nullable even though `d` is not nullable before the join. + assert(plan.output(0).nullable) + // The test rule with `transformUpWithNewOutput` should not change the nullability. 
+ val planAfterTestRule = testRule(plan) + assert(planAfterTestRule.output(0).nullable) + } } From 2f5cfb031935cd118182dc3ae37a465494735674 Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Mon, 28 Feb 2022 21:17:30 +0800 Subject: [PATCH 353/513] [SPARK-38180][SQL] Allow safe up-cast expressions in correlated equality predicates ### What changes were proposed in this pull request? This PR relaxes the constraint added in [SPARK-35080](https://issues.apache.org/jira/browse/SPARK-35080) by allowing safe up-cast expressions in correlated equality predicates. ### Why are the changes needed? Cast expressions are often added by the compiler during query analysis. Correlated equality predicates can be less restrictive to support this common pattern if a cast expression guarantees one-to-one mapping between the child expression and the output datatype (safe up-cast). ### Does this PR introduce _any_ user-facing change? Yes. Safe up-cast expressions are allowed in correlated equality predicates: ```sql SELECT (SELECT SUM(b) FROM VALUES (1, 1), (1, 2) t(a, b) WHERE CAST(a AS STRING) = x) FROM VALUES ('1'), ('2') t(x) ``` Before this change, this query will throw AnalysisException "Correlated column is not allowed in predicate...", and after this change, this query can run successfully. ### How was this patch tested? Unit tests. Closes #35486 from allisonwang-db/spark-38180-cast-in-predicates. Authored-by: allisonwang-db Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/CheckAnalysis.scala | 29 ++----------------- .../optimizer/DecorrelateInnerQuery.scala | 24 ++++++++++----- .../analysis/AnalysisErrorSuite.scala | 5 ++-- .../org/apache/spark/sql/SubquerySuite.scala | 26 +++++++++++++++++ 4 files changed, 48 insertions(+), 36 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 0bf748cdb8518..5c639d102688a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification +import org.apache.spark.sql.catalyst.optimizer.{BooleanSimplification, DecorrelateInnerQuery} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreeNodeTag @@ -937,31 +937,6 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { } } - def containsAttribute(e: Expression): Boolean = { - e.find(_.isInstanceOf[Attribute]).isDefined - } - - // Given a correlated predicate, check if it is either a non-equality predicate or - // equality predicate that does not guarantee one-on-one mapping between inner and - // outer attributes. When the correlated predicate does not contain any attribute - // (i.e. only has outer references), it is supported and should return false. E.G.: - // (a = outer(c)) -> false - // (outer(c) = outer(d)) -> false - // (a > outer(c)) -> true - // (a + b = outer(c)) -> true - // The last one is true because there can be multiple combinations of (a, b) that - // satisfy the equality condition. 
For example, if outer(c) = 0, then both (0, 0) - // and (-1, 1) can make the predicate evaluate to true. - def isUnsupportedPredicate(condition: Expression): Boolean = condition match { - // Only allow equality condition with one side being an attribute and another - // side being an expression without attributes from the inner query. Note - // OuterReference is a leaf node and will not be found here. - case Equality(_: Attribute, b) => containsAttribute(b) - case Equality(a, _: Attribute) => containsAttribute(a) - case e @ Equality(_, _) => containsAttribute(e) - case _ => true - } - val unsupportedPredicates = mutable.ArrayBuffer.empty[Expression] // Simplify the predicates before validating any unsupported correlation patterns in the plan. @@ -1008,7 +983,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { // The other operator is Join. Filter can be anywhere in a correlated subquery. case f: Filter => val (correlated, _) = splitConjunctivePredicates(f.condition).partition(containsOuter) - unsupportedPredicates ++= correlated.filter(isUnsupportedPredicate) + unsupportedPredicates ++= correlated.filterNot(DecorrelateInnerQuery.canPullUpOverAgg) failOnInvalidOuterReference(f) // Aggregate cannot host any correlated expressions diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala index 49a159496d2c4..5ad70f0e30e41 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala @@ -93,19 +93,29 @@ object DecorrelateInnerQuery extends PredicateHelper { /** * Check if an expression can be pulled up over an [[Aggregate]] without changing the * semantics of the plan. The expression must be an equality predicate that guarantees - * one-to-one mapping between inner and outer attributes. More specifically, one side - * of the predicate must be an attribute and another side of the predicate must not - * contain other attributes from the inner query. + * one-to-one mapping between inner and outer attributes. * For example: * (a = outer(c)) -> true * (a > outer(c)) -> false * (a + b = outer(c)) -> false * (a = outer(c) - b) -> false */ - private def canPullUpOverAgg(expression: Expression): Boolean = expression match { - case Equality(_: Attribute, b) => !containsAttribute(b) - case Equality(a, _: Attribute) => !containsAttribute(a) - case o => !containsAttribute(o) + def canPullUpOverAgg(expression: Expression): Boolean = { + def isSupported(e: Expression): Boolean = e match { + case _: Attribute => true + // Allow Cast expressions that guarantee 1:1 mapping. + case Cast(a: Attribute, dataType, _, _) => Cast.canUpCast(a.dataType, dataType) + case _ => false + } + + // Only allow equality condition with one side being an attribute or an expression that + // guarantees 1:1 mapping and another side being an expression without attributes from + // the inner query. 
+ expression match { + case Equality(a, b) if isSupported(a) => !containsAttribute(b) + case Equality(a, b) if isSupported(b) => !containsAttribute(a) + case o => !containsAttribute(o) + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 683f004c61913..c69d51938aef0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -793,7 +793,8 @@ class AnalysisErrorSuite extends AnalysisTest { val a = AttributeReference("a", IntegerType)() val b = AttributeReference("b", IntegerType)() val c = AttributeReference("c", IntegerType)() - val t1 = LocalRelation(a, b) + val d = AttributeReference("d", DoubleType)() + val t1 = LocalRelation(a, b, d) val t2 = LocalRelation(c) val conditions = Seq( (abs($"a") === $"c", "abs(a) = outer(c)"), @@ -801,7 +802,7 @@ class AnalysisErrorSuite extends AnalysisTest { ($"a" + 1 === $"c", "(a + 1) = outer(c)"), ($"a" + $"b" === $"c", "(a + b) = outer(c)"), ($"a" + $"c" === $"b", "(a + outer(c)) = b"), - (And($"a" === $"c", Cast($"a", IntegerType) === $"c"), "CAST(a AS INT) = outer(c)")) + (And($"a" === $"c", Cast($"d", IntegerType) === $"c"), "CAST(d AS INT) = outer(c)")) conditions.foreach { case (cond, msg) => val plan = Project( ScalarSubquery( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index b14404b0c3a3a..92c373a33fb24 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -1992,4 +1992,30 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark }.getMessage.contains("Correlated column is not allowed in predicate")) } } + + test("SPARK-38180: allow safe cast expressions in correlated equality conditions") { + withTempView("t1", "t2") { + Seq((0, 1), (1, 2)).toDF("c1", "c2").createOrReplaceTempView("t1") + Seq((0, 2), (0, 3)).toDF("c1", "c2").createOrReplaceTempView("t2") + checkAnswer(sql( + """ + |SELECT (SELECT SUM(c2) FROM t2 WHERE c1 = a) + |FROM (SELECT CAST(c1 AS DOUBLE) a FROM t1) + |""".stripMargin), + Row(5) :: Row(null) :: Nil) + checkAnswer(sql( + """ + |SELECT (SELECT SUM(c2) FROM t2 WHERE CAST(c1 AS STRING) = a) + |FROM (SELECT CAST(c1 AS STRING) a FROM t1) + |""".stripMargin), + Row(5) :: Row(null) :: Nil) + assert(intercept[AnalysisException] { + sql( + """ + |SELECT (SELECT SUM(c2) FROM t2 WHERE CAST(c1 AS SHORT) = a) + |FROM (SELECT CAST(c1 AS SHORT) a FROM t1) + |""".stripMargin) + }.getMessage.contains("Correlated column is not allowed in predicate")) + } + } } From 07a6f0b97c7696a213322c518a697aa234267d1d Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 28 Feb 2022 22:16:49 +0800 Subject: [PATCH 354/513] [SPARK-38343][SQL][TESTS] Fix SQLQuerySuite under ANSI mode ### What changes were proposed in this pull request? Fix test failures of SQLQuerySuite under ANSI mode ### Why are the changes needed? To set up a new GA test job with ANSI mode on ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually turn on ANSI mode and test . Also it should pass GA tests. Closes #35674 from gengliangwang/fixSQLQuerySuite. 
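Note (illustrative, not part of the patch): the `if (!conf.ansiEnabled)` guards added below hinge on how ANSI mode changes casts from strings. A minimal sketch of the behavior difference, assuming a spark-shell session where `spark` is the active SparkSession:
```scala
// Minimal sketch, assuming a spark-shell session (`spark` is the active SparkSession).
// With ANSI mode off, an invalid string-to-int cast evaluates to NULL; with ANSI mode on,
// the same query fails at execution time, which is why these assertions are now skipped.
spark.conf.set("spark.sql.ansi.enabled", "false")
spark.sql("SELECT CAST('a' AS INT)").show()     // prints a single NULL value

spark.conf.set("spark.sql.ansi.enabled", "true")
try {
  spark.sql("SELECT CAST('a' AS INT)").show()   // throws a runtime error under ANSI mode
} catch {
  case e: Exception => println(s"ANSI mode rejected the cast: ${e.getMessage}")
}
```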
Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../org/apache/spark/sql/SQLQuerySuite.scala | 198 ++++++++++-------- 1 file changed, 115 insertions(+), 83 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 974e489dcc0ad..326ea314ec68e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -69,8 +69,10 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark val queryCaseWhen = sql("select case when true then 1.0 else '1' end from src ") val queryCoalesce = sql("select coalesce(null, 1, '1') from src ") - checkAnswer(queryCaseWhen, Row("1.0") :: Nil) - checkAnswer(queryCoalesce, Row("1") :: Nil) + if (!conf.ansiEnabled) { + checkAnswer(queryCaseWhen, Row("1.0") :: Nil) + checkAnswer(queryCoalesce, Row("1") :: Nil) + } } } @@ -393,10 +395,14 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark testCodeGen( "SELECT max(key), min(key), avg(key), count(key), count(distinct key) FROM testData3x", Row(100, 1, 50.5, 300, 100) :: Nil) - // Aggregate with Code generation handling all null values - testCodeGen( - "SELECT sum('a'), avg('a'), count(null) FROM testData", - Row(null, null, 0) :: Nil) + // Aggregate with Code generation handling all null values. + // If ANSI mode is on, there will be an error since 'a' cannot converted as Numeric. + // Here we simply test it when ANSI mode is off. + if (!conf.ansiEnabled) { + testCodeGen( + "SELECT sum('a'), avg('a'), count(null) FROM testData", + Row(null, null, 0) :: Nil) + } } finally { spark.catalog.dropTempView("testData3x") } @@ -488,9 +494,11 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark Seq(Row(Timestamp.valueOf("1969-12-31 16:00:00.001")), Row(Timestamp.valueOf("1969-12-31 16:00:00.002")))) - checkAnswer(sql( - "SELECT time FROM timestamps WHERE time='123'"), - Nil) + if (!conf.ansiEnabled) { + checkAnswer(sql( + "SELECT time FROM timestamps WHERE time='123'"), + Nil) + } } } @@ -939,9 +947,13 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark Row(1, "A") :: Row(1, "a") :: Row(2, "B") :: Row(2, "b") :: Row(3, "C") :: Row(3, "c") :: Row(4, "D") :: Row(4, "d") :: Row(5, "E") :: Row(6, "F") :: Nil) // Column type mismatches are not allowed, forcing a type coercion. - checkAnswer( - sql("SELECT n FROM lowerCaseData UNION SELECT L FROM upperCaseData"), - ("1" :: "2" :: "3" :: "4" :: "A" :: "B" :: "C" :: "D" :: "E" :: "F" :: Nil).map(Row(_))) + // When ANSI mode is on, the String input will be cast as Int in the following Union, which will + // cause a runtime error. Here we simply test the case when ANSI mode is off. + if (!conf.ansiEnabled) { + checkAnswer( + sql("SELECT n FROM lowerCaseData UNION SELECT L FROM upperCaseData"), + ("1" :: "2" :: "3" :: "4" :: "A" :: "B" :: "C" :: "D" :: "E" :: "F" :: Nil).map(Row(_))) + } // Column type mismatches where a coercion is not possible, in this case between integer // and array types, trigger a TreeNodeException. 
intercept[AnalysisException] { @@ -1038,32 +1050,35 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark Row(Row(3, true), Map("C3" -> null)) :: Row(Row(4, true), Map("D4" -> 2147483644)) :: Nil) - checkAnswer( - sql("SELECT f1.f11, f2['D4'] FROM applySchema2"), - Row(1, null) :: - Row(2, null) :: - Row(3, null) :: - Row(4, 2147483644) :: Nil) - - // The value of a MapType column can be a mutable map. - val rowRDD3 = unparsedStrings.map { r => - val values = r.split(",").map(_.trim) - val v4 = try values(3).toInt catch { - case _: NumberFormatException => null + // If ANSI mode is on, there will be an error "Key D4 does not exist". + if (!conf.ansiEnabled) { + checkAnswer( + sql("SELECT f1.f11, f2['D4'] FROM applySchema2"), + Row(1, null) :: + Row(2, null) :: + Row(3, null) :: + Row(4, 2147483644) :: Nil) + + // The value of a MapType column can be a mutable map. + val rowRDD3 = unparsedStrings.map { r => + val values = r.split(",").map(_.trim) + val v4 = try values(3).toInt catch { + case _: NumberFormatException => null + } + Row(Row(values(0).toInt, values(2).toBoolean), + scala.collection.mutable.Map(values(1) -> v4)) } - Row(Row(values(0).toInt, values(2).toBoolean), - scala.collection.mutable.Map(values(1) -> v4)) - } - val df3 = spark.createDataFrame(rowRDD3, schema2) - df3.createOrReplaceTempView("applySchema3") + val df3 = spark.createDataFrame(rowRDD3, schema2) + df3.createOrReplaceTempView("applySchema3") - checkAnswer( - sql("SELECT f1.f11, f2['D4'] FROM applySchema3"), - Row(1, null) :: - Row(2, null) :: - Row(3, null) :: - Row(4, 2147483644) :: Nil) + checkAnswer( + sql("SELECT f1.f11, f2['D4'] FROM applySchema3"), + Row(1, null) :: + Row(2, null) :: + Row(3, null) :: + Row(4, 2147483644) :: Nil) + } } } @@ -1403,22 +1418,25 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } test("SPARK-7952: fix the equality check between boolean and numeric types") { - withTempView("t") { - // numeric field i, boolean field j, result of i = j, result of i <=> j - Seq[(Integer, java.lang.Boolean, java.lang.Boolean, java.lang.Boolean)]( - (1, true, true, true), - (0, false, true, true), - (2, true, false, false), - (2, false, false, false), - (null, true, null, false), - (null, false, null, false), - (0, null, null, false), - (1, null, null, false), - (null, null, null, true) - ).toDF("i", "b", "r1", "r2").createOrReplaceTempView("t") - - checkAnswer(sql("select i = b from t"), sql("select r1 from t")) - checkAnswer(sql("select i <=> b from t"), sql("select r2 from t")) + // If ANSI mode is on, Spark disallows comparing Int with Boolean. 
+ if (!conf.ansiEnabled) { + withTempView("t") { + // numeric field i, boolean field j, result of i = j, result of i <=> j + Seq[(Integer, java.lang.Boolean, java.lang.Boolean, java.lang.Boolean)]( + (1, true, true, true), + (0, false, true, true), + (2, true, false, false), + (2, false, false, false), + (null, true, null, false), + (null, false, null, false), + (0, null, null, false), + (1, null, null, false), + (null, null, null, true) + ).toDF("i", "b", "r1", "r2").createOrReplaceTempView("t") + + checkAnswer(sql("select i = b from t"), sql("select r1 from t")) + checkAnswer(sql("select i <=> b from t"), sql("select r2 from t")) + } } } @@ -3137,16 +3155,20 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer(sql("select * from t1 where d >= '2000-01-01'"), Row(result)) checkAnswer(sql("select * from t1 where d >= '2000-01-02'"), Nil) checkAnswer(sql("select * from t1 where '2000' >= d"), Row(result)) - checkAnswer(sql("select * from t1 where d > '2000-13'"), Nil) + if (!conf.ansiEnabled) { + checkAnswer(sql("select * from t1 where d > '2000-13'"), Nil) + } withSQLConf(SQLConf.LEGACY_CAST_DATETIME_TO_STRING.key -> "true") { checkAnswer(sql("select * from t1 where d < '2000'"), Nil) checkAnswer(sql("select * from t1 where d < '2001'"), Row(result)) - checkAnswer(sql("select * from t1 where d < '2000-1-1'"), Row(result)) checkAnswer(sql("select * from t1 where d <= '1999'"), Nil) checkAnswer(sql("select * from t1 where d >= '2000'"), Row(result)) - checkAnswer(sql("select * from t1 where d > '1999-13'"), Row(result)) - checkAnswer(sql("select to_date('2000-01-01') > '1'"), Row(true)) + if (!conf.ansiEnabled) { + checkAnswer(sql("select * from t1 where d < '2000-1-1'"), Row(result)) + checkAnswer(sql("select * from t1 where d > '1999-13'"), Row(result)) + checkAnswer(sql("select to_date('2000-01-01') > '1'"), Row(true)) + } } } } @@ -3179,17 +3201,21 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer(sql("select * from t1 where d >= '2000-01-01 01:10:00.000'"), Row(result)) checkAnswer(sql("select * from t1 where d >= '2000-01-02 01:10:00.000'"), Nil) checkAnswer(sql("select * from t1 where '2000' >= d"), Nil) - checkAnswer(sql("select * from t1 where d > '2000-13'"), Nil) + if (!conf.ansiEnabled) { + checkAnswer(sql("select * from t1 where d > '2000-13'"), Nil) + } withSQLConf(SQLConf.LEGACY_CAST_DATETIME_TO_STRING.key -> "true") { checkAnswer(sql("select * from t1 where d < '2000'"), Nil) checkAnswer(sql("select * from t1 where d < '2001'"), Row(result)) - checkAnswer(sql("select * from t1 where d <= '2000-1-1'"), Row(result)) checkAnswer(sql("select * from t1 where d <= '2000-01-02'"), Row(result)) checkAnswer(sql("select * from t1 where d <= '1999'"), Nil) checkAnswer(sql("select * from t1 where d >= '2000'"), Row(result)) - checkAnswer(sql("select * from t1 where d > '1999-13'"), Row(result)) - checkAnswer(sql("select to_timestamp('2000-01-01 01:10:00') > '1'"), Row(true)) + if (!conf.ansiEnabled) { + checkAnswer(sql("select * from t1 where d <= '2000-1-1'"), Row(result)) + checkAnswer(sql("select * from t1 where d > '1999-13'"), Row(result)) + checkAnswer(sql("select to_timestamp('2000-01-01 01:10:00') > '1'"), Row(true)) + } } sql("DROP VIEW t1") } @@ -3254,28 +3280,31 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } test("SPARK-29213: FilterExec should not throw NPE") { - withTempView("t1", "t2", "t3") { - sql("SELECT 
''").as[String].map(identity).toDF("x").createOrReplaceTempView("t1") - sql("SELECT * FROM VALUES 0, CAST(NULL AS BIGINT)") - .as[java.lang.Long] - .map(identity) - .toDF("x") - .createOrReplaceTempView("t2") - sql("SELECT ''").as[String].map(identity).toDF("x").createOrReplaceTempView("t3") - sql( - """ - |SELECT t1.x - |FROM t1 - |LEFT JOIN ( - | SELECT x FROM ( - | SELECT x FROM t2 - | UNION ALL - | SELECT SUBSTR(x,5) x FROM t3 - | ) a - | WHERE LENGTH(x)>0 - |) t3 - |ON t1.x=t3.x + // Under ANSI mode, casting string '' as numeric will cause runtime error + if (!conf.ansiEnabled) { + withTempView("t1", "t2", "t3") { + sql("SELECT ''").as[String].map(identity).toDF("x").createOrReplaceTempView("t1") + sql("SELECT * FROM VALUES 0, CAST(NULL AS BIGINT)") + .as[java.lang.Long] + .map(identity) + .toDF("x") + .createOrReplaceTempView("t2") + sql("SELECT ''").as[String].map(identity).toDF("x").createOrReplaceTempView("t3") + sql( + """ + |SELECT t1.x + |FROM t1 + |LEFT JOIN ( + | SELECT x FROM ( + | SELECT x FROM t2 + | UNION ALL + | SELECT SUBSTR(x,5) x FROM t3 + | ) a + | WHERE LENGTH(x)>0 + |) t3 + |ON t1.x=t3.x """.stripMargin).collect() + } } } @@ -3295,7 +3324,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark sql("CREATE TEMPORARY VIEW tc AS SELECT * FROM VALUES(CAST(1 AS DOUBLE)) AS tc(id)") sql("CREATE TEMPORARY VIEW td AS SELECT * FROM VALUES(CAST(1 AS FLOAT)) AS td(id)") sql("CREATE TEMPORARY VIEW te AS SELECT * FROM VALUES(CAST(1 AS BIGINT)) AS te(id)") - sql("CREATE TEMPORARY VIEW tf AS SELECT * FROM VALUES(CAST(1 AS DECIMAL(38, 38))) AS tf(id)") val df1 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM tb)") checkAnswer(df1, Row(new java.math.BigDecimal(1))) val df2 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM tc)") @@ -3304,8 +3332,12 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer(df3, Row(new java.math.BigDecimal(1))) val df4 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM te)") checkAnswer(df4, Row(new java.math.BigDecimal(1))) - val df5 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM tf)") - checkAnswer(df5, Array.empty[Row]) + if (!conf.ansiEnabled) { + sql( + "CREATE TEMPORARY VIEW tf AS SELECT * FROM VALUES(CAST(1 AS DECIMAL(38, 38))) AS tf(id)") + val df5 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM tf)") + checkAnswer(df5, Array.empty[Row]) + } } } From 6df10ce3924581d4d71fad52b645b522150e76e2 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 28 Feb 2022 17:46:34 +0300 Subject: [PATCH 355/513] [SPARK-38332][SQL] Add the `DATEADD()` and `DATE_ADD()` aliases for `TIMESTAMPADD()` ### What changes were proposed in this pull request? In the PR, I propose to add two aliases for the `TIMESTAMPADD()` function introduced by https://github.com/apache/spark/pull/35502: - `DATEADD()` - `DATE_ADD()` ### Why are the changes needed? 1. To make the migration process from other systems to Spark SQL easier. 2. To achieve feature parity with other DBMSs. ### Does this PR introduce _any_ user-facing change? No. The new aliases just extend Spark SQL API. ### How was this patch tested? 1. By running the existing test suites: ``` $ build/sbt "test:testOnly *SQLKeywordSuite" ``` 3. and new checks: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z date.sql" ``` Closes #35661 from MaxGekk/dateadd. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- docs/sql-ref-ansi-compliance.md | 2 + .../spark/sql/catalyst/parser/SqlBase.g4 | 8 +- .../test/resources/sql-tests/inputs/date.sql | 12 +++ .../sql-tests/results/ansi/date.sql.out | 82 ++++++++++++++++++- .../resources/sql-tests/results/date.sql.out | 82 ++++++++++++++++++- .../sql-tests/results/datetime-legacy.sql.out | 82 ++++++++++++++++++- 6 files changed, 264 insertions(+), 4 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index d695693c24de4..c9b49724b6147 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -390,6 +390,8 @@ Below is a list of all the keywords in Spark SQL. |DATA|non-reserved|non-reserved|non-reserved| |DATABASE|non-reserved|non-reserved|non-reserved| |DATABASES|non-reserved|non-reserved|non-reserved| +|DATEADD|non-reserved|non-reserved|non-reserved| +|DATE_ADD|non-reserved|non-reserved|non-reserved| |DAY|non-reserved|non-reserved|non-reserved| |DBPROPERTIES|non-reserved|non-reserved|non-reserved| |DEFINED|non-reserved|non-reserved|non-reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index d44f508707681..1de4460d3e685 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -860,7 +860,7 @@ valueExpression primaryExpression : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER) #currentLike - | TIMESTAMPADD '(' unit=identifier ',' unitsAmount=valueExpression ',' timestamp=valueExpression ')' #timestampadd + | name=(TIMESTAMPADD | DATEADD | DATE_ADD) '(' unit=identifier ',' unitsAmount=valueExpression ',' timestamp=valueExpression ')' #timestampadd | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase | CASE value=expression whenClause+ (ELSE elseExpression=expression)? 
END #simpleCase | name=(CAST | TRY_CAST) '(' expression AS dataType ')' #cast @@ -1130,6 +1130,8 @@ ansiNonReserved | DATA | DATABASE | DATABASES + | DATEADD + | DATE_ADD | DAY | DBPROPERTIES | DEFINED @@ -1377,6 +1379,8 @@ nonReserved | DATA | DATABASE | DATABASES + | DATEADD + | DATE_ADD | DAY | DBPROPERTIES | DEFINED @@ -1644,6 +1648,8 @@ DAY: 'DAY'; DATA: 'DATA'; DATABASE: 'DATABASE'; DATABASES: 'DATABASES'; +DATEADD: 'DATEADD'; +DATE_ADD: 'DATE_ADD'; DBPROPERTIES: 'DBPROPERTIES'; DEFINED: 'DEFINED'; DELETE: 'DELETE'; diff --git a/sql/core/src/test/resources/sql-tests/inputs/date.sql b/sql/core/src/test/resources/sql-tests/inputs/date.sql index 57049eb461325..6fcba1de44dab 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/date.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/date.sql @@ -140,3 +140,15 @@ select date '2012-01-01' - interval '2-2' year to month, select to_date('26/October/2015', 'dd/MMMMM/yyyy'); select from_json('{"d":"26/October/2015"}', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy')); select from_csv('26/October/2015', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy')); + +-- Add a number of units to a timestamp or a date +select dateadd(MICROSECOND, 1001, timestamp'2022-02-25 01:02:03.123'); +select date_add(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456'); +select dateadd(SECOND, 58, timestamp'2022-02-25 01:02:03'); +select date_add(MINUTE, -100, date'2022-02-25'); +select dateadd(HOUR, -1, timestamp'2022-02-25 01:02:03'); +select date_add(DAY, 367, date'2022-02-25'); +select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03'); +select date_add(MONTH, -1, timestamp'2022-02-25 01:02:03'); +select dateadd(QUARTER, 5, date'2022-02-25'); +select date_add(YEAR, 1, date'2022-02-25'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 151dc3340610f..07989aeae17a2 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 77 +-- Number of queries: 87 -- !query @@ -660,3 +660,83 @@ struct<> -- !query output org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select dateadd(MICROSECOND, 1001, timestamp'2022-02-25 01:02:03.123') +-- !query schema +struct +-- !query output +2022-02-25 01:02:03.124001 + + +-- !query +select date_add(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456') +-- !query schema +struct +-- !query output +2022-02-25 01:02:03.455 + + +-- !query +select dateadd(SECOND, 58, timestamp'2022-02-25 01:02:03') +-- !query schema +struct +-- !query output +2022-02-25 01:03:01 + + +-- !query +select date_add(MINUTE, -100, date'2022-02-25') +-- !query schema +struct +-- !query output +2022-02-24 22:20:00 + + +-- !query +select dateadd(HOUR, -1, timestamp'2022-02-25 01:02:03') +-- !query schema +struct +-- !query output +2022-02-25 00:02:03 + + +-- !query +select date_add(DAY, 367, date'2022-02-25') +-- !query schema +struct +-- !query output +2023-02-27 00:00:00 + + +-- !query +select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03') +-- !query schema +struct +-- !query output +2022-01-28 01:02:03 + + +-- !query +select date_add(MONTH, -1, timestamp'2022-02-25 01:02:03') +-- !query schema +struct +-- !query output +2022-01-25 01:02:03 + + +-- !query +select dateadd(QUARTER, 5, date'2022-02-25') +-- !query schema +struct +-- !query output +2023-05-25 00:00:00 + + +-- !query +select date_add(YEAR, 1, date'2022-02-25') +-- !query schema +struct +-- !query output +2023-02-25 00:00:00 diff --git a/sql/core/src/test/resources/sql-tests/results/date.sql.out b/sql/core/src/test/resources/sql-tests/results/date.sql.out index 562028945103e..e3a2d7d00f6f0 100644 --- a/sql/core/src/test/resources/sql-tests/results/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/date.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 77 +-- Number of queries: 87 -- !query @@ -659,3 +659,83 @@ struct<> -- !query output org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select dateadd(MICROSECOND, 1001, timestamp'2022-02-25 01:02:03.123') +-- !query schema +struct +-- !query output +2022-02-25 01:02:03.124001 + + +-- !query +select date_add(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456') +-- !query schema +struct +-- !query output +2022-02-25 01:02:03.455 + + +-- !query +select dateadd(SECOND, 58, timestamp'2022-02-25 01:02:03') +-- !query schema +struct +-- !query output +2022-02-25 01:03:01 + + +-- !query +select date_add(MINUTE, -100, date'2022-02-25') +-- !query schema +struct +-- !query output +2022-02-24 22:20:00 + + +-- !query +select dateadd(HOUR, -1, timestamp'2022-02-25 01:02:03') +-- !query schema +struct +-- !query output +2022-02-25 00:02:03 + + +-- !query +select date_add(DAY, 367, date'2022-02-25') +-- !query schema +struct +-- !query output +2023-02-27 00:00:00 + + +-- !query +select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03') +-- !query schema +struct +-- !query output +2022-01-28 01:02:03 + + +-- !query +select date_add(MONTH, -1, timestamp'2022-02-25 01:02:03') +-- !query schema +struct +-- !query output +2022-01-25 01:02:03 + + +-- !query +select dateadd(QUARTER, 5, date'2022-02-25') +-- !query schema +struct +-- !query output +2023-05-25 00:00:00 + + +-- !query +select date_add(YEAR, 1, date'2022-02-25') +-- !query schema +struct +-- !query output +2023-02-25 00:00:00 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index e38df80819fb3..a96fb65579de8 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 170 +-- Number of queries: 180 -- !query @@ -658,6 +658,86 @@ struct> {"d":2015-10-26} +-- !query +select dateadd(MICROSECOND, 1001, timestamp'2022-02-25 01:02:03.123') +-- !query schema +struct +-- !query output +2022-02-25 01:02:03.124001 + + +-- !query +select date_add(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456') +-- !query schema +struct +-- !query output +2022-02-25 01:02:03.455 + + +-- !query +select dateadd(SECOND, 58, timestamp'2022-02-25 01:02:03') +-- !query schema +struct +-- !query output +2022-02-25 01:03:01 + + +-- !query +select date_add(MINUTE, -100, date'2022-02-25') +-- !query schema +struct +-- !query output +2022-02-24 22:20:00 + + +-- !query +select dateadd(HOUR, -1, timestamp'2022-02-25 01:02:03') +-- !query schema +struct +-- !query output +2022-02-25 00:02:03 + + +-- !query +select date_add(DAY, 367, date'2022-02-25') +-- !query schema +struct +-- !query output +2023-02-27 00:00:00 + + +-- !query +select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03') +-- !query schema +struct +-- !query output +2022-01-28 01:02:03 + + +-- !query +select date_add(MONTH, -1, timestamp'2022-02-25 01:02:03') +-- !query schema +struct +-- !query output +2022-01-25 01:02:03 + + +-- !query +select dateadd(QUARTER, 5, date'2022-02-25') +-- !query schema +struct +-- !query output +2023-05-25 00:00:00 + + +-- !query +select date_add(YEAR, 1, date'2022-02-25') +-- !query schema +struct +-- !query output +2023-02-25 00:00:00 + + -- !query select timestamp '2019-01-01\t' -- !query schema From 6aa83e7a42555982ea80e16812bed65bc16617a3 Mon Sep 17 00:00:00 2001 From: qitao liu Date: Mon, 28 
Feb 2022 23:48:12 +0900 Subject: [PATCH 356/513] =?UTF-8?q?[SPARK-38033][SS]=20The=20SS=20processi?= =?UTF-8?q?ng=20cannot=20be=20started=20because=20the=20com=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? The code of method: populateStartOffsets in class: org.apache.spark.sql.execution.streaming.MicroBatchExecution is modified. ### Why are the changes needed? In some unexpected cases, commit and offset are inconsistent, and offset is not written into HDFS continuously, as follows: commits /tmp/streaming_xxxxxxxx/commits/113256 /tmp/streaming_xxxxxxxx/commits/113257 offsets /tmp/streaming_xxxxxxxx/offsets/113257 /tmp/streaming_xxxxxxxx/offsets/113259 When we start the streaming program, batch ${latestBatchId - 1} is 113258, but offsets 113258 doesn't exist, an exception will be thrown, resulting in the program cannot be started. As an improvement, Spark doesn‘t need to repair itself, but we could probably do some simply analysis and give better error message. ### Does this PR introduce _any_ user-facing change? Yes. An error message is logged if the exception is thrown. ### How was this patch tested? I have provided a test case that can output logs correctly. We can run test("SPARK-38033: SS cannot be....") in the MicroBatchExecutionSuite class. In fact, I simulated a corresponding scenario to test the original exception.This exception validates normally and outputs a new error message, as follows: 11:00:26.271 WARN org.apache.spark.sql.execution.streaming.ResolveWriteToStream: spark.sql.adaptive.enabled is not supported in streaming DataFrames/Datasets and will be disabled. 11:00:26.675 ERROR org.apache.spark.sql.execution.streaming.MicroBatchExecution: The offset log for batch 3 doesn't exist, which is required to restart the query from the latest batch 4 from the offset log. Please ensure there are two subsequent offset logs available for the latest batch via manually deleting the offset file(s). Please also ensure the latest batch for commit log is equal or one batch earlier than the latest batch for offset log. 11:00:26.690 ERROR org.apache.spark.sql.execution.streaming.MicroBatchExecution: Query [id = d4358946-170c-49a7-823b-d8e4e9126616, runId = 9e7f12b8-6c10-4f36-b5c5-136e1bace8de] terminated with error java.lang.IllegalStateException: batch 3 doesn't exist at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$populateStartOffsets$1(MicroBatchExecution.scala:338) ~[classes/:?] at scala.Option.getOrElse(Option.scala:189) ~[scala-library-2.12.15.jar:?] at org.apache.spark.sql.execution.streaming.MicroBatchExecution.populateStartOffsets(MicroBatchExecution.scala:331) ~[classes/:?] at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$2(MicroBatchExecution.scala:222) ~[classes/:?] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) ~[scala-library-2.12.15.jar:?] at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:375) ~[classes/:?] at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:373) ~[classes/:?] Authored-by: LeeeeLiu liuqt1024gmail.com Closes #35513 from LeeeeLiu/SPARK-38033-m. 
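A simplified sketch of the restart check this patch improves (hedged, not the actual `MicroBatchExecution` code; the variable names are illustrative). It mirrors the commits/offsets scenario above, where offset 113258 was never written:
```scala
// Hedged sketch: on restart, recovering committedOffsets requires the offset log entry
// for latestBatchId - 1. If it is missing, startup must fail; the patch below adds a
// descriptive logError before the existing IllegalStateException is thrown.
val latestBatchId = 113259L
val offsetLogBatches = Set(113257L, 113259L)   // 113258 is missing, as in the scenario above
if (latestBatchId != 0 && !offsetLogBatches.contains(latestBatchId - 1)) {
  throw new IllegalStateException(s"batch ${latestBatchId - 1} doesn't exist")
}
```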
Authored-by: qitao liu Signed-off-by: Jungtaek Lim --- .../streaming/MicroBatchExecution.scala | 6 ++++ .../commits/0 | 2 ++ .../commits/1 | 2 ++ .../commits/2 | 2 ++ .../metadata | 1 + .../offsets/0 | 3 ++ .../offsets/1 | 3 ++ .../offsets/2 | 3 ++ .../offsets/4 | 3 ++ .../streaming/MicroBatchExecutionSuite.scala | 29 +++++++++++++++++-- 10 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/commits/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/commits/1 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/commits/2 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/metadata create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/1 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/2 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/4 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index b5667ee398d65..fb434f488361a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -315,6 +315,12 @@ class MicroBatchExecution( * is the second latest batch id in the offset log. */ if (latestBatchId != 0) { val secondLatestOffsets = offsetLog.get(latestBatchId - 1).getOrElse { + logError(s"The offset log for batch ${latestBatchId - 1} doesn't exist, " + + s"which is required to restart the query from the latest batch $latestBatchId " + + "from the offset log. Please ensure there are two subsequent offset logs " + + "available for the latest batch via manually deleting the offset file(s). 
" + + "Please also ensure the latest batch for commit log is equal or one batch " + + "earlier than the latest batch for offset log.") throw new IllegalStateException(s"batch ${latestBatchId - 1} doesn't exist") } committedOffsets = secondLatestOffsets.toStreamProgress(sources) diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/commits/1 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/commits/1 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/commits/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/commits/2 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/commits/2 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/metadata new file mode 100644 index 0000000000000..4691bccd0a792 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/metadata @@ -0,0 +1 @@ +{"id":"d4358946-170c-49a7-823b-d8e4e9126616"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/0 new file mode 100644 index 0000000000000..807d7b0063b96 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/0 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1531292029003,"conf":{"spark.sql.shuffle.partitions":"5","spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/1 new file mode 100644 index 0000000000000..cce541073fb4b --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/1 @@ -0,0 +1,3 @@ +v1 
+{"batchWatermarkMs":5000,"batchTimestampMs":1531292030005,"conf":{"spark.sql.shuffle.partitions":"5","spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider"}} +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/2 new file mode 100644 index 0000000000000..dd9a1936aba55 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/2 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":5000,"batchTimestampMs":1531292030005,"conf":{"spark.sql.shuffle.partitions":"5","spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider"}} +2 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/4 b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/4 new file mode 100644 index 0000000000000..54a6fecef7d52 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/offsets/4 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":5000,"batchTimestampMs":1531292030005,"conf":{"spark.sql.shuffle.partitions":"5","spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider"}} +4 \ No newline at end of file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala index a508f923ffa13..53ef9dfbe39fa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala @@ -17,6 +17,9 @@ package org.apache.spark.sql.execution.streaming +import java.io.File + +import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfter import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} @@ -24,8 +27,9 @@ import org.apache.spark.sql.catalyst.plans.logical.Range import org.apache.spark.sql.connector.read.streaming import org.apache.spark.sql.connector.read.streaming.SparkDataStream import org.apache.spark.sql.functions.{count, timestamp_seconds, window} -import org.apache.spark.sql.streaming.StreamTest +import org.apache.spark.sql.streaming.{StreamTest, Trigger} import org.apache.spark.sql.types.{LongType, StructType} +import org.apache.spark.util.Utils class MicroBatchExecutionSuite extends StreamTest with BeforeAndAfter { @@ -74,6 +78,27 @@ class MicroBatchExecutionSuite extends StreamTest with BeforeAndAfter { ) } + test("SPARK-38033: SS cannot be started because the commitId and offsetId are inconsistent") { + val inputData = MemoryStream[Int] + val streamEvent = inputData.toDF().select("value") + + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-test-offsetId-commitId-inconsistent/").toURI + + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. 
+ FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + + testStream(streamEvent) ( + AddData(inputData, 1, 2, 3, 4, 5, 6), + StartStream(Trigger.Once, checkpointLocation = checkpointDir.getAbsolutePath), + ExpectFailure[IllegalStateException] { e => + assert(e.getMessage.contains("batch 3 doesn't exist")) + } + ) + } + test("no-data-batch re-executed after restart should call V1 source.getBatch()") { val testSource = ReExecutedBatchTestSource(spark) val df = testSource.toDF() @@ -153,7 +178,6 @@ class MicroBatchExecutionSuite extends StreamTest with BeforeAndAfter { ) } - case class ReExecutedBatchTestSource(spark: SparkSession) extends Source { @volatile var currentOffset = 0L @volatile var getBatchCallCount = 0 @@ -191,4 +215,3 @@ class MicroBatchExecutionSuite extends StreamTest with BeforeAndAfter { } } } - From 02aa6a070f42c79e60ee48e20be711bb52b6d4cd Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 1 Mar 2022 09:42:54 +0800 Subject: [PATCH 357/513] [SPARK-38352][SQL] Fix DataFrameAggregateSuite/DataFrameSetOperationsSuite/DataFrameWindowFunctionsSuite under ANSI mode ### What changes were proposed in this pull request? Fix the following test suites under ANSI mode: - DataFrameAggregateSuite - DataFrameNaFunctionsSuite - DataFrameRangeSuite - DataFrameSetOperationsSuite - DataFrameWindowFunctionsSuite - TwoLevelAggregateHashMapSuite - TwoLevelAggregateHashMapWithVectorizedMapSuite ### Why are the changes needed? To set up a new GA test job with ANSI mode on ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually turn on ANSI mode and test . Also it should pass GA tests. Closes #35682 from gengliangwang/fixAnsi. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../spark/sql/DataFrameAggregateSuite.scala | 20 ++++++---- .../spark/sql/DataFrameNaFunctionsSuite.scala | 28 +++++++------ .../spark/sql/DataFrameRangeSuite.scala | 16 ++++---- .../sql/DataFrameSetOperationsSuite.scala | 37 ++++++++--------- .../sql/DataFrameWindowFunctionsSuite.scala | 40 ++++++++++--------- 5 files changed, 79 insertions(+), 62 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 42293bcd1f35a..157736f9777e6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -1025,11 +1025,15 @@ class DataFrameAggregateSuite extends QueryTest sql("SELECT x FROM tempView GROUP BY x HAVING COUNT_IF(NULL) > 0"), Nil) - val error = intercept[AnalysisException] { - sql("SELECT COUNT_IF(x) FROM tempView") + // When ANSI mode is on, it will implicit cast the string as boolean and throw a runtime + // error. Here we simply test with ANSI mode off. 
+ if (!conf.ansiEnabled) { + val error = intercept[AnalysisException] { + sql("SELECT COUNT_IF(x) FROM tempView") + } + assert(error.message.contains("cannot resolve 'count_if(tempview.x)' due to data type " + + "mismatch: argument 1 requires boolean type, however, 'tempview.x' is of string type")) } - assert(error.message.contains("cannot resolve 'count_if(tempview.x)' due to data type " + - "mismatch: argument 1 requires boolean type, however, 'tempview.x' is of string type")) } } @@ -1136,9 +1140,11 @@ class DataFrameAggregateSuite extends QueryTest val mapDF = Seq(Tuple1(Map("a" -> "a"))).toDF("col") checkAnswer(mapDF.groupBy(struct($"col.a")).count().select("count"), Row(1)) - val nonStringMapDF = Seq(Tuple1(Map(1 -> 1))).toDF("col") - // Spark implicit casts string literal "a" to int to match the key type. - checkAnswer(nonStringMapDF.groupBy(struct($"col.a")).count().select("count"), Row(1)) + if (!conf.ansiEnabled) { + val nonStringMapDF = Seq(Tuple1(Map(1 -> 1))).toDF("col") + // Spark implicit casts string literal "a" to int to match the key type. + checkAnswer(nonStringMapDF.groupBy(struct($"col.a")).count().select("count"), Row(1)) + } val arrayDF = Seq(Tuple1(Seq(1))).toDF("col") val e = intercept[AnalysisException](arrayDF.groupBy(struct($"col.a")).count()) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala index 20ae995af628b..8dbc57c0429c5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala @@ -444,21 +444,25 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession { } test("replace float with nan") { - checkAnswer( - createNaNDF().na.replace("*", Map( - 1.0f -> Float.NaN - )), - Row(0, 0L, 0.toShort, 0.toByte, Float.NaN, Double.NaN) :: - Row(0, 0L, 0.toShort, 0.toByte, Float.NaN, Double.NaN) :: Nil) + if (!conf.ansiEnabled) { + checkAnswer( + createNaNDF().na.replace("*", Map( + 1.0f -> Float.NaN + )), + Row(0, 0L, 0.toShort, 0.toByte, Float.NaN, Double.NaN) :: + Row(0, 0L, 0.toShort, 0.toByte, Float.NaN, Double.NaN) :: Nil) + } } test("replace double with nan") { - checkAnswer( - createNaNDF().na.replace("*", Map( - 1.0 -> Double.NaN - )), - Row(0, 0L, 0.toShort, 0.toByte, Float.NaN, Double.NaN) :: - Row(0, 0L, 0.toShort, 0.toByte, Float.NaN, Double.NaN) :: Nil) + if (!conf.ansiEnabled) { + checkAnswer( + createNaNDF().na.replace("*", Map( + 1.0 -> Double.NaN + )), + Row(0, 0L, 0.toShort, 0.toByte, Float.NaN, Double.NaN) :: + Row(0, 0L, 0.toShort, 0.toByte, Float.NaN, Double.NaN) :: Nil) + } } test("SPARK-34417: test fillMap() for column with a dot in the name") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameRangeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameRangeSuite.scala index fc549e307c80f..917f80e58108e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameRangeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameRangeSuite.scala @@ -63,13 +63,15 @@ class DataFrameRangeSuite extends QueryTest with SharedSparkSession with Eventua val res7 = spark.range(-10, -9, -20, 1).select("id") assert(res7.count == 0) - val res8 = spark.range(Long.MinValue, Long.MaxValue, Long.MaxValue, 100).select("id") - assert(res8.count == 3) - assert(res8.agg(sum("id")).as("sumid").collect() === Seq(Row(-3))) - - val res9 = spark.range(Long.MaxValue, 
Long.MinValue, Long.MinValue, 100).select("id") - assert(res9.count == 2) - assert(res9.agg(sum("id")).as("sumid").collect() === Seq(Row(Long.MaxValue - 1))) + if (!conf.ansiEnabled) { + val res8 = spark.range(Long.MinValue, Long.MaxValue, Long.MaxValue, 100).select("id") + assert(res8.count == 3) + assert(res8.agg(sum("id")).as("sumid").collect() === Seq(Row(-3))) + + val res9 = spark.range(Long.MaxValue, Long.MinValue, Long.MinValue, 100).select("id") + assert(res9.count == 2) + assert(res9.agg(sum("id")).as("sumid").collect() === Seq(Row(Long.MaxValue - 1))) + } // only end provided as argument val res10 = spark.range(10).select("id") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala index 19a62c25f5c5b..ca04adf642e15 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala @@ -341,7 +341,7 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { ).toDF("date", "timestamp", "decimal") val widenTypedRows = Seq( - (new Timestamp(2), 10.5D, "string") + (new Timestamp(2), 10.5D, "2021-01-01 00:00:00") ).toDF("date", "timestamp", "decimal") dates.union(widenTypedRows).collect() @@ -538,24 +538,25 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { } test("union by name - type coercion") { - var df1 = Seq((1, "a")).toDF("c0", "c1") - var df2 = Seq((3, 1L)).toDF("c1", "c0") - checkAnswer(df1.unionByName(df2), Row(1L, "a") :: Row(1L, "3") :: Nil) - - df1 = Seq((1, 1.0)).toDF("c0", "c1") - df2 = Seq((8L, 3.0)).toDF("c1", "c0") + var df1 = Seq((1, 1.0)).toDF("c0", "c1") + var df2 = Seq((8L, 3.0)).toDF("c1", "c0") checkAnswer(df1.unionByName(df2), Row(1.0, 1.0) :: Row(3.0, 8.0) :: Nil) - - df1 = Seq((2.0f, 7.4)).toDF("c0", "c1") - df2 = Seq(("a", 4.0)).toDF("c1", "c0") - checkAnswer(df1.unionByName(df2), Row(2.0, "7.4") :: Row(4.0, "a") :: Nil) - - df1 = Seq((1, "a", 3.0)).toDF("c0", "c1", "c2") - df2 = Seq((1.2, 2, "bc")).toDF("c2", "c0", "c1") - val df3 = Seq(("def", 1.2, 3)).toDF("c1", "c2", "c0") - checkAnswer(df1.unionByName(df2.unionByName(df3)), - Row(1, "a", 3.0) :: Row(2, "bc", 1.2) :: Row(3, "def", 1.2) :: Nil - ) + if (!conf.ansiEnabled) { + df1 = Seq((1, "a")).toDF("c0", "c1") + df2 = Seq((3, 1L)).toDF("c1", "c0") + checkAnswer(df1.unionByName(df2), Row(1L, "a") :: Row(1L, "3") :: Nil) + + df1 = Seq((2.0f, 7.4)).toDF("c0", "c1") + df2 = Seq(("a", 4.0)).toDF("c1", "c0") + checkAnswer(df1.unionByName(df2), Row(2.0, "7.4") :: Row(4.0, "a") :: Nil) + + df1 = Seq((1, "a", 3.0)).toDF("c0", "c1", "c2") + df2 = Seq((1.2, 2, "bc")).toDF("c2", "c0", "c1") + val df3 = Seq(("def", 1.2, 3)).toDF("c1", "c2", "c0") + checkAnswer(df1.unionByName(df2.unionByName(df3)), + Row(1, "a", 3.0) :: Row(2, "bc", 1.2) :: Row(3, "def", 1.2) :: Nil + ) + } } test("union by name - check case sensitivity") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 3cf61c3402bc8..f33de7402a71e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -97,7 +97,8 @@ class DataFrameWindowFunctionsSuite extends QueryTest } test("corr, covar_pop, stddev_pop functions in specific 
window") { - withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "true") { + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "true", + SQLConf.ANSI_ENABLED.key -> "false") { val df = Seq( ("a", "p1", 10.0, 20.0), ("b", "p1", 20.0, 10.0), @@ -150,7 +151,8 @@ class DataFrameWindowFunctionsSuite extends QueryTest test("SPARK-13860: " + "corr, covar_pop, stddev_pop functions in specific window " + "LEGACY_STATISTICAL_AGGREGATE off") { - withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "false") { + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "false", + SQLConf.ANSI_ENABLED.key -> "false") { val df = Seq( ("a", "p1", 10.0, 20.0), ("b", "p1", 20.0, 10.0), @@ -407,22 +409,24 @@ class DataFrameWindowFunctionsSuite extends QueryTest } test("numerical aggregate functions on string column") { - val df = Seq((1, "a", "b")).toDF("key", "value1", "value2") - checkAnswer( - df.select($"key", - var_pop("value1").over(), - variance("value1").over(), - stddev_pop("value1").over(), - stddev("value1").over(), - sum("value1").over(), - mean("value1").over(), - avg("value1").over(), - corr("value1", "value2").over(), - covar_pop("value1", "value2").over(), - covar_samp("value1", "value2").over(), - skewness("value1").over(), - kurtosis("value1").over()), - Seq(Row(1, null, null, null, null, null, null, null, null, null, null, null, null))) + if (!conf.ansiEnabled) { + val df = Seq((1, "a", "b")).toDF("key", "value1", "value2") + checkAnswer( + df.select($"key", + var_pop("value1").over(), + variance("value1").over(), + stddev_pop("value1").over(), + stddev("value1").over(), + sum("value1").over(), + mean("value1").over(), + avg("value1").over(), + corr("value1", "value2").over(), + covar_pop("value1", "value2").over(), + covar_samp("value1", "value2").over(), + skewness("value1").over(), + kurtosis("value1").over()), + Seq(Row(1, null, null, null, null, null, null, null, null, null, null, null, null))) + } } test("statistical functions") { From 969d672ba79891fb00edd84124b866e1a097c1bd Mon Sep 17 00:00:00 2001 From: hujiahua Date: Mon, 28 Feb 2022 21:41:58 -0600 Subject: [PATCH 358/513] [SPARK-37688][CORE] ExecutorMonitor should ignore SparkListenerBlockUpdated event if executor was not active ### What changes were proposed in this pull request? `ExecutorMonitor` should ignore `SparkListenerBlockUpdated` event if executor was not active ### Why are the changes needed? If not ignored, `ExecutorMonitor` will create a new executor tracker with UNKNOWN_RESOURCE_PROFILE_ID for the dead executor. And `ExecutorAllocationManager` will not remove executor with UNKNOWN_RESOURCE_PROFILE_ID, which cause a executor slot is occupied by the dead executor, so a new one cannot be created. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add a new test. Closes #34956 from sleep1661/SPARK-37688. 
Authored-by: hujiahua Signed-off-by: Mridul Muralidharan gmail.com> --- .../scheduler/dynalloc/ExecutorMonitor.scala | 4 ++++ .../dynalloc/ExecutorMonitorSuite.scala | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala b/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala index def63b9ead183..defef5bfcf23b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala @@ -379,6 +379,10 @@ private[spark] class ExecutorMonitor( } override def onBlockUpdated(event: SparkListenerBlockUpdated): Unit = { + if (!client.isExecutorActive(event.blockUpdatedInfo.blockManagerId.executorId)) { + return + } + val exec = ensureExecutorIsTracked(event.blockUpdatedInfo.blockManagerId.executorId, UNKNOWN_RESOURCE_PROFILE_ID) diff --git a/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala index 6fb89b883a1f8..336198b182c87 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala @@ -169,6 +169,8 @@ class ExecutorMonitorSuite extends SparkFunSuite { } test("keeps track of stored blocks for each rdd and split") { + knownExecs ++= Set("1", "2") + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) monitor.onBlockUpdated(rddUpdate(1, 0, "1")) @@ -249,6 +251,7 @@ class ExecutorMonitorSuite extends SparkFunSuite { } test("SPARK-27677: don't track blocks stored on disk when using shuffle service") { + knownExecs += "1" // First make sure that blocks on disk are counted when no shuffle service is available. monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) monitor.onBlockUpdated(rddUpdate(1, 0, "1", level = StorageLevel.DISK_ONLY)) @@ -458,6 +461,22 @@ class ExecutorMonitorSuite extends SparkFunSuite { assert(monitor.timedOutExecutors(idleDeadline).isEmpty) } + test("SPARK-37688: ignore SparkListenerBlockUpdated event if executor was not active") { + conf + .set(DYN_ALLOCATION_SHUFFLE_TRACKING_TIMEOUT, Long.MaxValue) + .set(DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED, true) + .set(SHUFFLE_SERVICE_ENABLED, false) + monitor = new ExecutorMonitor(conf, client, null, clock, allocationManagerSource()) + + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) + monitor.onExecutorRemoved(SparkListenerExecutorRemoved(clock.getTimeMillis(), "1", + "heartbeats timeout")) + monitor.onBlockUpdated(rddUpdate(1, 1, "1", level = StorageLevel.MEMORY_AND_DISK)) + + assert(monitor.executorCount == 0 ) + } + + private def idleDeadline: Long = clock.nanoTime() + idleTimeoutNs + 1 private def storageDeadline: Long = clock.nanoTime() + storageTimeoutNs + 1 private def shuffleDeadline: Long = clock.nanoTime() + shuffleTimeoutNs + 1 From 9336db7c5bd91b4df2b95f0e249d051ca1f2b12f Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Mon, 28 Feb 2022 22:06:27 -0600 Subject: [PATCH 359/513] Revert "[SPARK-38191][CORE] The staging directory of write job only needs to be initialized once in HadoopMapReduceCommitProtocol" This reverts commit e58872d67b1cd8e0b0fb71aec1ba2023e5a1991f. 
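Note (illustrative, not part of the patch): the only change being reverted is `lazy val` back to `def` for `stagingDir`. A small REPL-style sketch of the semantic difference, using a hypothetical counter in place of `getStagingDir(path, jobId)`:
```scala
// Hedged sketch: a `lazy val` is computed once and cached; a `def` is re-evaluated on
// every access. StagingDirDemo and computeStagingDir are illustrative names only.
class StagingDirDemo {
  private var calls = 0
  private def computeStagingDir(): String = { calls += 1; s"staging-$calls" }

  lazy val cachedStagingDir: String = computeStagingDir()  // evaluated once, on first use
  def freshStagingDir: String = computeStagingDir()        // evaluated on every use
}

val demo = new StagingDirDemo
assert(demo.cachedStagingDir == demo.cachedStagingDir)     // cached: "staging-1" both times
assert(demo.freshStagingDir != demo.freshStagingDir)       // fresh: "staging-2", then "staging-3"
```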
--- .../spark/internal/io/HadoopMapReduceCommitProtocol.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala index 8742901f5716d..a39e9abd9bdc4 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala @@ -104,7 +104,7 @@ class HadoopMapReduceCommitProtocol( * The staging directory of this write job. Spark uses it to deal with files with absolute output * path, or writing data into partitioned directory with dynamicPartitionOverwrite=true. */ - protected lazy val stagingDir = getStagingDir(path, jobId) + protected def stagingDir = getStagingDir(path, jobId) protected def setupCommitter(context: TaskAttemptContext): OutputCommitter = { val format = context.getOutputFormatClass.getConstructor().newInstance() From 1d068cef38f2323967be83045118cef0e537e8dc Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Tue, 1 Mar 2022 15:08:37 +0800 Subject: [PATCH 360/513] [SPARK-38318][SQL] Skip view cyclic reference check if view is stored as logical plan ### What changes were proposed in this pull request? In 3.2, we unified the representation of dataset view and SQL view, i.e., we wrap both of them with `View`. This causes a regression that below case works in 3.1 but failed in 3.2 ```sql sql("select 1").createOrReplaceTempView("v") sql("select * from v").createOrReplaceTempView("v") -- in 3.1 it works well, and select will output 1 -- in 3.2 it failed with error: "AnalysisException: Recursive view v detected (cycle: v -> v)" ``` The root cause is in 3.1 we actually never did view cyclic check for dataset view. Because they are wrapped by `SubqueryAlias` instead of `View` In this PR, we want to skip the cyclic check if the view is stored as a logical plan. i.e., `storeAnalyzedPlanForView = true` or view is created by Dataset API. ### Why are the changes needed? fix regression ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? newly added ut Closes #35653 from linhongliu-db/SPARK-38318. Authored-by: Linhong Liu Signed-off-by: Wenchen Fan --- .../spark/sql/execution/command/views.scala | 9 +++-- .../sql/execution/SQLViewTestSuite.scala | 33 ++++++++++++++++++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 145287158a58c..eca48a6992433 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -612,12 +612,17 @@ object ViewHelper extends SQLConfHelper with Logging { val uncache = getRawTempView(name.table).map { r => needsToUncache(r, aliasedPlan) }.getOrElse(false) + val storeAnalyzedPlanForView = conf.storeAnalyzedPlanForView || originalText.isEmpty if (replace && uncache) { logDebug(s"Try to uncache ${name.quotedString} before replacing.") - checkCyclicViewReference(analyzedPlan, Seq(name), name) + if (!storeAnalyzedPlanForView) { + // Skip cyclic check because when stored analyzed plan for view, the depended + // view is already converted to the underlying tables. So no cyclic views. 
+ checkCyclicViewReference(analyzedPlan, Seq(name), name) + } CommandUtils.uncacheTableOrView(session, name.quotedString) } - if (!conf.storeAnalyzedPlanForView && originalText.nonEmpty) { + if (!storeAnalyzedPlanForView) { TemporaryViewRelation( prepareTemporaryView( name, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index 7e773fa1ac565..316b1cfd5e842 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution import scala.collection.JavaConverters._ -import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.CatalogFunction import org.apache.spark.sql.catalyst.expressions.Expression @@ -410,6 +410,9 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { } abstract class TempViewTestSuite extends SQLViewTestSuite { + + def createOrReplaceDatasetView(df: DataFrame, viewName: String): Unit + test("SPARK-37202: temp view should capture the function registered by catalog API") { val funcName = "tempFunc" withUserDefinedFunction(funcName -> true) { @@ -437,6 +440,28 @@ abstract class TempViewTestSuite extends SQLViewTestSuite { s"$viewName is a temp view. 'SHOW CREATE TABLE' expects a table or permanent view.")) } } + + test("back compatibility: skip cyclic reference check if view is stored as logical plan") { + val viewName = formattedViewName("v") + withSQLConf(STORE_ANALYZED_PLAN_FOR_VIEW.key -> "false") { + withView(viewName) { + createOrReplaceDatasetView(sql("SELECT 1"), "v") + createOrReplaceDatasetView(sql(s"SELECT * FROM $viewName"), "v") + checkViewOutput(viewName, Seq(Row(1))) + } + } + withSQLConf(STORE_ANALYZED_PLAN_FOR_VIEW.key -> "true") { + withView(viewName) { + createOrReplaceDatasetView(sql("SELECT 1"), "v") + createOrReplaceDatasetView(sql(s"SELECT * FROM $viewName"), "v") + checkViewOutput(viewName, Seq(Row(1))) + + createView("v", "SELECT 2", replace = true) + createView("v", s"SELECT * FROM $viewName", replace = true) + checkViewOutput(viewName, Seq(Row(2))) + } + } + } } class LocalTempViewTestSuite extends TempViewTestSuite with SharedSparkSession { @@ -445,6 +470,9 @@ class LocalTempViewTestSuite extends TempViewTestSuite with SharedSparkSession { override protected def tableIdentifier(viewName: String): TableIdentifier = { TableIdentifier(viewName) } + override def createOrReplaceDatasetView(df: DataFrame, viewName: String): Unit = { + df.createOrReplaceTempView(viewName) + } } class GlobalTempViewTestSuite extends TempViewTestSuite with SharedSparkSession { @@ -456,6 +484,9 @@ class GlobalTempViewTestSuite extends TempViewTestSuite with SharedSparkSession override protected def tableIdentifier(viewName: String): TableIdentifier = { TableIdentifier(viewName, Some(db)) } + override def createOrReplaceDatasetView(df: DataFrame, viewName: String): Unit = { + df.createOrReplaceGlobalTempView(viewName) + } } class OneTableCatalog extends InMemoryCatalog { From ad4e5a676b48d0def5db49dd6c88eb779f6a3941 Mon Sep 17 00:00:00 2001 From: yaohua Date: Tue, 1 Mar 2022 21:09:30 +0800 Subject: [PATCH 361/513] [SPARK-38323][SQL][STREAMING] Support the hidden file metadata in 
Streaming ### What changes were proposed in this pull request? Support the hidden file source metadata in streaming, you could do the same thing as batch read/write as follows: ``` spark .readStream ... .select("*", "_metadata") .writeStream ... .start() ``` ### Why are the changes needed? Add more support to the hidden file metadata feature. Before this PR, querying the hidden file metadata struct `_metadata` will fail using `readStream`, `writeStream` streaming APIs. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add a new UT Closes #35676 from Yaohua628/spark-38323. Authored-by: yaohua Signed-off-by: Wenchen Fan --- .../streaming/MicroBatchExecution.scala | 19 +++++-- .../streaming/StreamingRelation.scala | 33 ++++++++++-- .../datasources/FileMetadataStructSuite.scala | 54 +++++++++++++++++++ 3 files changed, 97 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index fb434f488361a..3b409fa2f6a72 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -21,7 +21,7 @@ import scala.collection.mutable.{Map => MutableMap} import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, CurrentBatchTimestamp, CurrentDate, CurrentTimestamp, LocalTimestamp} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, CurrentBatchTimestamp, CurrentDate, CurrentTimestamp, FileSourceMetadataAttribute, LocalTimestamp} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LocalRelation, LogicalPlan, Project} import org.apache.spark.sql.catalyst.streaming.{StreamingRelationV2, WriteToStream} import org.apache.spark.sql.catalyst.trees.TreePattern.CURRENT_LIKE @@ -30,6 +30,7 @@ import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Tabl import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset => OffsetV2, ReadLimit, SparkDataStream, SupportsAdmissionControl, SupportsTriggerAvailableNow} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, StreamingDataSourceV2Relation, StreamWriterCommitProgress, WriteToDataSourceV2Exec} import org.apache.spark.sql.execution.streaming.sources.WriteToMicroBatchDataSource import org.apache.spark.sql.internal.SQLConf @@ -577,15 +578,23 @@ class MicroBatchExecution( // For v1 sources. 
case StreamingExecutionRelation(source, output) => newData.get(source).map { dataPlan => + val hasFileMetadata = output.exists { + case FileSourceMetadataAttribute(_) => true + case _ => false + } + val finalDataPlan = dataPlan match { + case l: LogicalRelation if hasFileMetadata => l.withMetadataColumns() + case _ => dataPlan + } val maxFields = SQLConf.get.maxToStringFields - assert(output.size == dataPlan.output.size, + assert(output.size == finalDataPlan.output.size, s"Invalid batch: ${truncatedString(output, ",", maxFields)} != " + - s"${truncatedString(dataPlan.output, ",", maxFields)}") + s"${truncatedString(finalDataPlan.output, ",", maxFields)}") - val aliases = output.zip(dataPlan.output).map { case (to, from) => + val aliases = output.zip(finalDataPlan.output).map { case (to, from) => Alias(from, to.name)(exprId = to.exprId, explicitMetadata = Some(from.metadata)) } - Project(aliases, dataPlan) + Project(aliases, finalDataPlan) }.getOrElse { LocalRelation(output, isStreaming = true) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala index 5d4b811defeeb..00962a4f4cdf0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala @@ -21,12 +21,12 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation -import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} +import org.apache.spark.sql.catalyst.plans.logical.{ExposesMetadataColumns, LeafNode, LogicalPlan, Statistics} import org.apache.spark.sql.connector.read.streaming.SparkDataStream import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.LeafExecNode -import org.apache.spark.sql.execution.datasources.DataSource +import org.apache.spark.sql.execution.datasources.{DataSource, FileFormat} object StreamingRelation { def apply(dataSource: DataSource): StreamingRelation = { @@ -43,7 +43,7 @@ object StreamingRelation { * passing to [[StreamExecution]] to run a query. */ case class StreamingRelation(dataSource: DataSource, sourceName: String, output: Seq[Attribute]) - extends LeafNode with MultiInstanceRelation { + extends LeafNode with MultiInstanceRelation with ExposesMetadataColumns { override def isStreaming: Boolean = true override def toString: String = sourceName @@ -56,6 +56,31 @@ case class StreamingRelation(dataSource: DataSource, sourceName: String, output: ) override def newInstance(): LogicalPlan = this.copy(output = output.map(_.newInstance())) + + override lazy val metadataOutput: Seq[AttributeReference] = { + dataSource.providingClass match { + // If the dataSource provided class is a same or subclass of FileFormat class + case f if classOf[FileFormat].isAssignableFrom(f) => + val resolve = conf.resolver + val outputNames = outputSet.map(_.name) + def isOutputColumn(col: AttributeReference): Boolean = { + outputNames.exists(name => resolve(col.name, name)) + } + // filter out the metadata struct column if it has the name conflicting with output columns. 
+ // if the file has a column "_metadata", + // then the data column should be returned not the metadata struct column + Seq(FileFormat.createFileMetadataCol).filterNot(isOutputColumn) + case _ => Nil + } + } + + override def withMetadataColumns(): LogicalPlan = { + if (metadataOutput.nonEmpty) { + this.copy(output = output ++ metadataOutput) + } else { + this + } + } } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala index 175b42083f26a..410fc985dd3bd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala @@ -510,4 +510,58 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession { ) } } + + metadataColumnsTest("file metadata in streaming", schema) { (df, _, _) => + withTempDir { dir => + df.coalesce(1).write.format("json").save(dir.getCanonicalPath + "/source/new-streaming-data") + + val stream = spark.readStream.format("json") + .schema(schema) + .load(dir.getCanonicalPath + "/source/new-streaming-data") + .select("*", "_metadata") + .writeStream.format("json") + .option("checkpointLocation", dir.getCanonicalPath + "/target/checkpoint") + .start(dir.getCanonicalPath + "/target/new-streaming-data") + + stream.processAllAvailable() + stream.stop() + + val newDF = spark.read.format("json") + .load(dir.getCanonicalPath + "/target/new-streaming-data") + + val sourceFile = new File(dir, "/source/new-streaming-data").listFiles() + .filter(_.getName.endsWith(".json")).head + val sourceFileMetadata = Map( + METADATA_FILE_PATH -> sourceFile.toURI.toString, + METADATA_FILE_NAME -> sourceFile.getName, + METADATA_FILE_SIZE -> sourceFile.length(), + METADATA_FILE_MODIFICATION_TIME -> new Timestamp(sourceFile.lastModified()) + ) + + // SELECT * will have: name, age, info, _metadata of /source/new-streaming-data + assert(newDF.select("*").columns.toSet == Set("name", "age", "info", "_metadata")) + // Verify the data is expected + checkAnswer( + newDF.select(col("name"), col("age"), col("info"), + col(METADATA_FILE_PATH), col(METADATA_FILE_NAME), + // since we are writing _metadata to a json file, + // we should explicitly cast the column to timestamp type + col(METADATA_FILE_SIZE), to_timestamp(col(METADATA_FILE_MODIFICATION_TIME))), + Seq( + Row( + "jack", 24, Row(12345L, "uom"), + sourceFileMetadata(METADATA_FILE_PATH), + sourceFileMetadata(METADATA_FILE_NAME), + sourceFileMetadata(METADATA_FILE_SIZE), + sourceFileMetadata(METADATA_FILE_MODIFICATION_TIME)), + Row( + "lily", 31, Row(54321L, "ucb"), + sourceFileMetadata(METADATA_FILE_PATH), + sourceFileMetadata(METADATA_FILE_NAME), + sourceFileMetadata(METADATA_FILE_SIZE), + sourceFileMetadata(METADATA_FILE_MODIFICATION_TIME)) + ) + ) + } + } } From 2da0d0722d5a0aa337d5744a51075449b224e421 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Tue, 1 Mar 2022 21:15:49 +0800 Subject: [PATCH 362/513] [SPARK-37582][SPARK-37583][SQL] CONTAINS, STARTSWITH, ENDSWITH should support all data type ### What changes were proposed in this pull request? In current realization, `contains`, `startsWith` and `endsWith` only support StringType as input, in this pr, we make these three string function support all type, it follow the blow rules: 1. contains(binary, binary) -> convert to binary contains 2. 
for others, convert to string contains and rely on type coercion ### Why are the changes needed? Make function `contains`, `startsWith`, `endsWith` support all type. ### Does this PR introduce _any_ user-facing change? user can use binary as input. expression | result -- | -- contains(encode('Spark SQL', 'utf-8'), 'Spark') | true startsWith(encode('Spark SQL', 'utf-8'), 'Spark') | true contains(encode('Spark SQL', 'utf-8'), 'SparkSQL') | false endsWith(encode('Spark SQL', 'utf-8'), 'Spark') | false endsWith(encode('Spark SQL', 'utf-8'), 'SQL') | true ### How was this patch tested? added UT Closes #34848 from AngersZhuuuu/SPARK-37582. Authored-by: Angerszhuuuu Signed-off-by: Wenchen Fan --- .../spark/unsafe/array/ByteArrayMethods.java | 37 +++++ .../catalyst/analysis/FunctionRegistry.scala | 6 +- .../expressions/stringExpressions.scala | 94 ++++++++++++- .../sql/catalyst/optimizer/expressions.scala | 48 +++---- .../dynamicpruning/PartitionPruning.scala | 1 + .../sql-tests/inputs/string-functions.sql | 19 +++ .../results/ansi/string-functions.sql.out | 130 +++++++++++++++++- .../results/string-functions.sql.out | 130 +++++++++++++++++- 8 files changed, 429 insertions(+), 36 deletions(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java index 5a7e32b0d9d3b..deb7d2bf1b0f8 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java @@ -19,6 +19,8 @@ import org.apache.spark.unsafe.Platform; +import static org.apache.spark.unsafe.Platform.BYTE_ARRAY_OFFSET; + public class ByteArrayMethods { private ByteArrayMethods() { @@ -91,4 +93,39 @@ public static boolean arrayEquals( } return true; } + + public static boolean contains(byte[] arr, byte[] sub) { + if (sub.length == 0) { + return true; + } + byte first = sub[0]; + for (int i = 0; i <= arr.length - sub.length; i++) { + if (arr[i] == first && matchAt(arr, sub, i)) { + return true; + } + } + return false; + } + + public static boolean startsWith(byte[] array, byte[] target) { + if (target.length > array.length) { + return false; + } + return arrayEquals(array, BYTE_ARRAY_OFFSET, target, BYTE_ARRAY_OFFSET, target.length); + } + + public static boolean endsWith(byte[] array, byte[] target) { + if (target.length > array.length) { + return false; + } + return arrayEquals(array, BYTE_ARRAY_OFFSET + array.length - target.length, + target, BYTE_ARRAY_OFFSET, target.length); + } + + public static boolean matchAt(byte[] arr, byte[] sub, int pos) { + if (sub.length + pos > arr.length || pos < 0) { + return false; + } + return arrayEquals(arr, BYTE_ARRAY_OFFSET + pos, sub, BYTE_ARRAY_OFFSET, sub.length); + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 8fe53de894439..129c62c1b3a86 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -500,9 +500,9 @@ object FunctionRegistry { expression[Ascii]("ascii"), expression[Chr]("char", true), expression[Chr]("chr"), - expression[Contains]("contains"), - expression[StartsWith]("startswith"), - expression[EndsWith]("endswith"), + expressionBuilder("contains", 
ContainsExpressionBuilder), + expressionBuilder("startswith", StartsWithExpressionBuilder), + expressionBuilder("endswith", EndsWithExpressionBuilder), expression[Base64]("base64"), expression[BitLength]("bit_length"), expression[Length]("char_length", true), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index b9670646c91a6..fc73216b296af 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -37,6 +37,7 @@ import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.UTF8StringBuilder +import org.apache.spark.unsafe.array.ByteArrayMethods import org.apache.spark.unsafe.types.{ByteArray, UTF8String} //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -468,13 +469,62 @@ abstract class StringPredicate extends BinaryExpression override def toString: String = s"$nodeName($left, $right)" } -/** - * A function that returns true if the string `left` contains the string `right`. - */ +trait StringBinaryPredicateExpressionBuilderBase extends ExpressionBuilder { + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + val numArgs = expressions.length + if (numArgs == 2) { + if (expressions(0).dataType == BinaryType && expressions(1).dataType == BinaryType) { + BinaryPredicate(funcName, expressions(0), expressions(1)) + } else { + createStringPredicate(expressions(0), expressions(1)) + } + } else { + throw QueryCompilationErrors.invalidFunctionArgumentNumberError(Seq(2), funcName, numArgs) + } + } + + protected def createStringPredicate(left: Expression, right: Expression): Expression +} + +object BinaryPredicate { + def unapply(expr: Expression): Option[StaticInvoke] = expr match { + case s @ StaticInvoke(clz, _, "contains" | "startsWith" | "endsWith", Seq(_, _), _, _, _, _) + if clz == classOf[ByteArrayMethods] => Some(s) + case _ => None + } +} + +case class BinaryPredicate(override val prettyName: String, left: Expression, right: Expression) + extends RuntimeReplaceable with ImplicitCastInputTypes with BinaryLike[Expression] { + + private lazy val realFuncName = prettyName match { + case "startswith" => "startsWith" + case "endswith" => "endsWith" + case name => name + } + + override lazy val replacement = + StaticInvoke( + classOf[ByteArrayMethods], + BooleanType, + realFuncName, + Seq(left, right), + Seq(BinaryType, BinaryType)) + + override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType, BinaryType) + + override protected def withNewChildrenInternal( + newLeft: Expression, + newRight: Expression): Expression = { + copy(left = newLeft, right = newRight) + } +} + @ExpressionDescription( usage = """ _FUNC_(left, right) - Returns a boolean. The value is True if right is found inside left. Returns NULL if either input expression is NULL. Otherwise, returns False. + Both left or right must be of STRING or BINARY type. 
""", examples = """ Examples: @@ -484,10 +534,18 @@ abstract class StringPredicate extends BinaryExpression false > SELECT _FUNC_('Spark SQL', null); NULL + > SELECT _FUNC_(x'537061726b2053514c', x'537061726b'); + true """, since = "3.3.0", group = "string_funcs" ) +object ContainsExpressionBuilder extends StringBinaryPredicateExpressionBuilderBase { + override protected def createStringPredicate(left: Expression, right: Expression): Expression = { + Contains(left, right) + } +} + case class Contains(left: Expression, right: Expression) extends StringPredicate { override def compare(l: UTF8String, r: UTF8String): Boolean = l.contains(r) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -499,8 +557,9 @@ case class Contains(left: Expression, right: Expression) extends StringPredicate @ExpressionDescription( usage = """ - _FUNC_(left, right) - Returns true if the string `left` starts with the string `right`. - Returns NULL if either input expression is NULL. + _FUNC_(left, right) - Returns a boolean. The value is True if left starts with right. + Returns NULL if either input expression is NULL. Otherwise, returns False. + Both left or right must be of STRING or BINARY type. """, examples = """ Examples: @@ -510,10 +569,20 @@ case class Contains(left: Expression, right: Expression) extends StringPredicate false > SELECT _FUNC_('Spark SQL', null); NULL + > SELECT _FUNC_(x'537061726b2053514c', x'537061726b'); + true + > SELECT _FUNC_(x'537061726b2053514c', x'53514c'); + false """, since = "3.3.0", group = "string_funcs" ) +object StartsWithExpressionBuilder extends StringBinaryPredicateExpressionBuilderBase { + override protected def createStringPredicate(left: Expression, right: Expression): Expression = { + StartsWith(left, right) + } +} + case class StartsWith(left: Expression, right: Expression) extends StringPredicate { override def compare(l: UTF8String, r: UTF8String): Boolean = l.startsWith(r) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -525,8 +594,9 @@ case class StartsWith(left: Expression, right: Expression) extends StringPredica @ExpressionDescription( usage = """ - _FUNC_(left, right) - Returns true if the string `left` ends with the string `right`. - Returns NULL if either input expression is NULL. + _FUNC_(left, right) - Returns a boolean. The value is True if left ends with right. + Returns NULL if either input expression is NULL. Otherwise, returns False. + Both left or right must be of STRING or BINARY type. 
""", examples = """ Examples: @@ -536,10 +606,20 @@ case class StartsWith(left: Expression, right: Expression) extends StringPredica false > SELECT _FUNC_('Spark SQL', null); NULL + > SELECT _FUNC_(x'537061726b2053514c', x'537061726b'); + false + > SELECT _FUNC_(x'537061726b2053514c', x'53514c'); + true """, since = "3.3.0", group = "string_funcs" ) +object EndsWithExpressionBuilder extends StringBinaryPredicateExpressionBuilderBase { + override protected def createStringPredicate(left: Expression, right: Expression): Expression = { + EndsWith(left, right) + } +} + case class EndsWith(left: Expression, right: Expression) extends StringPredicate { override def compare(l: UTF8String, r: UTF8String): Boolean = l.endsWith(r) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index c1e5783d63c6b..5aa134a0c1109 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -21,7 +21,7 @@ import scala.collection.immutable.HashSet import scala.collection.mutable.{ArrayBuffer, Stack} import org.apache.spark.sql.catalyst.analysis._ -import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, MultiLikeBase, _} +import org.apache.spark.sql.catalyst.expressions.{MultiLikeBase, _} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull @@ -615,17 +615,6 @@ object PushFoldableIntoBranches extends Rule[LogicalPlan] with PredicateHelper { case _ => false } - // Not all BinaryExpression can be pushed into (if / case) branches. 
- private def supportedBinaryExpression(e: BinaryExpression): Boolean = e match { - case _: BinaryComparison | _: StringPredicate | _: StringRegexExpression => true - case _: BinaryArithmetic => true - case _: BinaryMathExpression => true - case _: AddMonths | _: DateAdd | _: DateAddInterval | _: DateDiff | _: DateSub | - _: DateAddYMInterval | _: TimestampAddYMInterval | _: TimeAdd => true - case _: FindInSet | _: RoundBase => true - case _ => false - } - def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning( _.containsAnyPattern(CASE_WHEN, IF), ruleId) { case q: LogicalPlan => q.transformExpressionsUpWithPruning( @@ -642,30 +631,26 @@ object PushFoldableIntoBranches extends Rule[LogicalPlan] with PredicateHelper { branches.map(e => e.copy(_2 = u.withNewChildren(Array(e._2)))), Some(u.withNewChildren(Array(elseValue.getOrElse(Literal(null, c.dataType)))))) - case b @ BinaryExpression(i @ If(_, trueValue, falseValue), right) - if supportedBinaryExpression(b) && right.foldable && - atMostOneUnfoldable(Seq(trueValue, falseValue)) => + case SupportedBinaryExpr(b, i @ If(_, trueValue, falseValue), right) + if right.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => i.copy( trueValue = b.withNewChildren(Array(trueValue, right)), falseValue = b.withNewChildren(Array(falseValue, right))) - case b @ BinaryExpression(left, i @ If(_, trueValue, falseValue)) - if supportedBinaryExpression(b) && left.foldable && - atMostOneUnfoldable(Seq(trueValue, falseValue)) => + case SupportedBinaryExpr(b, left, i @ If(_, trueValue, falseValue)) + if left.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => i.copy( trueValue = b.withNewChildren(Array(left, trueValue)), falseValue = b.withNewChildren(Array(left, falseValue))) - case b @ BinaryExpression(c @ CaseWhen(branches, elseValue), right) - if supportedBinaryExpression(b) && right.foldable && - atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + case SupportedBinaryExpr(b, c @ CaseWhen(branches, elseValue), right) + if right.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => c.copy( branches.map(e => e.copy(_2 = b.withNewChildren(Array(e._2, right)))), Some(b.withNewChildren(Array(elseValue.getOrElse(Literal(null, c.dataType)), right)))) - case b @ BinaryExpression(left, c @ CaseWhen(branches, elseValue)) - if supportedBinaryExpression(b) && left.foldable && - atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + case SupportedBinaryExpr(b, left, c @ CaseWhen(branches, elseValue)) + if left.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => c.copy( branches.map(e => e.copy(_2 = b.withNewChildren(Array(left, e._2)))), Some(b.withNewChildren(Array(left, elseValue.getOrElse(Literal(null, c.dataType)))))) @@ -673,6 +658,21 @@ object PushFoldableIntoBranches extends Rule[LogicalPlan] with PredicateHelper { } } +object SupportedBinaryExpr { + def unapply(expr: Expression): Option[(Expression, Expression, Expression)] = expr match { + case _: BinaryComparison | _: StringPredicate | _: StringRegexExpression => + Some(expr, expr.children.head, expr.children.last) + case _: BinaryArithmetic => Some(expr, expr.children.head, expr.children.last) + case _: BinaryMathExpression => Some(expr, expr.children.head, expr.children.last) + case _: AddMonths | _: DateAdd | _: DateAddInterval | _: DateDiff | _: DateSub | + _: DateAddYMInterval | _: TimestampAddYMInterval | _: TimeAdd => + Some(expr, expr.children.head, expr.children.last) + case _: FindInSet | _: RoundBase => Some(expr, 
expr.children.head, expr.children.last) + case BinaryPredicate(expr) => + Some(expr, expr.arguments.head, expr.arguments.last) + case _ => None + } +} /** * Simplifies LIKE expressions that do not need full regular expressions to evaluate the condition. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala index 4b5f724ba6f85..48d31d83c17b0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala @@ -205,6 +205,7 @@ object PartitionPruning extends Rule[LogicalPlan] with PredicateHelper { case _: BinaryComparison => true case _: In | _: InSet => true case _: StringPredicate => true + case BinaryPredicate(_) => true case _: MultiLikeBase => true case _ => false } diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index fef16b7fe7d75..e7c01a69bc838 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -126,6 +126,25 @@ SELECT endswith(null, 'Spark'); SELECT endswith('Spark', null); SELECT endswith(null, null); +SELECT contains(x'537061726b2053514c', x'537061726b'); +SELECT contains(x'', x''); +SELECT contains(x'537061726b2053514c', null); +SELECT contains(12, '1'); +SELECT contains(true, 'ru'); +SELECT contains(x'12', 12); +SELECT contains(true, false); + +SELECT startswith(x'537061726b2053514c', x'537061726b'); +SELECT startswith(x'537061726b2053514c', x''); +SELECT startswith(x'', x''); +SELECT startswith(x'537061726b2053514c', null); + +SELECT endswith(x'537061726b2053514c', x'53516c'); +SELECT endsWith(x'537061726b2053514c', x'537061726b'); +SELECT endsWith(x'537061726b2053514c', x''); +SELECT endsWith(x'', x''); +SELECT endsWith(x'537061726b2053514c', null); + -- to_number select to_number('454', '000'); select to_number('454.2', '000.0'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 913f1cfb5ae42..b182b5cb6b390 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 115 +-- Number of queries: 131 -- !query @@ -762,6 +762,134 @@ struct NULL +-- !query +SELECT contains(x'537061726b2053514c', x'537061726b') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT contains(x'', x'') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT contains(x'537061726b2053514c', null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT contains(12, '1') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT contains(true, 'ru') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT contains(x'12', 12) +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT contains(true, false) +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT startswith(x'537061726b2053514c', x'537061726b') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 
startswith(x'537061726b2053514c', x'') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT startswith(x'', x'') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT startswith(x'537061726b2053514c', null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT endswith(x'537061726b2053514c', x'53516c') +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT endsWith(x'537061726b2053514c', x'537061726b') +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT endsWith(x'537061726b2053514c', x'') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT endsWith(x'', x'') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT endsWith(x'537061726b2053514c', null) +-- !query schema +struct +-- !query output +NULL + + -- !query select to_number('454', '000') -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index bf4348d76349e..4307df7e61683 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 115 +-- Number of queries: 131 -- !query @@ -758,6 +758,134 @@ struct NULL +-- !query +SELECT contains(x'537061726b2053514c', x'537061726b') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT contains(x'', x'') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT contains(x'537061726b2053514c', null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT contains(12, '1') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT contains(true, 'ru') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT contains(x'12', 12) +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT contains(true, false) +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT startswith(x'537061726b2053514c', x'537061726b') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT startswith(x'537061726b2053514c', x'') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT startswith(x'', x'') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT startswith(x'537061726b2053514c', null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT endswith(x'537061726b2053514c', x'53516c') +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT endsWith(x'537061726b2053514c', x'537061726b') +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT endsWith(x'537061726b2053514c', x'') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT endsWith(x'', x'') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT endsWith(x'537061726b2053514c', null) +-- !query schema +struct +-- !query output +NULL + + -- !query select to_number('454', '000') -- !query schema From 1b95cfe362db429dcb306e0ff9b73228d18dcc03 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 1 Mar 2022 08:28:07 -0600 Subject: [PATCH 363/513] [SPARK-38348][BUILD] Upgrade `tink` to 1.6.1 ### What changes were proposed in this pull request? This pr aims upgrade `com.google.crypto.tink:tink` from 1.6.0 to 1.6.1 ### Why are the changes needed? 
The release notes as follows: - https://github.com/google/tink/releases/tag/v1.6.1 There is a performance optimization related to `AesGcmJce.encrypt` method in version 1.6.1 and this method used by `o.a.s.network.crypto.AuthEngine#challenge` and `o.a.s.network.crypto.AuthEngine#response` methods in Spark: - [Java: isAndroid() shouldn't cause Exceptions to be created](https://github.com/google/tink/issues/497) This optimization reduces the delay of `AesGcmJce.encrypt` method by 70%. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GA - Manual test: Verify the performance improvement of `AesGcmJce.encrypt` method as follows: ```scala val valuesPerIteration = 100000 val derivedKeyEncryptingKey: Array[Byte] = Array(119, -25, -98, 34, -61, 102, 101, -97, 86, -27, 25, 88, 94, -55, 40, -103) val ephemeralX25519PublicKey: Array[Byte] = Array(-94, 121, -27, 40, -42, -6, 114, 17, -11, 107, 58, -69, -69, -58, 56, -121, 28, -18, 10, 25, 41, -66, 77, 17, 19, -99, -54, 54, 97, -111, -13, 77) val aadState: Array[Byte] = Array(97, 112, 112, 73, 100, -19, 84, 88, -18, -105, 104, 105, 29, -84, 94, -110, 84, 38, -109, -85, -55) val benchmark = new Benchmark("Test AesGcmJceEncrypt", valuesPerIteration, output = output) benchmark.addCase("Test AesGcmJce encrypt") { _: Int => for (_ <- 0L until valuesPerIteration) { new AesGcmJce(derivedKeyEncryptingKey).encrypt(ephemeralX25519PublicKey, aadState) } } benchmark.run() ``` **Before**: 5423.0 ns/ per row **After**: 1538.3 ns /per row Closes #35679 from LuciferYang/upgrade-tink. Authored-by: yangjie01 Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index d9589da60da19..f6687edc3a1a9 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -255,7 +255,7 @@ stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar -tink/1.6.0//tink-1.6.0.jar +tink/1.6.1//tink-1.6.1.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar velocity/1.5//velocity-1.5.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index f01e4b5ffdaed..eb1e7cd158f2a 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -240,7 +240,7 @@ stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar -tink/1.6.0//tink-1.6.0.jar +tink/1.6.1//tink-1.6.1.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar velocity/1.5//velocity-1.5.jar diff --git a/pom.xml b/pom.xml index dfa34f44ce184..bcf3468b169ac 100644 --- a/pom.xml +++ b/pom.xml @@ -196,7 +196,7 @@ 1.1.0 1.5.0 1.60 - 1.6.0 + 1.6.1 ## Summary - - Number of queries: 382 + - Number of queries: 383 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -300,6 +300,7 @@ | org.apache.spark.sql.catalyst.expressions.Tanh | tanh | SELECT tanh(0) | struct | | org.apache.spark.sql.catalyst.expressions.TimeWindow | window | SELECT a, window.start, 
window.end, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:06:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, window(b, '5 minutes') ORDER BY a, start | struct | | org.apache.spark.sql.catalyst.expressions.TimestampAdd | timestampadd | SELECT timestampadd('HOUR', 8, timestamp_ntz'2022-02-11 20:30:00') | struct | +| org.apache.spark.sql.catalyst.expressions.TimestampDiff | timestampdiff | SELECT timestampdiff('HOUR', timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00') | struct | | org.apache.spark.sql.catalyst.expressions.ToBinary | to_binary | SELECT to_binary('abc', 'utf-8') | struct | | org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct | | org.apache.spark.sql.catalyst.expressions.ToNumber | to_number | SELECT to_number('454', '999') | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql index 14266db65a971..bbe5fb7bee6e6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql @@ -17,3 +17,9 @@ SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678, 'CET'); SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 60.007); SELECT convert_timezone('Europe/Moscow', 'America/Los_Angeles', timestamp_ntz'2022-01-01 00:00:00'); + +-- Get the difference between timestamps w/o time zone in the specified units +select timestampdiff('QUARTER', timestamp_ntz'2022-01-01 01:02:03', timestamp_ntz'2022-05-02 05:06:07'); +select timestampdiff(HOUR, timestamp_ntz'2022-02-14 01:02:03', timestamp_ltz'2022-02-14 02:03:04'); +select timestampdiff(YEAR, date'2022-02-15', timestamp_ntz'2023-02-15 10:11:12'); +select timestampdiff('MILLISECOND', timestamp_ntz'2022-02-14 23:59:59.123', date'2022-02-15'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql index 49eb228c33f44..9e1652a6cfa06 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql @@ -148,3 +148,9 @@ select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03'); select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03'); select timestampadd(YEAR, 1, date'2022-02-15'); select timestampadd('SECOND', -1, date'2022-02-15'); + +-- Get the difference between timestamps in the specified units +select timestampdiff('MONTH', timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03'); +select timestampdiff(MINUTE, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 02:00:03'); +select timestampdiff(YEAR, date'2022-02-15', date'2023-02-15'); +select timestampdiff('SECOND', date'2022-02-15', timestamp'2022-02-14 23:59:59'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out index 91e526316864a..2c47ed3abe2c9 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 93 +-- Number of queries: 97 -- !query @@ -803,3 +803,35 @@ select timestampadd('SECOND', -1, date'2022-02-15') struct -- !query output 2022-02-14 23:59:59 + + +-- !query +select timestampdiff('MONTH', 
timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select timestampdiff(MINUTE, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 02:00:03') +-- !query schema +struct +-- !query output +58 + + +-- !query +select timestampdiff(YEAR, date'2022-02-15', date'2023-02-15') +-- !query schema +struct +-- !query output +1 + + +-- !query +select timestampdiff('SECOND', date'2022-02-15', timestamp'2022-02-14 23:59:59') +-- !query schema +struct +-- !query output +-1 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index a96fb65579de8..60752e3fe20dc 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 180 +-- Number of queries: 184 -- !query @@ -1527,3 +1527,35 @@ select timestampadd('SECOND', -1, date'2022-02-15') struct -- !query output 2022-02-14 23:59:59 + + +-- !query +select timestampdiff('MONTH', timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select timestampdiff(MINUTE, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 02:00:03') +-- !query schema +struct +-- !query output +58 + + +-- !query +select timestampdiff(YEAR, date'2022-02-15', date'2023-02-15') +-- !query schema +struct +-- !query output +1 + + +-- !query +select timestampdiff('SECOND', date'2022-02-15', timestamp'2022-02-14 23:59:59') +-- !query schema +struct +-- !query output +-1 diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out index 0ed5beeaddf72..fcd74d88eb633 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 8 +-- Number of queries: 12 -- !query @@ -65,3 +65,35 @@ SELECT convert_timezone('Europe/Moscow', 'America/Los_Angeles', timestamp_ntz'20 struct -- !query output 2021-12-31 13:00:00 + + +-- !query +select timestampdiff('QUARTER', timestamp_ntz'2022-01-01 01:02:03', timestamp_ntz'2022-05-02 05:06:07') +-- !query schema +struct +-- !query output +1 + + +-- !query +select timestampdiff(HOUR, timestamp_ntz'2022-02-14 01:02:03', timestamp_ltz'2022-02-14 02:03:04') +-- !query schema +struct +-- !query output +1 + + +-- !query +select timestampdiff(YEAR, date'2022-02-15', timestamp_ntz'2023-02-15 10:11:12') +-- !query schema +struct +-- !query output +1 + + +-- !query +select timestampdiff('MILLISECOND', timestamp_ntz'2022-02-14 23:59:59.123', date'2022-02-15') +-- !query schema +struct +-- !query output +877 diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out index 34e313eeb8a24..6362a2ac20e0f 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 93 +-- Number of queries: 97 -- !query @@ -797,3 +797,35 @@ select timestampadd('SECOND', -1, date'2022-02-15') struct -- !query output 2022-02-14 23:59:59 + + +-- !query +select 
timestampdiff('MONTH', timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select timestampdiff(MINUTE, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 02:00:03') +-- !query schema +struct +-- !query output +58 + + +-- !query +select timestampdiff(YEAR, date'2022-02-15', date'2023-02-15') +-- !query schema +struct +-- !query output +1 + + +-- !query +select timestampdiff('SECOND', date'2022-02-15', timestamp'2022-02-14 23:59:59') +-- !query schema +struct +-- !query output +-1 diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index 00ad665ea0198..46b51fc9255b9 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 93 +-- Number of queries: 97 -- !query @@ -801,3 +801,35 @@ select timestampadd('SECOND', -1, date'2022-02-15') struct -- !query output 2022-02-14 23:59:59 + + +-- !query +select timestampdiff('MONTH', timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select timestampdiff(MINUTE, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 02:00:03') +-- !query schema +struct +-- !query output +58 + + +-- !query +select timestampdiff(YEAR, date'2022-02-15', date'2023-02-15') +-- !query schema +struct +-- !query output +1 + + +-- !query +select timestampdiff('SECOND', date'2022-02-15', timestamp'2022-02-14 23:59:59') +-- !query schema +struct +-- !query output +-1 diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out index 339e6db3cf0e5..adadae552a7c2 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 93 +-- Number of queries: 97 -- !query @@ -795,3 +795,35 @@ select timestampadd('SECOND', -1, date'2022-02-15') struct -- !query output 2022-02-14 23:59:59 + + +-- !query +select timestampdiff('MONTH', timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select timestampdiff(MINUTE, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 02:00:03') +-- !query schema +struct +-- !query output +58 + + +-- !query +select timestampdiff(YEAR, date'2022-02-15', date'2023-02-15') +-- !query schema +struct +-- !query output +1 + + +-- !query +select timestampdiff('SECOND', date'2022-02-15', timestamp'2022-02-14 23:59:59') +-- !query schema +struct +-- !query output +-1 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 57fbeacc31c61..429e41e8c997d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -92,14 +92,24 @@ class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { } } - test("INVALID_PARAMETER_VALUE: invalid unit passed 
to timestampadd") { - val e = intercept[SparkIllegalArgumentException] { - sql("select timestampadd('nanosecond', 100, timestamp'2022-02-13 18:00:00')").collect() + test("INVALID_PARAMETER_VALUE: invalid unit passed to timestampadd/timestampdiff") { + Seq( + "timestampadd" -> + "select timestampadd('nanosecond', 100, timestamp'2022-02-13 18:00:00')", + "timestampdiff" -> + """select timestampdiff( + | 'nanosecond', + | timestamp'2022-02-13 18:00:00', + | timestamp'2022-02-22 12:52:00')""".stripMargin + ).foreach { case (funcName, sqlStmt) => + val e = intercept[SparkIllegalArgumentException] { + sql(sqlStmt).collect() + } + assert(e.getErrorClass === "INVALID_PARAMETER_VALUE") + assert(e.getSqlState === "22023") + assert(e.getMessage === + s"The value of parameter(s) 'unit' in $funcName is invalid: nanosecond") } - assert(e.getErrorClass === "INVALID_PARAMETER_VALUE") - assert(e.getSqlState === "22023") - assert(e.getMessage === - "The value of parameter(s) 'unit' in timestampadd is invalid: nanosecond") } test("UNSUPPORTED_FEATURE: unsupported combinations of AES modes and padding") { From a633f77b5120d94eee6beff8615137bf537bbff9 Mon Sep 17 00:00:00 2001 From: chenzhx Date: Wed, 2 Mar 2022 01:23:05 +0800 Subject: [PATCH 366/513] [SPARK-37932][SQL] Wait to resolve missing attributes before applying DeduplicateRelations ### What changes were proposed in this pull request? When the join with duplicate view like ``` SELECT l1.idFROM v1 l1 INNER JOIN ( SELECT id FROM v1 GROUP BY id HAVING COUNT(DISTINCT name) > 1 ) l2 ON l1.id = l2.id GROUP BY l1.name, l1.id; ``` The error stack is: ``` Resolved attribute(s) name#26 missing from id#31,name#32 in operator !Aggregate [id#31], [id#31, count(distinct name#26) AS count(distinct name#26)#33L]. Attribute(s) with the same name appear in the operation: name. Please check if the right attribute(s) are used.; Aggregate [name#26, id#25], [id#25] +- Join Inner, (id#25 = id#31) :- SubqueryAlias l1 : +- SubqueryAlias spark_catalog.default.v1 : +- View (`default`.`v1`, [id#25,name#26]) : +- Project [cast(id#20 as int) AS id#25, cast(name#21 as string) AS name#26] : +- Project [id#20, name#21] : +- SubqueryAlias spark_catalog.default.t : +- Relation default.t[id#20,name#21] parquet +- SubqueryAlias l2 +- Project [id#31] +- Filter (count(distinct name#26)#33L > cast(1 as bigint)) +- !Aggregate [id#31], [id#31, count(distinct name#26) AS count(distinct name#26)#33L] +- SubqueryAlias spark_catalog.default.v1 +- View (`default`.`v1`, [id#31,name#32]) +- Project [cast(id#27 as int) AS id#31, cast(name#28 as string) AS name#32] +- Project [id#27, name#28] +- SubqueryAlias spark_catalog.default.t +- Relation default.t[id#27,name#28] parquet ``` Spark will consider the two views to be duplicates, which will cause the query to fail. ### Why are the changes needed? Fix bug when using join in duplicate views. ### Does this PR introduce _any_ user-facing change? Yes. When we join with duplicate view, the query would be successful. DeduplicateRelations should only kick in if the plan's children are all resolved and valid. ### How was this patch tested? Add new UT Closes #35684 from chenzhx/SPARK-37932. 
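The reproduction is easier to follow as one runnable snippet; this sketch assumes a spark-shell style session bound to `spark` and mirrors the regression test added below:

```scala
import spark.implicits._

Seq((1, "test1"), (2, "test2"), (1, "test2")).toDF("id", "name")
  .write.format("parquet").saveAsTable("t")
spark.sql("CREATE VIEW v1 (id, name) AS SELECT id, name FROM t")

spark.sql(
  """SELECT l1.id FROM v1 l1
    |INNER JOIN (
    |  SELECT id FROM v1
    |  GROUP BY id HAVING COUNT(DISTINCT name) > 1
    |) l2 ON l1.id = l2.id
    |GROUP BY l1.name, l1.id
    |""".stripMargin).show()
// Before this fix: fails with "Resolved attribute(s) ... missing ..."
// With this fix: returns two rows with id = 1
```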
Authored-by: chenzhx Signed-off-by: Wenchen Fan --- .../analysis/DeduplicateRelations.scala | 7 ++++++- .../spark/sql/execution/SQLViewSuite.scala | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala index 55b1c221c8378..4c351e3237df2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala @@ -40,7 +40,12 @@ case class ReferenceEqualPlanWrapper(plan: LogicalPlan) { object DeduplicateRelations extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { - renewDuplicatedRelations(mutable.HashSet.empty, plan)._1.resolveOperatorsUpWithPruning( + val newPlan = renewDuplicatedRelations(mutable.HashSet.empty, plan)._1 + if (newPlan.find(p => p.resolved && p.missingInput.nonEmpty).isDefined) { + // Wait for `ResolveMissingReferences` to resolve missing attributes first + return newPlan + } + newPlan.resolveOperatorsUpWithPruning( _.containsAnyPattern(JOIN, LATERAL_JOIN, AS_OF_JOIN, INTERSECT, EXCEPT, UNION, COMMAND), ruleId) { case p: LogicalPlan if !p.childrenResolved => p diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index ee6d3525b6f1a..9e6974a07a4a0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -907,4 +907,23 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } } } + + test("SPARK-37932: view join with same view") { + withTable("t") { + withView("v1") { + Seq((1, "test1"), (2, "test2"), (1, "test2")).toDF("id", "name") + .write.format("parquet").saveAsTable("t") + sql("CREATE VIEW v1 (id, name) AS SELECT id, name FROM t") + + checkAnswer( + sql("""SELECT l1.id FROM v1 l1 + |INNER JOIN ( + | SELECT id FROM v1 + | GROUP BY id HAVING COUNT(DISTINCT name) > 1 + | ) l2 ON l1.id = l2.id GROUP BY l1.name, l1.id; + |""".stripMargin), + Seq(Row(1), Row(1))) + } + } + } } From 5c23c765b3531e23072b0994186a56f7e141d859 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 2 Mar 2022 01:28:06 +0800 Subject: [PATCH 367/513] [SPARK-38358][DOC] Add migration guide for `spark.sql.hive.convertMetastoreInsertDir`, `spark.sql.hive.convertInsertingPartitionedTable` and `spark.sql.hive.convertMetastoreCtas` ### What changes were proposed in this pull request? Now, we support convert CTAS and INSERT INTO DIRECTORY and INSERT INTO PARTITION, but we didn't mention it in migration guide. These are important changes, especially `spark.sql.hive.convertInsertingPartitionedTable` ### Why are the changes needed? Improve document ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Not Need Closes #35692 from AngersZhuuuu/SPARK-38358. 
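For readers who hit a behavior change from these conversions, a minimal sketch of opting back out at the session level; the keys are the ones documented in the guide entries below, and they can equally be set in `spark-defaults.conf` or via SQL `SET`:

```scala
// Restore Hive-serde behavior for the three conversions covered by this guide change.
spark.conf.set("spark.sql.hive.convertMetastoreInsertDir", "false")          // INSERT OVERWRITE DIRECTORY (3.3+)
spark.conf.set("spark.sql.hive.convertMetastoreCtas", "false")               // CTAS (3.0+)
spark.conf.set("spark.sql.hive.convertInsertingPartitionedTable", "false")   // inserts into partitioned Hive tables (3.0+)
```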
Authored-by: Angerszhuuuu Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 6ea86e3ca2c4c..9d7b109650dbe 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -62,6 +62,8 @@ license: | - Since Spark 3.3, when reading values from a JSON attribute defined as `FloatType` or `DoubleType`, the strings `"+Infinity"`, `"+INF"`, and `"-INF"` are now parsed to the appropriate values, in addition to the already supported `"Infinity"` and `"-Infinity"` variations. This change was made to improve consistency with Jackson's parsing of the unquoted versions of these values. Also, the `allowNonNumericNumbers` option is now respected so these strings will now be considered invalid if this option is disabled. + - Since Spark 3.3, Spark will try to use built-in data source writer instead of Hive serde in `INSERT OVERWRITE DIRECTORY`. This behavior is effective only if `spark.sql.hive.convertMetastoreParquet` or `spark.sql.hive.convertMetastoreOrc` is enabled respectively for Parquet and ORC formats. To restore the behavior before Spark 3.3, you can set `spark.sql.hive.convertMetastoreInsertDir` to `false`. + ## Upgrading from Spark SQL 3.1 to 3.2 - Since Spark 3.2, ADD FILE/JAR/ARCHIVE commands require each path to be enclosed by `"` or `'` if the path contains whitespaces. @@ -352,6 +354,10 @@ license: | - In Spark 3.0, datetime pattern letter `F` is **aligned day of week in month** that represents the concept of the count of days within the period of a week where the weeks are aligned to the start of the month. In Spark version 2.4 and earlier, it is **week of month** that represents the concept of the count of weeks within the month where weeks start on a fixed day-of-week, e.g. `2020-07-30` is 30 days (4 weeks and 2 days) after the first day of the month, so `date_format(date '2020-07-30', 'F')` returns 2 in Spark 3.0, but as a week count in Spark 2.x, it returns 5 because it locates in the 5th week of July 2020, where week one is 2020-07-01 to 07-04. + - In Spark 3.0, Spark will try to use built-in data source writer instead of Hive serde in `CTAS`. This behavior is effective only if `spark.sql.hive.convertMetastoreParquet` or `spark.sql.hive.convertMetastoreOrc` is enabled respectively for Parquet and ORC formats. To restore the behavior before Spark 3.0, you can set `spark.sql.hive.convertMetastoreCtas` to `false`. + + - In Spark 3.0, Spark will try to use built-in data source writer instead of Hive serde to process inserting into partitioned ORC/Parquet tables created by using the HiveSQL syntax. This behavior is effective only if `spark.sql.hive.convertMetastoreParquet` or `spark.sql.hive.convertMetastoreOrc` is enabled respectively for Parquet and ORC formats. To restore the behavior before Spark 3.0, you can set `spark.sql.hive.convertInsertingPartitionedTable` to `false`. + ### Data Sources - In Spark version 2.4 and below, when reading a Hive SerDe table with Spark native data sources(parquet/orc), Spark infers the actual file schema and update the table schema in metastore. In Spark 3.0, Spark doesn't infer the schema anymore. This should not cause any problems to end users, but if it does, set `spark.sql.hive.caseSensitiveInferenceMode` to `INFER_AND_SAVE`. 
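A hedged usage sketch: the new key is a plain string conf, so submitting to a Volcano queue only requires setting it alongside the Volcano feature step. Only `spark.kubernetes.job.queue` comes from this patch; the scheduler-name and feature-step keys below are assumptions taken from the existing Kubernetes docs, not part of this change:

```scala
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.kubernetes.scheduler.name", "volcano")
  .set("spark.kubernetes.driver.pod.featureSteps",
    "org.apache.spark.deploy.k8s.features.VolcanoFeatureStep")
  .set("spark.kubernetes.executor.pod.featureSteps",
    "org.apache.spark.deploy.k8s.features.VolcanoFeatureStep")
  // Introduced by this patch: VolcanoFeatureStep writes it into the PodGroup's spec.queue.
  .set("spark.kubernetes.job.queue", "queue1")
```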
From ccb8af607672d2b2638554a1d4b003420292c0b2 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Tue, 1 Mar 2022 10:41:27 -0800 Subject: [PATCH 368/513] [SPARK-38188][K8S] Support `spark.kubernetes.job.queue` ### What changes were proposed in this pull request? This patch has below changes: - Add the `queue` configuration `spark.kubernetes.job.queue` - Add queue scheduling Volcano implementions - Add a integrations test to make sure `queue` is set to PodGroup, and also validate the queue scheduling. ### Why are the changes needed? Support queue scheduling with Volcano implementations. ### Does this PR introduce _any_ user-facing change? Yes, introduce a new configuration `spark.kubernetes.job.queue`. ### How was this patch tested? - UT passed. - Spark on Kubernetes Integration test passed. Closes #35553 from Yikun/SPARK-38188. Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 9 ++ .../org/apache/spark/deploy/k8s/Config.scala | 7 + .../k8s/features/VolcanoFeatureStep.scala | 8 +- .../features/VolcanoFeatureStepSuite.scala | 11 ++ .../volcano/disable-queue0-enable-queue1.yml | 29 ++++ .../volcano/enable-queue0-enable-queue1.yml | 29 ++++ .../k8s/integrationtest/KubernetesSuite.scala | 19 ++- .../integrationtest/VolcanoTestsSuite.scala | 142 +++++++++++++++++- 8 files changed, 242 insertions(+), 12 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue0-enable-queue1.yml create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/enable-queue0-enable-queue1.yml diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index ee0b23012a591..8553d7886acf0 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1356,6 +1356,15 @@ See the [configuration page](configuration.html) for information on Spark config 3.3.0 + + spark.kubernetes.job.queue + (none) + + The name of the queue to which the job is submitted. This info will be stored in configuration + and passed to specific feature step. + + 3.3.0 + spark.kubernetes.configMap.maxSize 1572864 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 91bbb410dca7f..58a4a785b5182 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -292,6 +292,13 @@ private[spark] object Config extends Logging { .stringConf .createOptional + val KUBERNETES_JOB_QUEUE = ConfigBuilder("spark.kubernetes.job.queue") + .doc("The name of the queue to which the job is submitted. 
This info " + + "will be stored in configuration and passed to specific feature step.") + .version("3.3.0") + .stringConf + .createOptional + val KUBERNETES_EXECUTOR_REQUEST_CORES = ConfigBuilder("spark.kubernetes.executor.request.cores") .doc("Specify the cpu request for each executor pod") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala index 1c936848db67f..c6efe4d1368a8 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala @@ -20,6 +20,7 @@ import io.fabric8.kubernetes.api.model._ import io.fabric8.volcano.scheduling.v1beta1.PodGroupBuilder import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverConf, KubernetesExecutorConf, SparkPod} +import org.apache.spark.deploy.k8s.Config._ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureConfigStep with KubernetesExecutorCustomFeatureConfigStep { @@ -30,6 +31,7 @@ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureCon private lazy val podGroupName = s"${kubernetesConf.appId}-podgroup" private lazy val namespace = kubernetesConf.namespace + private lazy val queue = kubernetesConf.get(KUBERNETES_JOB_QUEUE) override def init(config: KubernetesDriverConf): Unit = { kubernetesConf = config @@ -45,8 +47,10 @@ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureCon .withName(podGroupName) .withNamespace(namespace) .endMetadata() - .build() - Seq(podGroup) + + queue.foreach(podGroup.editOrNewSpec().withQueue(_).endSpec()) + + Seq(podGroup.build()) } override def configurePod(pod: SparkPod): SparkPod = { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala index cf337f99cab97..eda1ccc36767e 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala @@ -20,6 +20,7 @@ import io.fabric8.volcano.scheduling.v1beta1.PodGroup import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ +import org.apache.spark.deploy.k8s.Config._ class VolcanoFeatureStepSuite extends SparkFunSuite { @@ -37,6 +38,16 @@ class VolcanoFeatureStepSuite extends SparkFunSuite { assert(podGroup.getMetadata.getName === s"${kubernetesConf.appId}-podgroup") } + test("SPARK-38818: Support `spark.kubernetes.job.queue`") { + val sparkConf = new SparkConf() + .set(KUBERNETES_JOB_QUEUE.key, "queue1") + val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf) + val step = new VolcanoFeatureStep() + step.init(kubernetesConf) + val podGroup = step.getAdditionalPreKubernetesResources().head.asInstanceOf[PodGroup] + assert(podGroup.getSpec.getQueue === "queue1") + } + test("SPARK-36061: Executor Pod with Volcano PodGroup") { val sparkConf = new SparkConf() val kubernetesConf = KubernetesTestConf.createExecutorConf(sparkConf) diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue0-enable-queue1.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue0-enable-queue1.yml new file mode 100644 index 0000000000000..2281e2e8226a2 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue0-enable-queue1.yml @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: Queue +metadata: + name: queue0 +spec: + weight: 0 +--- +apiVersion: scheduling.volcano.sh/v1beta1 +kind: Queue +metadata: + name: queue1 +spec: + weight: 1 diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/enable-queue0-enable-queue1.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/enable-queue0-enable-queue1.yml new file mode 100644 index 0000000000000..aadeb2851882e --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/enable-queue0-enable-queue1.yml @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: Queue +metadata: + name: queue0 +spec: + weight: 1 +--- +apiVersion: scheduling.volcano.sh/v1beta1 +kind: Queue +metadata: + name: queue1 +spec: + weight: 1 diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 69b736951301e..ca7eae1f0a632 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -212,7 +212,9 @@ class KubernetesSuite extends SparkFunSuite driverPodChecker: Pod => Unit = doBasicDriverPodCheck, executorPodChecker: Pod => Unit = doBasicExecutorPodCheck, appArgs: Array[String] = Array.empty[String], - isJVM: Boolean = true ): Unit = { + isJVM: Boolean = true, + customSparkConf: Option[SparkAppConf] = None, + customAppLocator: Option[String] = None): Unit = { runSparkApplicationAndVerifyCompletion( appResource, SPARK_PI_MAIN_CLASS, @@ -221,7 +223,10 @@ class KubernetesSuite extends SparkFunSuite appArgs, driverPodChecker, executorPodChecker, - isJVM) + isJVM, + customSparkConf = customSparkConf, + customAppLocator = customAppLocator + ) } protected def runDFSReadWriteAndVerifyCompletion( @@ -336,7 +341,9 @@ class KubernetesSuite extends SparkFunSuite pyFiles: Option[String] = None, executorPatience: Option[(Option[Interval], Option[Timeout])] = None, decommissioningTest: Boolean = false, - env: Map[String, String] = Map.empty[String, String]): Unit = { + env: Map[String, String] = Map.empty[String, String], + customSparkConf: Option[SparkAppConf] = None, + customAppLocator: Option[String] = None): Unit = { // scalastyle:on argcount val appArguments = SparkAppArguments( @@ -370,7 +377,7 @@ class KubernetesSuite extends SparkFunSuite val execWatcher = kubernetesTestComponents.kubernetesClient .pods() - .withLabel("spark-app-locator", appLocator) + .withLabel("spark-app-locator", customAppLocator.getOrElse(appLocator)) .withLabel("spark-role", "executor") .watch(new Watcher[Pod] { logDebug("Beginning watch of executors") @@ -434,7 +441,7 @@ class KubernetesSuite extends SparkFunSuite logDebug("Starting Spark K8s job") SparkAppLauncher.launch( appArguments, - sparkAppConf, + customSparkConf.getOrElse(sparkAppConf), TIMEOUT.value.toSeconds.toInt, sparkHomeDir, isJVM, @@ -443,7 +450,7 @@ class KubernetesSuite extends SparkFunSuite val driverPod = kubernetesTestComponents.kubernetesClient .pods() - .withLabel("spark-app-locator", appLocator) + .withLabel("spark-app-locator", customAppLocator.getOrElse(appLocator)) .withLabel("spark-role", "driver") .list() .getItems diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala index 377a1b8167984..7ffd28b790ceb 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala @@ -16,16 +16,34 @@ */ package 
org.apache.spark.deploy.k8s.integrationtest +import java.io.{File, FileInputStream} +import java.util.UUID + +import scala.collection.JavaConverters._ +import scala.collection.mutable +// scalastyle:off executioncontextglobal +import scala.concurrent.ExecutionContext.Implicits.global +// scalastyle:on executioncontextglobal +import scala.concurrent.Future + import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.kubernetes.client.NamespacedKubernetesClient import io.fabric8.volcano.client.VolcanoClient +import org.scalatest.concurrent.Eventually import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.features.VolcanoFeatureStep -import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.k8sTestTag -import org.apache.spark.deploy.k8s.integrationtest.VolcanoSuite.volcanoTag +import org.apache.spark.internal.config.NETWORK_AUTH_ENABLED private[spark] trait VolcanoTestsSuite { k8sSuite: KubernetesSuite => import VolcanoTestsSuite._ + import org.apache.spark.deploy.k8s.integrationtest.VolcanoSuite.volcanoTag + import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{k8sTestTag, INTERVAL, TIMEOUT} + + lazy val volcanoClient: VolcanoClient + = kubernetesTestComponents.kubernetesClient.adapt(classOf[VolcanoClient]) + lazy val k8sClient: NamespacedKubernetesClient = kubernetesTestComponents.kubernetesClient protected def checkScheduler(pod: Pod): Unit = { assert(pod.getSpec.getSchedulerName === "volcano") @@ -37,12 +55,81 @@ private[spark] trait VolcanoTestsSuite { k8sSuite: KubernetesSuite => assert(annotations.get("scheduling.k8s.io/group-name") === s"$appId-podgroup") } - protected def checkPodGroup(pod: Pod): Unit = { + protected def checkPodGroup( + pod: Pod, + queue: Option[String] = None): Unit = { val appId = pod.getMetadata.getLabels.get("spark-app-selector") val podGroupName = s"$appId-podgroup" - val volcanoClient = kubernetesTestComponents.kubernetesClient.adapt(classOf[VolcanoClient]) val podGroup = volcanoClient.podGroups().withName(podGroupName).get() assert(podGroup.getMetadata.getOwnerReferences.get(0).getName === pod.getMetadata.getName) + queue.foreach(q => assert(q === podGroup.getSpec.getQueue)) + } + + private def createOrReplaceYAMLResource(yamlPath: String): Unit = { + k8sClient.load(new FileInputStream(yamlPath)).createOrReplace() + } + + private def deleteYAMLResource(yamlPath: String): Unit = { + k8sClient.load(new FileInputStream(yamlPath)).delete() + } + + private def getPods( + role: String, + groupLocator: String, + statusPhase: String): mutable.Buffer[Pod] = { + k8sClient + .pods() + .withLabel("spark-group-locator", groupLocator) + .withLabel("spark-role", role) + .withField("status.phase", statusPhase) + .list() + .getItems.asScala + } + + def runJobAndVerify( + batchSuffix: String, + groupLoc: Option[String] = None, + queue: Option[String] = None): Unit = { + val appLoc = s"${appLocator}${batchSuffix}" + val podName = s"${driverPodName}-${batchSuffix}" + // create new configuration for every job + val conf = createVolcanoSparkConf(podName, appLoc, groupLoc, queue) + runSparkPiAndVerifyCompletion( + driverPodChecker = (driverPod: Pod) => { + checkScheduler(driverPod) + checkAnnotaion(driverPod) + checkPodGroup(driverPod, queue) + }, + executorPodChecker = (executorPod: Pod) => { + checkScheduler(executorPod) + checkAnnotaion(executorPod) + }, + customSparkConf = Option(conf), + customAppLocator = Option(appLoc) + ) + } + + private def createVolcanoSparkConf( + driverPodName: 
String = driverPodName, + appLoc: String = appLocator, + groupLoc: Option[String] = None, + queue: Option[String] = None): SparkAppConf = { + val conf = kubernetesTestComponents.newSparkAppConf() + .set(CONTAINER_IMAGE.key, image) + .set(KUBERNETES_DRIVER_POD_NAME.key, driverPodName) + .set(s"${KUBERNETES_DRIVER_LABEL_PREFIX}spark-app-locator", appLoc) + .set(s"${KUBERNETES_EXECUTOR_LABEL_PREFIX}spark-app-locator", appLoc) + .set(NETWORK_AUTH_ENABLED.key, "true") + // below is volcano specific configuration + .set(KUBERNETES_SCHEDULER_NAME.key, "volcano") + .set(KUBERNETES_DRIVER_POD_FEATURE_STEPS.key, VOLCANO_FEATURE_STEP) + .set(KUBERNETES_EXECUTOR_POD_FEATURE_STEPS.key, VOLCANO_FEATURE_STEP) + queue.foreach(conf.set(KUBERNETES_JOB_QUEUE.key, _)) + groupLoc.foreach { locator => + conf.set(s"${KUBERNETES_DRIVER_LABEL_PREFIX}spark-group-locator", locator) + conf.set(s"${KUBERNETES_EXECUTOR_LABEL_PREFIX}spark-group-locator", locator) + } + conf } test("Run SparkPi with volcano scheduler", k8sTestTag, volcanoTag) { @@ -63,8 +150,55 @@ private[spark] trait VolcanoTestsSuite { k8sSuite: KubernetesSuite => } ) } + + test("SPARK-38188: Run SparkPi jobs with 2 queues (only 1 enable)", k8sTestTag, volcanoTag) { + // Disabled queue0 and enabled queue1 + createOrReplaceYAMLResource(VOLCANO_Q0_DISABLE_Q1_ENABLE_YAML) + // Submit jobs into disabled queue0 and enabled queue1 + val jobNum = 4 + (1 to jobNum).foreach { i => + Future { + val queueName = s"queue${i % 2}" + runJobAndVerify(i.toString, Option(s"$GROUP_PREFIX-$queueName"), Option(queueName)) + } + } + // There are two `Succeeded` jobs and two `Pending` jobs + Eventually.eventually(TIMEOUT, INTERVAL) { + val completedPods = getPods("driver", s"$GROUP_PREFIX-queue1", "Succeeded") + assert(completedPods.size === 2) + val pendingPods = getPods("driver", s"$GROUP_PREFIX-queue0", "Pending") + assert(pendingPods.size === 2) + } + deleteYAMLResource(VOLCANO_Q0_DISABLE_Q1_ENABLE_YAML) + } + + test("SPARK-38188: Run SparkPi jobs with 2 queues (all enable)", k8sTestTag, volcanoTag) { + // Enable all queues + createOrReplaceYAMLResource(VOLCANO_ENABLE_Q0_AND_Q1_YAML) + val jobNum = 4 + // Submit jobs into these two queues + (1 to jobNum).foreach { i => + Future { + val queueName = s"queue${i % 2}" + runJobAndVerify(i.toString, Option(s"$GROUP_PREFIX"), Option(queueName)) + } + } + // All jobs "Succeeded" + Eventually.eventually(TIMEOUT, INTERVAL) { + val completedPods = getPods("driver", GROUP_PREFIX, "Succeeded") + assert(completedPods.size === jobNum) + } + deleteYAMLResource(VOLCANO_ENABLE_Q0_AND_Q1_YAML) + } } private[spark] object VolcanoTestsSuite extends SparkFunSuite { val VOLCANO_FEATURE_STEP = classOf[VolcanoFeatureStep].getName + val VOLCANO_ENABLE_Q0_AND_Q1_YAML = new File( + getClass.getResource("/volcano/enable-queue0-enable-queue1.yml").getFile + ).getAbsolutePath + val VOLCANO_Q0_DISABLE_Q1_ENABLE_YAML = new File( + getClass.getResource("/volcano/disable-queue0-enable-queue1.yml").getFile + ).getAbsolutePath + val GROUP_PREFIX = "volcano-test" + UUID.randomUUID().toString.replaceAll("-", "") } From 42f118ad1e7ef837c18ce73b22adaf486d238994 Mon Sep 17 00:00:00 2001 From: attilapiros Date: Tue, 1 Mar 2022 11:32:04 -0800 Subject: [PATCH 369/513] [SPARK-33206][CORE] Fix shuffle index cache weight calculation for small index files ### What changes were proposed in this pull request? 
Increase the shuffle index cache weight by a constant to avoid underestimating the retained memory of the bookkeeping objects: the `java.io.File` object (~960 bytes, depending on the path) and the `ShuffleIndexInformation` object (~180 bytes).

### Why are the changes needed?

Underestimating the cache entry size can easily cause OOM in the YARN NodeManager. In the following analysis of a production issue (HPROF file) the leak suspects are Guava's `LocalCache$Segment` objects (MAT leak-suspect screenshot omitted). Going further, we can see a `ShuffleIndexInformation` for a small index file (16 bytes) whose retained heap memory is 1192 bytes (screenshot omitted). Finally, this pattern is very common within the heap dump, as shown with MAT's Object Query Language (screenshot omitted). I have exported the data to a CSV and done some calculations with `awk`:

```
$ tail -n+2 export.csv | awk -F, 'BEGIN { numUnderEstimated=0; } { sumOldSize += $1; corrected=$1 + 1176; sumCorrectedSize += corrected; sumRetainedMem += $2; if (corrected < $2) numUnderEstimated+=1; } END { print "sum old size: " sumOldSize / 1024 / 1024 " MB, sum corrected size: " sumCorrectedSize / 1024 / 1024 " MB, sum retained memory:" sumRetainedMem / 1024 / 1024 " MB, num under estimated: " numUnderEstimated }'
```

It gives the following:

```
sum old size: 76.8785 MB, sum corrected size: 1066.93 MB, sum retained memory:1064.47 MB, num under estimated: 0
```

So with the old calculation we were at about 76.9 MB, well under the default cache limit (100 MB). With the correction (adding 1176 bytes to each entry's size) we are at 1066.93 MB (~1 GB), which is close to the real retained heap sum of 1064.47 MB (~1 GB), and no entry is underestimated.

But we can go further and get rid of `java.io.File` completely by keying the cache on the file path and storing only the `ShuffleIndexInformation`. This way not only is the cache size estimate improved, but the cache's own footprint is decreased as well. The path itself is not counted into the cache size because that string is interned.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

With the calculations above.

Closes #35559 from attilapiros/SPARK-33206.
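To make the size arithmetic above concrete, here is a minimal Scala sketch (not part of the patch) of the corrected per-entry weight; the 176-byte constant mirrors the patch's `INSTANCE_MEMORY_FOOTPRINT` estimate, and real retained sizes vary by JVM and object layout:

```
// Corrected cache weight for one cached index entry: 8 bytes per stored offset
// plus a fixed footprint estimate for the ShuffleIndexInformation instance itself.
val instanceFootprint = 176  // mirrors ShuffleIndexInformation.INSTANCE_MEMORY_FOOTPRINT

// A map output with `numReducers` partitions stores numReducers + 1 long offsets.
def retainedSize(numReducers: Int): Int = (numReducers + 1) * 8 + instanceFootprint

// A tiny index file with 2 offsets (16 bytes on disk) is now charged
// 16 + 176 = 192 bytes to the cache, instead of the 16 bytes the old
// file-length-based weigher reported.
assert(retainedSize(1) == 192)
```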
Authored-by: attilapiros Signed-off-by: Dongjoon Hyun --- .../network/shuffle/ExecutorDiskUtils.java | 7 +- .../shuffle/ExternalShuffleBlockResolver.java | 46 ++++++------ .../shuffle/RemoteBlockPushResolver.java | 48 +++++++------ .../shuffle/ShuffleIndexInformation.java | 23 +++--- .../shuffle/RemoteBlockPushResolverSuite.java | 4 +- .../shuffle/ShuffleIndexInformationSuite.java | 71 +++++++++++++++++++ .../shuffle/TestShuffleDataContext.java | 16 +++-- .../shuffle/IndexShuffleBlockResolver.scala | 9 ++- .../apache/spark/storage/BlockManager.scala | 2 +- .../spark/storage/DiskBlockManager.scala | 4 +- .../spark/storage/BlockManagerSuite.scala | 3 +- 11 files changed, 161 insertions(+), 72 deletions(-) create mode 100644 common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ShuffleIndexInformationSuite.java diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExecutorDiskUtils.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExecutorDiskUtils.java index e5e61aae92d2f..2ed0718628380 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExecutorDiskUtils.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExecutorDiskUtils.java @@ -27,7 +27,7 @@ public class ExecutorDiskUtils { * Hashes a filename into the corresponding local directory, in a manner consistent with * Spark's DiskBlockManager.getFile(). */ - public static File getFile(String[] localDirs, int subDirsPerLocalDir, String filename) { + public static String getFilePath(String[] localDirs, int subDirsPerLocalDir, String filename) { int hash = JavaUtils.nonNegativeHash(filename); String localDir = localDirs[hash % localDirs.length]; int subDirId = (hash / localDirs.length) % subDirsPerLocalDir; @@ -38,9 +38,8 @@ public static File getFile(String[] localDirs, int subDirsPerLocalDir, String fi // Unfortunately, we cannot just call the normalization code that java.io.File // uses, since it is in the package-private class java.io.FileSystem. // So we are creating a File just to get the normalized path back to intern it. - // Finally a new File is built and returned with this interned normalized path. - final String normalizedInternedPath = new File(notNormalizedPath).getPath().intern(); - return new File(normalizedInternedPath); + // We return this interned normalized path. + return new File(notNormalizedPath).getPath().intern(); } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java index bf8c6ae0ab31a..4b8a5e82d7445 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java @@ -80,7 +80,7 @@ public class ExternalShuffleBlockResolver { * Caches index file information so that we can avoid open/close the index files * for each block fetch. */ - private final LoadingCache shuffleIndexCache; + private final LoadingCache shuffleIndexCache; // Single-threaded Java executor used to perform expensive recursive directory deletion. 
private final Executor directoryCleaner; @@ -112,15 +112,16 @@ public ExternalShuffleBlockResolver(TransportConf conf, File registeredExecutorF Boolean.parseBoolean(conf.get(Constants.SHUFFLE_SERVICE_FETCH_RDD_ENABLED, "false")); this.registeredExecutorFile = registeredExecutorFile; String indexCacheSize = conf.get("spark.shuffle.service.index.cache.size", "100m"); - CacheLoader indexCacheLoader = - new CacheLoader() { - public ShuffleIndexInformation load(File file) throws IOException { - return new ShuffleIndexInformation(file); + CacheLoader indexCacheLoader = + new CacheLoader() { + public ShuffleIndexInformation load(String filePath) throws IOException { + return new ShuffleIndexInformation(filePath); } }; shuffleIndexCache = CacheBuilder.newBuilder() .maximumWeight(JavaUtils.byteStringAsBytes(indexCacheSize)) - .weigher((Weigher) (file, indexInfo) -> indexInfo.getSize()) + .weigher((Weigher) + (filePath, indexInfo) -> indexInfo.getRetainedMemorySize()) .build(indexCacheLoader); db = LevelDBProvider.initLevelDB(this.registeredExecutorFile, CURRENT_VERSION, mapper); if (db != null) { @@ -300,28 +301,35 @@ private void deleteNonShuffleServiceServedFiles(String[] dirs) { */ private ManagedBuffer getSortBasedShuffleBlockData( ExecutorShuffleInfo executor, int shuffleId, long mapId, int startReduceId, int endReduceId) { - File indexFile = ExecutorDiskUtils.getFile(executor.localDirs, executor.subDirsPerLocalDir, - "shuffle_" + shuffleId + "_" + mapId + "_0.index"); + String indexFilePath = + ExecutorDiskUtils.getFilePath( + executor.localDirs, + executor.subDirsPerLocalDir, + "shuffle_" + shuffleId + "_" + mapId + "_0.index"); try { - ShuffleIndexInformation shuffleIndexInformation = shuffleIndexCache.get(indexFile); + ShuffleIndexInformation shuffleIndexInformation = shuffleIndexCache.get(indexFilePath); ShuffleIndexRecord shuffleIndexRecord = shuffleIndexInformation.getIndex( startReduceId, endReduceId); return new FileSegmentManagedBuffer( conf, - ExecutorDiskUtils.getFile(executor.localDirs, executor.subDirsPerLocalDir, - "shuffle_" + shuffleId + "_" + mapId + "_0.data"), + new File( + ExecutorDiskUtils.getFilePath( + executor.localDirs, + executor.subDirsPerLocalDir, + "shuffle_" + shuffleId + "_" + mapId + "_0.data")), shuffleIndexRecord.getOffset(), shuffleIndexRecord.getLength()); } catch (ExecutionException e) { - throw new RuntimeException("Failed to open file: " + indexFile, e); + throw new RuntimeException("Failed to open file: " + indexFilePath, e); } } public ManagedBuffer getDiskPersistedRddBlockData( ExecutorShuffleInfo executor, int rddId, int splitIndex) { - File file = ExecutorDiskUtils.getFile(executor.localDirs, executor.subDirsPerLocalDir, - "rdd_" + rddId + "_" + splitIndex); + File file = new File( + ExecutorDiskUtils.getFilePath( + executor.localDirs, executor.subDirsPerLocalDir, "rdd_" + rddId + "_" + splitIndex)); long fileLength = file.length(); ManagedBuffer res = null; if (file.exists()) { @@ -348,8 +356,8 @@ public int removeBlocks(String appId, String execId, String[] blockIds) { } int numRemovedBlocks = 0; for (String blockId : blockIds) { - File file = - ExecutorDiskUtils.getFile(executor.localDirs, executor.subDirsPerLocalDir, blockId); + File file = new File( + ExecutorDiskUtils.getFilePath(executor.localDirs, executor.subDirsPerLocalDir, blockId)); if (file.delete()) { numRemovedBlocks++; } else { @@ -386,10 +394,8 @@ public Cause diagnoseShuffleBlockCorruption( ExecutorShuffleInfo executor = executors.get(new AppExecId(appId, execId)); // This should 
be in sync with IndexShuffleBlockResolver.getChecksumFile String fileName = "shuffle_" + shuffleId + "_" + mapId + "_0.checksum." + algorithm; - File checksumFile = ExecutorDiskUtils.getFile( - executor.localDirs, - executor.subDirsPerLocalDir, - fileName); + File checksumFile = new File( + ExecutorDiskUtils.getFilePath(executor.localDirs, executor.subDirsPerLocalDir, fileName)); ManagedBuffer data = getBlockData(appId, execId, shuffleId, mapId, reduceId); return ShuffleChecksumHelper.diagnoseCorruption( algorithm, checksumFile, reduceId, data, checksumByReader); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java index b823076e57f71..62ab34028963e 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java @@ -102,7 +102,7 @@ public class RemoteBlockPushResolver implements MergedShuffleFileManager { private final int ioExceptionsThresholdDuringMerge; @SuppressWarnings("UnstableApiUsage") - private final LoadingCache indexCache; + private final LoadingCache indexCache; @SuppressWarnings("UnstableApiUsage") public RemoteBlockPushResolver(TransportConf conf) { @@ -113,15 +113,16 @@ public RemoteBlockPushResolver(TransportConf conf) { NettyUtils.createThreadFactory("spark-shuffle-merged-shuffle-directory-cleaner")); this.minChunkSize = conf.minChunkSizeInMergedShuffleFile(); this.ioExceptionsThresholdDuringMerge = conf.ioExceptionsThresholdDuringMerge(); - CacheLoader indexCacheLoader = - new CacheLoader() { - public ShuffleIndexInformation load(File file) throws IOException { - return new ShuffleIndexInformation(file); + CacheLoader indexCacheLoader = + new CacheLoader() { + public ShuffleIndexInformation load(String filePath) throws IOException { + return new ShuffleIndexInformation(filePath); } }; indexCache = CacheBuilder.newBuilder() .maximumWeight(conf.mergedIndexCacheSize()) - .weigher((Weigher)(file, indexInfo) -> indexInfo.getSize()) + .weigher((Weigher) + (filePath, indexInfo) -> indexInfo.getRetainedMemorySize()) .build(indexCacheLoader); } @@ -204,8 +205,8 @@ private AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( // manager receives a pushed block for a given application shuffle partition. 
File dataFile = appShuffleInfo.getMergedShuffleDataFile(shuffleId, shuffleMergeId, reduceId); - File indexFile = - appShuffleInfo.getMergedShuffleIndexFile(shuffleId, shuffleMergeId, reduceId); + File indexFile = new File( + appShuffleInfo.getMergedShuffleIndexFilePath(shuffleId, shuffleMergeId, reduceId)); File metaFile = appShuffleInfo.getMergedShuffleMetaFile(shuffleId, shuffleMergeId, reduceId); try { @@ -251,8 +252,8 @@ public MergedBlockMeta getMergedBlockMeta( shuffleId, shuffleMergeId, reduceId, ErrorHandler.BlockFetchErrorHandler.STALE_SHUFFLE_BLOCK_FETCH)); } - File indexFile = - appShuffleInfo.getMergedShuffleIndexFile(shuffleId, shuffleMergeId, reduceId); + File indexFile = new File( + appShuffleInfo.getMergedShuffleIndexFilePath(shuffleId, shuffleMergeId, reduceId)); if (!indexFile.exists()) { throw new RuntimeException(String.format( "Merged shuffle index file %s not found", indexFile.getPath())); @@ -290,18 +291,18 @@ public ManagedBuffer getMergedBlockData( throw new RuntimeException(String.format("Merged shuffle data file %s not found", dataFile.getPath())); } - File indexFile = - appShuffleInfo.getMergedShuffleIndexFile(shuffleId, shuffleMergeId, reduceId); + String indexFilePath = + appShuffleInfo.getMergedShuffleIndexFilePath(shuffleId, shuffleMergeId, reduceId); try { // If we get here, the merged shuffle file should have been properly finalized. Thus we can // use the file length to determine the size of the merged shuffle block. - ShuffleIndexInformation shuffleIndexInformation = indexCache.get(indexFile); + ShuffleIndexInformation shuffleIndexInformation = indexCache.get(indexFilePath); ShuffleIndexRecord shuffleIndexRecord = shuffleIndexInformation.getIndex(chunkId); return new FileSegmentManagedBuffer( conf, dataFile, shuffleIndexRecord.getOffset(), shuffleIndexRecord.getLength()); } catch (ExecutionException e) { throw new RuntimeException(String.format( - "Failed to open merged shuffle index file %s", indexFile.getPath()), e); + "Failed to open merged shuffle index file %s", indexFilePath), e); } } @@ -1303,11 +1304,14 @@ public ConcurrentMap getShuffles() { * @see [[org.apache.spark.storage.DiskBlockManager#getMergedShuffleFile( * org.apache.spark.storage.BlockId, scala.Option)]] */ - private File getFile(String filename) { + private String getFilePath(String filename) { // TODO: [SPARK-33236] Change the message when this service is able to handle NM restart - File targetFile = ExecutorDiskUtils.getFile(appPathsInfo.activeLocalDirs, - appPathsInfo.subDirsPerLocalDir, filename); - logger.debug("Get merged file {}", targetFile.getAbsolutePath()); + String targetFile = + ExecutorDiskUtils.getFilePath( + appPathsInfo.activeLocalDirs, + appPathsInfo.subDirsPerLocalDir, + filename); + logger.debug("Get merged file {}", targetFile); return targetFile; } @@ -1327,16 +1331,16 @@ public File getMergedShuffleDataFile( int reduceId) { String fileName = String.format("%s.data", generateFileName(appId, shuffleId, shuffleMergeId, reduceId)); - return getFile(fileName); + return new File(getFilePath(fileName)); } - public File getMergedShuffleIndexFile( + public String getMergedShuffleIndexFilePath( int shuffleId, int shuffleMergeId, int reduceId) { String indexName = String.format("%s.index", generateFileName(appId, shuffleId, shuffleMergeId, reduceId)); - return getFile(indexName); + return getFilePath(indexName); } public File getMergedShuffleMetaFile( @@ -1345,7 +1349,7 @@ public File getMergedShuffleMetaFile( int reduceId) { String metaName = String.format("%s.meta", 
generateFileName(appId, shuffleId, shuffleMergeId, reduceId)); - return getFile(metaName); + return new File(getFilePath(metaName)); } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleIndexInformation.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleIndexInformation.java index b65aacfcc4b9e..6669255f30299 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleIndexInformation.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleIndexInformation.java @@ -29,25 +29,28 @@ * as an in-memory LongBuffer. */ public class ShuffleIndexInformation { + + // The estimate of `ShuffleIndexInformation` memory footprint which is relevant in case of small + // index files (i.e. storing only 2 offsets = 16 bytes). + static final int INSTANCE_MEMORY_FOOTPRINT = 176; + /** offsets as long buffer */ private final LongBuffer offsets; - private int size; - public ShuffleIndexInformation(File indexFile) throws IOException { - size = (int)indexFile.length(); - ByteBuffer buffer = ByteBuffer.allocate(size); + public ShuffleIndexInformation(String indexFilePath) throws IOException { + File indexFile = new File(indexFilePath); + ByteBuffer buffer = ByteBuffer.allocate((int)indexFile.length()); offsets = buffer.asLongBuffer(); try (DataInputStream dis = new DataInputStream(Files.newInputStream(indexFile.toPath()))) { dis.readFully(buffer.array()); } } - /** - * Size of the index file - * @return size - */ - public int getSize() { - return size; + public int getRetainedMemorySize() { + // SPARK-33206: here the offsets' capacity is multiplied by 8 as offsets stores long values. + // Integer overflow won't be an issue here as long as the number of reducers is under + // (Integer.MAX_VALUE - INSTANCE_MEMORY_FOOTPRINT) / 8 - 1 = 268435432. 
+ return (offsets.capacity() << 3) + INSTANCE_MEMORY_FOOTPRINT; } /** diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java index 595473376cfcd..603b20c7dbacf 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java @@ -1247,7 +1247,7 @@ void closeAndDeletePartitionFiles(Map partitio assertFalse("Meta files on the disk should be cleaned up", appShuffleInfo.getMergedShuffleMetaFile(0, 1, 0).exists()); assertFalse("Index files on the disk should be cleaned up", - appShuffleInfo.getMergedShuffleIndexFile(0, 1, 0).exists()); + new File(appShuffleInfo.getMergedShuffleIndexFilePath(0, 1, 0)).exists()); stream2.onData(stream2.getID(), ByteBuffer.wrap(new byte[2])); stream2.onData(stream2.getID(), ByteBuffer.wrap(new byte[2])); // stream 2 now completes @@ -1282,7 +1282,7 @@ void closeAndDeletePartitionFiles(Map partitio assertFalse("MergedBlock meta file for shuffle 0 and shuffleMergeId 4 should be cleaned" + " up", appShuffleInfo.getMergedShuffleMetaFile(0, 4, 0).exists()); assertFalse("MergedBlock index file for shuffle 0 and shuffleMergeId 4 should be cleaned" - + " up", appShuffleInfo.getMergedShuffleIndexFile(0, 4, 0).exists()); + + " up", new File(appShuffleInfo.getMergedShuffleIndexFilePath(0, 4, 0)).exists()); assertFalse("MergedBlock data file for shuffle 0 and shuffleMergeId 4 should be cleaned" + " up", appShuffleInfo.getMergedShuffleDataFile(0, 4, 0).exists()); } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ShuffleIndexInformationSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ShuffleIndexInformationSuite.java new file mode 100644 index 0000000000000..c4ff8935e2d64 --- /dev/null +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ShuffleIndexInformationSuite.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.shuffle; + +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; + +import java.nio.charset.StandardCharsets; + +import static org.junit.Assert.*; + +public class ShuffleIndexInformationSuite { + private static final String sortBlock0 = "tiny block"; + private static final String sortBlock1 = "a bit longer block"; + + private static TestShuffleDataContext dataContext; + private static String blockId; + + @BeforeClass + public static void before() throws IOException { + dataContext = new TestShuffleDataContext(2, 5); + + dataContext.create(); + // Write some sort data. + blockId = dataContext.insertSortShuffleData(0, 0, new byte[][] { + sortBlock0.getBytes(StandardCharsets.UTF_8), + sortBlock1.getBytes(StandardCharsets.UTF_8)}); + } + + @AfterClass + public static void afterAll() { + dataContext.cleanup(); + } + + @Test + public void test() throws IOException { + String path = ExecutorDiskUtils.getFilePath( + dataContext.localDirs, + dataContext.subDirsPerLocalDir, + blockId + ".index"); + ShuffleIndexInformation s = new ShuffleIndexInformation(path); + // the index file contains 3 offsets: + // 0, sortBlock0.length, sortBlock0.length + sortBlock1.length + assertEquals(0L, s.getIndex(0).getOffset()); + assertEquals(sortBlock0.length(), s.getIndex(0).getLength()); + + assertEquals(sortBlock0.length(), s.getIndex(1).getOffset()); + assertEquals(sortBlock1.length(), s.getIndex(1).getLength()); + + assertEquals((3 * 8) + ShuffleIndexInformation.INSTANCE_MEMORY_FOOTPRINT, + s.getRetainedMemorySize()); + } +} diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java index fb67d7220a0b4..bcf57ea621979 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java @@ -68,7 +68,8 @@ public void cleanup() { } /** Creates reducer blocks in a sort-based data format within our local dirs. */ - public void insertSortShuffleData(int shuffleId, int mapId, byte[][] blocks) throws IOException { + public String insertSortShuffleData(int shuffleId, int mapId, byte[][] blocks) + throws IOException { String blockId = "shuffle_" + shuffleId + "_" + mapId + "_0"; OutputStream dataStream = null; @@ -76,10 +77,10 @@ public void insertSortShuffleData(int shuffleId, int mapId, byte[][] blocks) thr boolean suppressExceptionsDuringClose = true; try { - dataStream = new FileOutputStream( - ExecutorDiskUtils.getFile(localDirs, subDirsPerLocalDir, blockId + ".data")); - indexStream = new DataOutputStream(new FileOutputStream( - ExecutorDiskUtils.getFile(localDirs, subDirsPerLocalDir, blockId + ".index"))); + dataStream = new FileOutputStream(new File( + ExecutorDiskUtils.getFilePath(localDirs, subDirsPerLocalDir, blockId + ".data"))); + indexStream = new DataOutputStream(new FileOutputStream(new File( + ExecutorDiskUtils.getFilePath(localDirs, subDirsPerLocalDir, blockId + ".index")))); long offset = 0; indexStream.writeLong(offset); @@ -93,6 +94,7 @@ public void insertSortShuffleData(int shuffleId, int mapId, byte[][] blocks) thr Closeables.close(dataStream, suppressExceptionsDuringClose); Closeables.close(indexStream, suppressExceptionsDuringClose); } + return blockId; } /** Creates spill file(s) within the local dirs. 
*/ @@ -122,11 +124,11 @@ private void insertFile(String filename) throws IOException { private void insertFile(String filename, byte[] block) throws IOException { OutputStream dataStream = null; - File file = ExecutorDiskUtils.getFile(localDirs, subDirsPerLocalDir, filename); + File file = new File(ExecutorDiskUtils.getFilePath(localDirs, subDirsPerLocalDir, filename)); Assert.assertFalse("this test file has been already generated", file.exists()); try { dataStream = new FileOutputStream( - ExecutorDiskUtils.getFile(localDirs, subDirsPerLocalDir, filename)); + new File(ExecutorDiskUtils.getFilePath(localDirs, subDirsPerLocalDir, filename))); dataStream.write(block); } finally { Closeables.close(dataStream, false); diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index 7454a74094541..f1485ec99789d 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -93,7 +93,8 @@ private[spark] class IndexShuffleBlockResolver( def getDataFile(shuffleId: Int, mapId: Long, dirs: Option[Array[String]]): File = { val blockId = ShuffleDataBlockId(shuffleId, mapId, NOOP_REDUCE_ID) dirs - .map(ExecutorDiskUtils.getFile(_, blockManager.subDirsPerLocalDir, blockId.name)) + .map(d => + new File(ExecutorDiskUtils.getFilePath(d, blockManager.subDirsPerLocalDir, blockId.name))) .getOrElse(blockManager.diskBlockManager.getFile(blockId)) } @@ -109,7 +110,8 @@ private[spark] class IndexShuffleBlockResolver( dirs: Option[Array[String]] = None): File = { val blockId = ShuffleIndexBlockId(shuffleId, mapId, NOOP_REDUCE_ID) dirs - .map(ExecutorDiskUtils.getFile(_, blockManager.subDirsPerLocalDir, blockId.name)) + .map(d => + new File(ExecutorDiskUtils.getFilePath(d, blockManager.subDirsPerLocalDir, blockId.name))) .getOrElse(blockManager.diskBlockManager.getFile(blockId)) } @@ -546,7 +548,8 @@ private[spark] class IndexShuffleBlockResolver( val blockId = ShuffleChecksumBlockId(shuffleId, mapId, NOOP_REDUCE_ID) val fileName = ShuffleChecksumHelper.getChecksumFileName(blockId.name, algorithm) dirs - .map(ExecutorDiskUtils.getFile(_, blockManager.subDirsPerLocalDir, fileName)) + .map(d => + new File(ExecutorDiskUtils.getFilePath(d, blockManager.subDirsPerLocalDir, fileName))) .getOrElse(blockManager.diskBlockManager.getFile(fileName)) } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index abd0beb458b78..d5901888d1abf 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -1202,7 +1202,7 @@ private[spark] class BlockManager( blockId: BlockId, localDirs: Array[String], blockSize: Long): Option[ManagedBuffer] = { - val file = ExecutorDiskUtils.getFile(localDirs, subDirsPerLocalDir, blockId.name) + val file = new File(ExecutorDiskUtils.getFilePath(localDirs, subDirsPerLocalDir, blockId.name)) if (file.exists()) { val managedBuffer = securityManager.getIOEncryptionKey() match { case Some(key) => diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala index bebe32b95203c..c6a22972d2a0f 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala +++ 
b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala @@ -79,7 +79,7 @@ private[spark] class DiskBlockManager( /** Looks up a file by hashing it into one of our local subdirectories. */ // This method should be kept in sync with - // org.apache.spark.network.shuffle.ExecutorDiskUtils#getFile(). + // org.apache.spark.network.shuffle.ExecutorDiskUtils#getFilePath(). def getFile(filename: String): File = { // Figure out which local directory it hashes to, and which subdirectory in that val hash = Utils.nonNegativeHash(filename) @@ -130,7 +130,7 @@ private[spark] class DiskBlockManager( throw new IllegalArgumentException( s"Cannot read $filename because merged shuffle dirs is empty") } - ExecutorDiskUtils.getFile(dirs.get, subDirsPerLocalDir, filename) + new File(ExecutorDiskUtils.getFilePath(dirs.get, subDirsPerLocalDir, filename)) } /** Check if disk block manager has a block. */ diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 22204dd98ccdd..0f99ea819f67f 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -886,7 +886,8 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE val blockSize = inv.getArguments()(2).asInstanceOf[Long] val res = store1.readDiskBlockFromSameHostExecutor(blockId, localDirs, blockSize) assert(res.isDefined) - val file = ExecutorDiskUtils.getFile(localDirs, store1.subDirsPerLocalDir, blockId.name) + val file = new File( + ExecutorDiskUtils.getFilePath(localDirs, store1.subDirsPerLocalDir, blockId.name)) // delete the file behind the blockId assert(file.delete()) sameHostExecutorTried = true From e81333c1553daf36490bd86f0228d294577b8ba7 Mon Sep 17 00:00:00 2001 From: "wangguangxin.cn" Date: Tue, 1 Mar 2022 12:10:53 -0800 Subject: [PATCH 370/513] [SPARK-37593][CORE] Reduce default page size by `LONG_ARRAY_OFFSET` if `G1GC` and `ON_HEAP` are used

### What changes were proposed in this pull request?

Spark's Tungsten memory model usually tries to allocate memory one `page` at a time, allocated as `long[pageSizeBytes/8]` in `HeapMemoryAllocator.allocate`. Remember that a Java long array needs an extra object header (usually 16 bytes on a 64-bit system), so the actual allocation is `pageSize + 16` bytes. Assume that `G1HeapRegionSize` is 4M and `pageSizeBytes` is 4M as well. Since each allocation then needs 4M + 16 bytes, two regions are used, with one region occupied by only 16 bytes, so roughly **50%** of the memory is wasted. This can happen under various combinations of G1HeapRegionSize (1M to 32M) and pageSizeBytes (1M to 64M). We can demonstrate it with the following piece of code.
```
public static void bufferSizeTest(boolean optimize) {
  long totalAllocatedSize = 0L;
  int blockSize = 1024 * 1024 * 4; // 4m
  if (optimize) {
    blockSize -= 16;
  }
  List<long[]> buffers = new ArrayList<>(); // uses java.util.List / java.util.ArrayList
  while (true) {
    long[] arr = new long[blockSize / 8];
    buffers.add(arr);
    totalAllocatedSize += blockSize;
    System.out.println("Total allocated size: " + totalAllocatedSize);
  }
}
```

Run it with the following JVM parameters:

```
java -Xmx100m -XX:+UseG1GC -XX:G1HeapRegionSize=4m -XX:-UseGCOverheadLimit -verbose:gc -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -Xss4m -XX:+ExitOnOutOfMemoryError -XX:ParallelGCThreads=4 -XX:ConcGCThreads=4
```

With `optimize = false`:

```
Total allocated size: 46137344
[GC pause (G1 Humongous Allocation) (young) 44M->44M(100M), 0.0007091 secs]
[GC pause (G1 Evacuation Pause) (young) (initial-mark)-- 48M->48M(100M), 0.0021528 secs]
[GC concurrent-root-region-scan-start]
[GC concurrent-root-region-scan-end, 0.0000021 secs]
[GC concurrent-mark-start]
[GC pause (G1 Evacuation Pause) (young) 48M->48M(100M), 0.0011289 secs]
[Full GC (Allocation Failure) 48M->48M(100M), 0.0017284 secs]
[Full GC (Allocation Failure) 48M->48M(100M), 0.0013437 secs]
Terminating due to java.lang.OutOfMemoryError: Java heap space
```

With `optimize = true`:

```
Total allocated size: 96468624
[GC pause (G1 Humongous Allocation) (young)-- 92M->92M(100M), 0.0024416 secs]
[Full GC (Allocation Failure) 92M->92M(100M), 0.0019883 secs]
[GC pause (G1 Evacuation Pause) (young) (initial-mark) 96M->96M(100M), 0.0004282 secs]
[GC concurrent-root-region-scan-start]
[GC concurrent-root-region-scan-end, 0.0000040 secs]
[GC concurrent-mark-start]
[GC pause (G1 Evacuation Pause) (young) 96M->96M(100M), 0.0003269 secs]
[Full GC (Allocation Failure) 96M->96M(100M), 0.0012409 secs]
[Full GC (Allocation Failure) 96M->96M(100M), 0.0012607 secs]
Terminating due to java.lang.OutOfMemoryError: Java heap space
```

This PR tries to optimize the page size to avoid this memory waste. The same issue exists not only in memory management but also in other places such as `TorrentBroadcast.blockSize`; I would like to submit a follow-up PR if this modification is reasonable.

### Why are the changes needed?

To avoid memory waste with G1 GC.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Existing UT

Closes #34846 from WangGuangxin/g1_humongous_optimize.

Authored-by: wangguangxin.cn Signed-off-by: Dongjoon Hyun --- .../apache/spark/memory/MemoryManager.scala | 38 +++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/memory/MemoryManager.scala b/core/src/main/scala/org/apache/spark/memory/MemoryManager.scala index c08b47f99dda3..596974f338fd8 100644 --- a/core/src/main/scala/org/apache/spark/memory/MemoryManager.scala +++ b/core/src/main/scala/org/apache/spark/memory/MemoryManager.scala @@ -17,8 +17,11 @@ package org.apache.spark.memory +import java.lang.management.{ManagementFactory, PlatformManagedObject} import javax.annotation.concurrent.GuardedBy +import scala.util.Try + import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ @@ -27,6 +30,7 @@ import org.apache.spark.storage.memory.MemoryStore import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.array.ByteArrayMethods import org.apache.spark.unsafe.memory.MemoryAllocator +import org.apache.spark.util.Utils /** * An abstract memory manager that enforces how memory is shared between execution and storage.
@@ -242,8 +246,12 @@ private[spark] abstract class MemoryManager( * If user didn't explicitly set "spark.buffer.pageSize", we figure out the default value * by looking at the number of cores available to the process, and the total amount of memory, * and then divide it by a factor of safety. + * + * SPARK-37593 If we are using G1GC, it's better to take the LONG_ARRAY_OFFSET + * into consideration so that the requested memory size is power of 2 + * and can be divided by G1 heap region size to reduce memory waste within one G1 region. */ - val pageSizeBytes: Long = { + private lazy val defaultPageSizeBytes = { val minPageSize = 1L * 1024 * 1024 // 1MB val maxPageSize = 64L * minPageSize // 64MB val cores = if (numCores > 0) numCores else Runtime.getRuntime.availableProcessors() @@ -254,10 +262,16 @@ private[spark] abstract class MemoryManager( case MemoryMode.OFF_HEAP => offHeapExecutionMemoryPool.poolSize } val size = ByteArrayMethods.nextPowerOf2(maxTungstenMemory / cores / safetyFactor) - val default = math.min(maxPageSize, math.max(minPageSize, size)) - conf.get(BUFFER_PAGESIZE).getOrElse(default) + val chosenPageSize = math.min(maxPageSize, math.max(minPageSize, size)) + if (isG1GC && tungstenMemoryMode == MemoryMode.ON_HEAP) { + chosenPageSize - Platform.LONG_ARRAY_OFFSET + } else { + chosenPageSize + } } + val pageSizeBytes: Long = conf.get(BUFFER_PAGESIZE).getOrElse(defaultPageSizeBytes) + /** * Allocates memory for use by Unsafe/Tungsten code. */ @@ -267,4 +281,22 @@ private[spark] abstract class MemoryManager( case MemoryMode.OFF_HEAP => MemoryAllocator.UNSAFE } } + + /** + * Return whether we are using G1GC or not + */ + private lazy val isG1GC: Boolean = { + Try { + val clazz = Utils.classForName("com.sun.management.HotSpotDiagnosticMXBean") + .asInstanceOf[Class[_ <: PlatformManagedObject]] + val vmOptionClazz = Utils.classForName("com.sun.management.VMOption") + val hotSpotDiagnosticMXBean = ManagementFactory.getPlatformMXBean(clazz) + val vmOptionMethod = clazz.getMethod("getVMOption", classOf[String]) + val valueMethod = vmOptionClazz.getMethod("getValue") + + val useG1GCObject = vmOptionMethod.invoke(hotSpotDiagnosticMXBean, "UseG1GC") + val useG1GC = valueMethod.invoke(useG1GCObject).asInstanceOf[String] + "true".equals(useG1GC) + }.getOrElse(false) + } } From c7b0dd24176f9909031c9fbd151eb7f89fe56a59 Mon Sep 17 00:00:00 2001 From: Martin Tzvetanov Grigorov Date: Wed, 2 Mar 2022 10:38:41 +0900 Subject: [PATCH 371/513] [SPARK-38362][BUILD] Move eclipse.m2e Maven plugin config in its own profile ### What changes were proposed in this pull request? Move org.eclipse.m2e:lifecycle-mapping in its own Maven profile https://stackoverflow.com/a/23707050/497381 ### Why are the changes needed? This Maven "plugin" is needed only inside Eclipse when M2E plugin is being used to map Maven lifecycle with Eclipse's one. There is no need to bother non-Eclipse users' build with warnings like: ``` WARNING] The POM for org.eclipse.m2e:lifecycle-mapping:jar:1.0.0 is missing, no dependency information available [WARNING] Failed to retrieve plugin descriptor for org.eclipse.m2e:lifecycle-mapping:1.0.0: Plugin org.eclipse.m2e:lifecycle-mapping:1.0.0 or one of its dependencies could not be resolved: org.eclipse.m2e:lifecycle-mapping:jar:1.0.0 was not found in https://maven-central.storage-download.googleapis.com/maven2/ during a previous attempt. 
This failure was cached in the local repository and resolution is not reattempted until the update interval of gcs-maven-central-mirror has elapsed or updates are forced ``` ### Does this PR introduce _any_ user-facing change? No! ### How was this patch tested? Local dev environment. Closes #35698 from martin-g/spark-38362-move-eclipse-m2e-in-its-own-profile. Authored-by: Martin Tzvetanov Grigorov Signed-off-by: Hyukjin Kwon --- pom.xml | 119 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 67 insertions(+), 52 deletions(-) diff --git a/pom.xml b/pom.xml index bcf3468b169ac..6c186e8ce5ea9 100644 --- a/pom.xml +++ b/pom.xml @@ -3120,58 +3120,6 @@ - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - org.apache.maven.plugins - maven-dependency-plugin - [2.8,) - - build-classpath - - - - - - - - - org.apache.maven.plugins - maven-jar-plugin - 3.1.2 - - test-jar - - - - - - - - - org.apache.maven.plugins - maven-antrun-plugin - [${maven-antrun.version},) - - run - - - - - - - - - - @@ -3745,5 +3693,72 @@ + + only-eclipse + + + + m2e.version + + + + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.apache.maven.plugins + maven-dependency-plugin + [2.8,) + + build-classpath + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + 3.1.2 + + test-jar + + + + + + + + + org.apache.maven.plugins + maven-antrun-plugin + [${maven-antrun.version},) + + run + + + + + + + + + + + + + + From 96bcb0406ea229b1c5ecbb98a1de6ba0d6a238b4 Mon Sep 17 00:00:00 2001 From: weixiuli Date: Tue, 1 Mar 2022 20:20:30 -0600 Subject: [PATCH 372/513] [SPARK-38344][SHUFFLE] Avoid to submit task when there are no requests to push up in push-based shuffle ### What changes were proposed in this pull request? Avoid to submit task when there are no requests to push up in push-based shuffle. ### Why are the changes needed? This is a performance improvement to the existing functionality. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA. Existing unittests. Closes #35675 from weixiuli/SPARK-38344-SBS. Authored-by: weixiuli Signed-off-by: Mridul Muralidharan gmail.com> --- .../org/apache/spark/shuffle/ShuffleBlockPusher.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala index d6972cd470c9e..230ec7efdb14f 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala @@ -118,11 +118,11 @@ private[spark] class ShuffleBlockPusher(conf: SparkConf) extends Logging { pushRequests ++= Utils.randomize(requests) if (pushRequests.isEmpty) { notifyDriverAboutPushCompletion() + } else { + submitTask(() => { + tryPushUpToMax() + }) } - - submitTask(() => { - tryPushUpToMax() - }) } private[shuffle] def tryPushUpToMax(): Unit = { From 80f25ad24a871f0ddef939f6a3e2f01370f1fa6f Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 2 Mar 2022 10:54:03 +0800 Subject: [PATCH 373/513] [SPARK-38363][SQL] Avoid runtime error in Dataset.summary()/Dataset.describe() when ANSI mode is on ### What changes were proposed in this pull request? When executing `df.summary()` or `df.describe()`, Spark SQL converts String columns as Double for the percentiles/mean/stddev stats. 
``` scala> val person2: DataFrame = Seq( | ("Bob", 16, 176), | ("Alice", 32, 164), | ("David", 60, 192), | ("Amy", 24, 180)).toDF("name", "age", "height") scala> person2.summary().show() +-------+-----+------------------+------------------+ |summary| name| age| height| +-------+-----+------------------+------------------+ | count| 4| 4| 4| | mean| null| 33.0| 178.0| | stddev| null|19.148542155126762|11.547005383792515| | min|Alice| 16| 164| | 25%| null| 16| 164| | 50%| null| 24| 176| | 75%| null| 32| 180| | max|David| 60| 192| +-------+-----+------------------+------------------+ ``` This can cause runtime errors with ANSI mode on. ``` org.apache.spark.SparkNumberFormatException: invalid input syntax for type numeric: Bob ``` This PR is to fix it by using `TryCast` for String columns. ### Why are the changes needed? For better adoption of the ANSI mode. Since both APIs are for getting a quick summary of the Dataframe, I suggest using `TryCast` for the problematic stats so that both APIs still work under ANSI mode. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? UT Closes #35699 from gengliangwang/fixSummary. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../sql/execution/stat/StatFunctions.scala | 15 +++- .../org/apache/spark/sql/DataFrameSuite.scala | 86 ++++++++++--------- 2 files changed, 58 insertions(+), 43 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala index 5dc0ff0ac4d1d..9155c1cb6e7ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala @@ -22,7 +22,7 @@ import java.util.Locale import org.apache.spark.internal.Logging import org.apache.spark.sql.{Column, DataFrame, Dataset, Row} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Cast, Expression, GenericInternalRow, GetArrayItem, Literal} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Cast, Expression, GenericInternalRow, GetArrayItem, Literal, TryCast} import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.catalyst.util.{GenericArrayData, QuantileSummaries} @@ -246,6 +246,11 @@ object StatFunctions extends Logging { } require(percentiles.forall(p => p >= 0 && p <= 1), "Percentiles must be in the range [0, 1]") + def castAsDoubleIfNecessary(e: Expression): Expression = if (e.dataType == StringType) { + TryCast(e, DoubleType) + } else { + e + } var percentileIndex = 0 val statisticFns = selectedStatistics.map { stats => if (stats.endsWith("%")) { @@ -253,7 +258,7 @@ object StatFunctions extends Logging { percentileIndex += 1 (child: Expression) => GetArrayItem( - new ApproximatePercentile(child, + new ApproximatePercentile(castAsDoubleIfNecessary(child), Literal(new GenericArrayData(percentiles), ArrayType(DoubleType, false))) .toAggregateExpression(), Literal(index)) @@ -264,8 +269,10 @@ object StatFunctions extends Logging { Count(child).toAggregateExpression(isDistinct = true) case "approx_count_distinct" => (child: Expression) => HyperLogLogPlusPlus(child).toAggregateExpression() - case "mean" => (child: Expression) => Average(child).toAggregateExpression() - case "stddev" => (child: Expression) => 
StddevSamp(child).toAggregateExpression() + case "mean" => (child: Expression) => + Average(castAsDoubleIfNecessary(child)).toAggregateExpression() + case "stddev" => (child: Expression) => + StddevSamp(castAsDoubleIfNecessary(child)).toAggregateExpression() case "min" => (child: Expression) => Min(child).toAggregateExpression() case "max" => (child: Expression) => Max(child).toAggregateExpression() case _ => throw QueryExecutionErrors.statisticNotRecognizedError(stats) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index cd0bd06413870..c7d05df7a4dbe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -940,29 +940,33 @@ class DataFrameSuite extends QueryTest def getSchemaAsSeq(df: DataFrame): Seq[String] = df.schema.map(_.name) - val describeAllCols = person2.describe() - assert(getSchemaAsSeq(describeAllCols) === Seq("summary", "name", "age", "height")) - checkAnswer(describeAllCols, describeResult) - // All aggregate value should have been cast to string - describeAllCols.collect().foreach { row => - row.toSeq.foreach { value => - if (value != null) { - assert(value.isInstanceOf[String], "expected string but found " + value.getClass) + Seq("true", "false").foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled) { + val describeAllCols = person2.describe() + assert(getSchemaAsSeq(describeAllCols) === Seq("summary", "name", "age", "height")) + checkAnswer(describeAllCols, describeResult) + // All aggregate value should have been cast to string + describeAllCols.collect().foreach { row => + row.toSeq.foreach { value => + if (value != null) { + assert(value.isInstanceOf[String], "expected string but found " + value.getClass) + } + } } - } - } - val describeOneCol = person2.describe("age") - assert(getSchemaAsSeq(describeOneCol) === Seq("summary", "age")) - checkAnswer(describeOneCol, describeResult.map { case Row(s, _, d, _) => Row(s, d)} ) + val describeOneCol = person2.describe("age") + assert(getSchemaAsSeq(describeOneCol) === Seq("summary", "age")) + checkAnswer(describeOneCol, describeResult.map { case Row(s, _, d, _) => Row(s, d) }) - val describeNoCol = person2.select().describe() - assert(getSchemaAsSeq(describeNoCol) === Seq("summary")) - checkAnswer(describeNoCol, describeResult.map { case Row(s, _, _, _) => Row(s)} ) + val describeNoCol = person2.select().describe() + assert(getSchemaAsSeq(describeNoCol) === Seq("summary")) + checkAnswer(describeNoCol, describeResult.map { case Row(s, _, _, _) => Row(s) }) - val emptyDescription = person2.limit(0).describe() - assert(getSchemaAsSeq(emptyDescription) === Seq("summary", "name", "age", "height")) - checkAnswer(emptyDescription, emptyDescribeResult) + val emptyDescription = person2.limit(0).describe() + assert(getSchemaAsSeq(emptyDescription) === Seq("summary", "name", "age", "height")) + checkAnswer(emptyDescription, emptyDescribeResult) + } + } } test("summary") { @@ -988,30 +992,34 @@ class DataFrameSuite extends QueryTest def getSchemaAsSeq(df: DataFrame): Seq[String] = df.schema.map(_.name) - val summaryAllCols = person2.summary() - - assert(getSchemaAsSeq(summaryAllCols) === Seq("summary", "name", "age", "height")) - checkAnswer(summaryAllCols, summaryResult) - // All aggregate value should have been cast to string - summaryAllCols.collect().foreach { row => - row.toSeq.foreach { value => - if (value != 
null) { - assert(value.isInstanceOf[String], "expected string but found " + value.getClass) + Seq("true", "false").foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled) { + val summaryAllCols = person2.summary() + + assert(getSchemaAsSeq(summaryAllCols) === Seq("summary", "name", "age", "height")) + checkAnswer(summaryAllCols, summaryResult) + // All aggregate value should have been cast to string + summaryAllCols.collect().foreach { row => + row.toSeq.foreach { value => + if (value != null) { + assert(value.isInstanceOf[String], "expected string but found " + value.getClass) + } + } } - } - } - val summaryOneCol = person2.select("age").summary() - assert(getSchemaAsSeq(summaryOneCol) === Seq("summary", "age")) - checkAnswer(summaryOneCol, summaryResult.map { case Row(s, _, d, _) => Row(s, d)} ) + val summaryOneCol = person2.select("age").summary() + assert(getSchemaAsSeq(summaryOneCol) === Seq("summary", "age")) + checkAnswer(summaryOneCol, summaryResult.map { case Row(s, _, d, _) => Row(s, d) }) - val summaryNoCol = person2.select().summary() - assert(getSchemaAsSeq(summaryNoCol) === Seq("summary")) - checkAnswer(summaryNoCol, summaryResult.map { case Row(s, _, _, _) => Row(s)} ) + val summaryNoCol = person2.select().summary() + assert(getSchemaAsSeq(summaryNoCol) === Seq("summary")) + checkAnswer(summaryNoCol, summaryResult.map { case Row(s, _, _, _) => Row(s) }) - val emptyDescription = person2.limit(0).summary() - assert(getSchemaAsSeq(emptyDescription) === Seq("summary", "name", "age", "height")) - checkAnswer(emptyDescription, emptySummaryResult) + val emptyDescription = person2.limit(0).summary() + assert(getSchemaAsSeq(emptyDescription) === Seq("summary", "name", "age", "height")) + checkAnswer(emptyDescription, emptySummaryResult) + } + } } test("SPARK-34165: Add count_distinct to summary") { From 5664403bb960b91e2f0fa87f2630b96e4c124701 Mon Sep 17 00:00:00 2001 From: jackierwzhang Date: Tue, 1 Mar 2022 19:44:02 -0800 Subject: [PATCH 374/513] [SPARK-38094][SQL][FOLLOWUP] Fix exception message and add a test case ### What changes were proposed in this pull request? Minor follow ups on https://github.com/apache/spark/pull/35385: 1. Add a nested schema test 2. Fixed an error message. ### Why are the changes needed? Better observability. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test Closes #35700 from jackierwzhang/SPARK-38094-minor. 
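The new nested-schema test relies on Parquet field ids being attached to the Spark read schema through column metadata. The following is a minimal illustrative sketch of that mechanism, not code from this patch: the `withId` helper and the `parquet.field.id` metadata key are assumptions made here for the sake of the example.

```
import org.apache.spark.sql.types.{IntegerType, Metadata, MetadataBuilder, StringType, StructType}

// Hypothetical helper: store a field id in column metadata under the assumed key "parquet.field.id".
def withId(id: Int): Metadata =
  new MetadataBuilder().putLong("parquet.field.id", id).build()

// A read schema whose columns are matched against the Parquet file by field id rather than by name.
val readSchema = new StructType()
  .add("a", IntegerType, nullable = true, withId(1))
  .add("b", StringType, nullable = true, withId(2))
```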
Authored-by: jackierwzhang Signed-off-by: Dongjoon Hyun --- .../parquet/ParquetReadSupport.scala | 2 +- .../parquet/ParquetFieldIdIOSuite.scala | 31 ++++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala index 97e691ff7c66c..69684f9466f98 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala @@ -140,7 +140,7 @@ object ParquetReadSupport extends Logging { "Spark read schema expects field Ids, " + "but Parquet file schema doesn't contain any field Ids.\n" + "Please remove the field ids from Spark schema or ignore missing ids by " + - "setting `spark.sql.parquet.fieldId.ignoreMissing = true`\n" + + s"setting `${SQLConf.IGNORE_MISSING_PARQUET_FIELD_ID.key} = true`\n" + s""" |Spark read schema: |${catalystRequestedSchema.prettyJson} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala index ff0bb2f92d208..5e01d3f447c96 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.SparkException import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{IntegerType, Metadata, MetadataBuilder, StringType, StructType} +import org.apache.spark.sql.types.{ArrayType, IntegerType, MapType, Metadata, MetadataBuilder, StringType, StructType} class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkSession { @@ -107,6 +107,35 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS } } + test("SPARK-38094: absence of field ids: reading nested schema") { + withTempDir { dir => + // now with nested schema/complex type + val readSchema = + new StructType() + .add("a", IntegerType, true, withId(1)) + .add("b", ArrayType(StringType), true, withId(2)) + .add("c", new StructType().add("c1", IntegerType, true, withId(6)), true, withId(3)) + .add("d", MapType(StringType, StringType), true, withId(4)) + .add("e", IntegerType, true, withId(5)) + + val writeSchema = + new StructType() + .add("a", IntegerType, true, withId(5)) + .add("randomName", StringType, true) + + val writeData = Seq(Row(100, "text"), Row(200, "more")) + + spark.createDataFrame(writeData.asJava, writeSchema) + .write.mode("overwrite").parquet(dir.getCanonicalPath) + + withAllParquetReaders { + checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath), + // a, b, c, d all couldn't be found + Row(null, null, null, null, 100) :: Row(null, null, null, null, 200) :: Nil) + } + } + } + test("multiple id matches") { withTempDir { dir => val readSchema = From f14f6d6dea6c1a8099604596b623f4a191feaa5c Mon Sep 17 00:00:00 2001 From: huaxingao Date: Tue, 1 Mar 2022 20:05:23 -0800 Subject: [PATCH 375/513] [SPARK-38357][SQL][TESTS] Add test coverage for file source with OR(data filter, partition filter) ### What changes were 
proposed in this pull request? Add test coverage for filter OR which contains both data filter and partition filter e.g. p is partition col and id is data col ``` SELECT * FROM tmp WHERE (p = 0 AND id > 0) OR (p = 1 AND id = 2) ``` ### Why are the changes needed? Test coverage ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New UT Closes #35703 from huaxingao/spark-37593. Authored-by: huaxingao Signed-off-by: Dongjoon Hyun --- .../PruneFileSourcePartitionsSuite.scala | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitionsSuite.scala index 98d3d65befe60..bf14a7d91233b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitionsSuite.scala @@ -118,6 +118,28 @@ class PruneFileSourcePartitionsSuite extends PrunePartitionSuiteBase with Shared } } + test("SPARK-38357: data + partition filters with OR") { + // Force datasource v2 for parquet + withSQLConf((SQLConf.USE_V1_SOURCE_LIST.key, "")) { + withTempPath { dir => + spark.range(10).coalesce(1).selectExpr("id", "id % 3 as p") + .write.partitionBy("p").parquet(dir.getCanonicalPath) + withTempView("tmp") { + spark.read.parquet(dir.getCanonicalPath).createOrReplaceTempView("tmp"); + assertPrunedPartitions("SELECT * FROM tmp WHERE (p = 0 AND id > 0) OR (p = 1 AND id = 2)", + 2, + "((tmp.p = 0) || (tmp.p = 1))") + assertPrunedPartitions("SELECT * FROM tmp WHERE p = 0 AND id > 0", + 1, + "(tmp.p = 0)") + assertPrunedPartitions("SELECT * FROM tmp WHERE p = 0", + 1, + "(tmp.p = 0)") + } + } + } + } + protected def collectPartitionFiltersFn(): PartialFunction[SparkPlan, Seq[Expression]] = { case scan: FileSourceScanExec => scan.partitionFilters } From 3ab18cc0be295676e073842ecb7e0e51d11fbd75 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 1 Mar 2022 20:23:18 -0800 Subject: [PATCH 376/513] [SPARK-38383][K8S] Support `APP_ID` and `EXECUTOR_ID` placeholder in annotations ### What changes were proposed in this pull request? This PR aims to support `APP_ID` and `EXECUTOR_ID` placeholder in K8s annotation in the same way we did for `EXECUTOR_JAVA_OPTIONS`. ### Why are the changes needed? Although Apache Spark provides `spark-app-id` already, some custom schedulers are not able to recognize them. ### Does this PR introduce _any_ user-facing change? No because the pattern strings are very specific. ### How was this patch tested? Pass the CIs and K8s IT. This passed like the following on `Docker Desktop K8s`. ``` $ build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube -Dspark.kubernetes.test.deployMode=docker-for-desktop "kubernetes-integration-tests/test" [info] KubernetesSuite: [info] - Run SparkPi with no resources (8 seconds, 789 milliseconds) [info] - Run SparkPi with no resources & statefulset allocation (8 seconds, 903 milliseconds) [info] - Run SparkPi with a very long application name. (8 seconds, 586 milliseconds) [info] - Use SparkLauncher.NO_RESOURCE (8 seconds, 409 milliseconds) [info] - Run SparkPi with a master URL without a scheme. (8 seconds, 586 milliseconds) [info] - Run SparkPi with an argument. 
(8 seconds, 708 milliseconds) [info] - Run SparkPi with custom labels, annotations, and environment variables. (8 seconds, 626 milliseconds) [info] - All pods have the same service account by default (8 seconds, 595 milliseconds) [info] - Run extraJVMOptions check on driver (4 seconds, 324 milliseconds) [info] - Run SparkRemoteFileTest using a remote data file (8 seconds, 424 milliseconds) [info] - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties (13 seconds, 42 milliseconds) [info] - Run SparkPi with env and mount secrets. (16 seconds, 600 milliseconds) [info] - Run PySpark on simple pi.py example (11 seconds, 479 milliseconds) [info] - Run PySpark to test a pyfiles example (10 seconds, 669 milliseconds) [info] - Run PySpark with memory customization (8 seconds, 604 milliseconds) [info] - Run in client mode. (7 seconds, 349 milliseconds) [info] - Start pod creation from template (8 seconds, 779 milliseconds) [info] - Test basic decommissioning (42 seconds, 970 milliseconds) [info] - Test basic decommissioning with shuffle cleanup (42 seconds, 650 milliseconds) [info] - Test decommissioning with dynamic allocation & shuffle cleanups (2 minutes, 41 seconds) [info] - Test decommissioning timeouts (43 seconds, 340 milliseconds) [info] - SPARK-37576: Rolling decommissioning (1 minute, 6 seconds) [info] - Run SparkR on simple dataframe.R example (11 seconds, 645 milliseconds) ``` Closes #35704 from dongjoon-hyun/SPARK-38383. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../k8s/features/BasicDriverFeatureStep.scala | 3 ++- .../k8s/features/BasicExecutorFeatureStep.scala | 5 ++++- .../k8s/features/BasicDriverFeatureStepSuite.scala | 13 +++++++++---- .../k8s/integrationtest/BasicTestsSuite.scala | 2 ++ .../k8s/integrationtest/KubernetesSuite.scala | 3 +++ 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala index f2104d433ad49..3b2b5612566a1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -142,7 +142,8 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) .editOrNewMetadata() .withName(driverPodName) .addToLabels(conf.labels.asJava) - .addToAnnotations(conf.annotations.asJava) + .addToAnnotations(conf.annotations.map { case (k, v) => + (k, Utils.substituteAppNExecIds(v, conf.appId, "")) }.asJava) .endMetadata() .editOrNewSpec() .withRestartPolicy("Never") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index c6084720c56fe..a7625194bd6e6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -272,11 +272,14 @@ private[spark] class BasicExecutorFeatureStep( case "statefulset" => "Always" case _ => "Never" } + val annotations = kubernetesConf.annotations.map { case (k, v) => + (k, 
Utils.substituteAppNExecIds(v, kubernetesConf.appId, kubernetesConf.executorId)) + } val executorPodBuilder = new PodBuilder(pod.pod) .editOrNewMetadata() .withName(name) .addToLabels(kubernetesConf.labels.asJava) - .addToAnnotations(kubernetesConf.annotations.asJava) + .addToAnnotations(annotations.asJava) .addToOwnerReferences(ownerReference.toSeq: _*) .endMetadata() .editOrNewSpec() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala index 83444e5518e32..0b54599bd1d35 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala @@ -21,7 +21,7 @@ import scala.collection.JavaConverters._ import io.fabric8.kubernetes.api.model.{ContainerPort, ContainerPortBuilder, LocalObjectReferenceBuilder, Quantity} import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesTestConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesDriverConf, KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.features.KubernetesFeaturesTestUtils.TestResourceInformation @@ -36,7 +36,9 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { private val CUSTOM_DRIVER_LABELS = Map("labelkey" -> "labelvalue") private val CONTAINER_IMAGE_PULL_POLICY = "IfNotPresent" - private val DRIVER_ANNOTATIONS = Map("customAnnotation" -> "customAnnotationValue") + private val DRIVER_ANNOTATIONS = Map( + "customAnnotation" -> "customAnnotationValue", + "yunikorn.apache.org/app-id" -> "{{APPID}}") private val DRIVER_ENVS = Map( "customDriverEnv1" -> "customDriverEnv2", "customDriverEnv2" -> "customDriverEnv2") @@ -62,7 +64,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { sparkConf.set(testRInfo.rId.amountConf, testRInfo.count) sparkConf.set(testRInfo.rId.vendorConf, testRInfo.vendor) } - val kubernetesConf = KubernetesTestConf.createDriverConf( + val kubernetesConf: KubernetesDriverConf = KubernetesTestConf.createDriverConf( sparkConf = sparkConf, labels = CUSTOM_DRIVER_LABELS, environment = DRIVER_ENVS, @@ -123,7 +125,10 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { } assert(driverPodMetadata.getLabels === kubernetesConf.labels.asJava) - assert(driverPodMetadata.getAnnotations.asScala === DRIVER_ANNOTATIONS) + val annotations = driverPodMetadata.getAnnotations.asScala + DRIVER_ANNOTATIONS.foreach { case (k, v) => + assert(annotations(k) === Utils.substituteAppNExecIds(v, KubernetesTestConf.APP_ID, "")) + } assert(configuredPod.pod.getSpec.getRestartPolicy === "Never") val expectedSparkConf = Map( KUBERNETES_DRIVER_POD_NAME.key -> "spark-driver-pod", diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala index 0e79f6c554403..a79442ac63581 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala +++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala @@ -82,12 +82,14 @@ private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite => .set("spark.kubernetes.driver.label.label2", "label2-value") .set("spark.kubernetes.driver.annotation.annotation1", "annotation1-value") .set("spark.kubernetes.driver.annotation.annotation2", "annotation2-value") + .set("spark.kubernetes.driver.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}") .set("spark.kubernetes.driverEnv.ENV1", "VALUE1") .set("spark.kubernetes.driverEnv.ENV2", "VALUE2") .set("spark.kubernetes.executor.label.label1", "label1-value") .set("spark.kubernetes.executor.label.label2", "label2-value") .set("spark.kubernetes.executor.annotation.annotation1", "annotation1-value") .set("spark.kubernetes.executor.annotation.annotation2", "annotation2-value") + .set("spark.kubernetes.executor.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}") .set("spark.executorEnv.ENV1", "VALUE1") .set("spark.executorEnv.ENV2", "VALUE2") diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index ca7eae1f0a632..15ce4874b4035 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -35,6 +35,7 @@ import org.scalatest.matchers.should.Matchers._ import org.scalatest.time.{Minutes, Seconds, Span} import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.k8s.Constants.ENV_APPLICATION_ID import org.apache.spark.deploy.k8s.integrationtest.TestConstants._ import org.apache.spark.deploy.k8s.integrationtest.backend.{IntegrationTestBackend, IntegrationTestBackendFactory} import org.apache.spark.internal.Logging @@ -563,6 +564,7 @@ class KubernetesSuite extends SparkFunSuite assert(pod.getMetadata.getLabels.get("label2") === "label2-value") assert(pod.getMetadata.getAnnotations.get("annotation1") === "annotation1-value") assert(pod.getMetadata.getAnnotations.get("annotation2") === "annotation2-value") + val appId = pod.getMetadata.getAnnotations.get("yunikorn.apache.org/app-id") val container = pod.getSpec.getContainers.get(0) val envVars = container @@ -574,6 +576,7 @@ class KubernetesSuite extends SparkFunSuite .toMap assert(envVars("ENV1") === "VALUE1") assert(envVars("ENV2") === "VALUE2") + assert(appId === envVars(ENV_APPLICATION_ID)) } private def deleteDriverPod(): Unit = { From 42db298fadfedc262e57b9d7f317a54c056948e9 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 1 Mar 2022 22:14:29 -0800 Subject: [PATCH 377/513] Revert "[SPARK-37090][BUILD] Upgrade `libthrift` to 0.16.0 to avoid security vulnerabilities" This reverts commit 4789e1f234a92b3c17d1f962c8f374ef9478612a. 
--- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 6 +- .../hive/service/auth/KerberosSaslHelper.java | 5 +- .../hive/service/auth/PlainSaslHelper.java | 3 +- .../service/auth/TSetIpAddressProcessor.java | 5 +- .../cli/thrift/ThriftBinaryCLIService.java | 6 + .../service/cli/thrift/ThriftCLIService.java | 10 - .../thrift/transport/TFramedTransport.java | 200 ------------------ 9 files changed, 14 insertions(+), 225 deletions(-) delete mode 100644 sql/hive/src/main/java/org/apache/thrift/transport/TFramedTransport.java diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index f6687edc3a1a9..7c657469a0b66 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -186,7 +186,7 @@ kubernetes-model-storageclass/5.12.1//kubernetes-model-storageclass-5.12.1.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar -libthrift/0.16.0//libthrift-0.16.0.jar +libthrift/0.12.0//libthrift-0.12.0.jar log4j-1.2-api/2.17.1//log4j-1.2-api-2.17.1.jar log4j-api/2.17.1//log4j-api-2.17.1.jar log4j-core/2.17.1//log4j-core-2.17.1.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index eb1e7cd158f2a..9eb89065e8718 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -171,7 +171,7 @@ kubernetes-model-storageclass/5.12.1//kubernetes-model-storageclass-5.12.1.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar -libthrift/0.16.0//libthrift-0.16.0.jar +libthrift/0.12.0//libthrift-0.12.0.jar log4j-1.2-api/2.17.1//log4j-1.2-api-2.17.1.jar log4j-api/2.17.1//log4j-api-2.17.1.jar log4j-core/2.17.1//log4j-core-2.17.1.jar diff --git a/pom.xml b/pom.xml index 6c186e8ce5ea9..176d3af786adf 100644 --- a/pom.xml +++ b/pom.xml @@ -187,7 +187,7 @@ 2.10.13 3.5.2 3.0.0 - 0.16.0 + 0.12.0 4.8 1.1 3.141.59 @@ -2585,10 +2585,6 @@ org.slf4j slf4j-api - - javax.annotation - javax.annotation-api - diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java index ef91f94eeec2b..175412ed98c6c 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java @@ -30,7 +30,6 @@ import org.apache.thrift.TProcessorFactory; import org.apache.thrift.transport.TSaslClientTransport; import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; public final class KerberosSaslHelper { @@ -69,8 +68,8 @@ public static TTransport createSubjectAssumedTransport(String principal, new TSaslClientTransport("GSSAPI", null, names[0], names[1], saslProps, null, underlyingTransport); return new TSubjectAssumingTransport(saslTransport); - } catch (SaslException | TTransportException se) { - throw new IOException("Could not instantiate transport", se); + } catch (SaslException se) { + throw new IOException("Could not instantiate SASL transport", se); } } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java index 5ac29950f4f85..c06f6ec34653f 100644 --- 
a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java @@ -38,7 +38,6 @@ import org.apache.thrift.transport.TSaslClientTransport; import org.apache.thrift.transport.TSaslServerTransport; import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; import org.apache.thrift.transport.TTransportFactory; public final class PlainSaslHelper { @@ -65,7 +64,7 @@ public static TTransportFactory getPlainTransportFactory(String authTypeStr) } public static TTransport getPlainTransport(String username, String password, - TTransport underlyingTransport) throws SaslException, TTransportException { + TTransport underlyingTransport) throws SaslException { return new TSaslClientTransport("PLAIN", null, null, null, new HashMap(), new PlainCallbackHandler(username, password), underlyingTransport); } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java index b727b4e27de8d..1205d21be6be6 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java @@ -45,12 +45,11 @@ public TSetIpAddressProcessor(Iface iface) { } @Override - public void process(final TProtocol in, final TProtocol out) throws TException { + public boolean process(final TProtocol in, final TProtocol out) throws TException { setIpAddress(in); setUserName(in); try { - super.process(in, out); - return; + return super.process(in, out); } finally { THREAD_LOCAL_USER_NAME.remove(); THREAD_LOCAL_IP_ADDRESS.remove(); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java index 025c85eb65801..a980b5118be2a 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java @@ -90,10 +90,16 @@ protected void initializeServer() { // Server args int maxMessageSize = hiveConf.getIntVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_MAX_MESSAGE_SIZE); + int requestTimeout = (int) hiveConf.getTimeVar( + HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_TIMEOUT, TimeUnit.SECONDS); + int beBackoffSlotLength = (int) hiveConf.getTimeVar( + HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_BEBACKOFF_SLOT_LENGTH, TimeUnit.MILLISECONDS); TThreadPoolServer.Args sargs = new TThreadPoolServer.Args(serverSocket) .processorFactory(processorFactory).transportFactory(transportFactory) .protocolFactory(new TBinaryProtocol.Factory()) .inputProtocolFactory(new TBinaryProtocol.Factory(true, true, maxMessageSize, maxMessageSize)) + .requestTimeout(requestTimeout).requestTimeoutUnit(TimeUnit.SECONDS) + .beBackoffSlotLength(beBackoffSlotLength).beBackoffSlotLengthUnit(TimeUnit.MILLISECONDS) .executorService(executorService); // TCP Server diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java index ddbe89b0b721b..4a223c8666a17 100644 --- 
a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java @@ -83,16 +83,6 @@ public void setSessionHandle(SessionHandle sessionHandle) { public SessionHandle getSessionHandle() { return sessionHandle; } - - @Override - public T unwrap(Class aClass) { - return null; - } - - @Override - public boolean isWrapperFor(Class aClass) { - return false; - } } public ThriftCLIService(CLIService service, String serviceName) { diff --git a/sql/hive/src/main/java/org/apache/thrift/transport/TFramedTransport.java b/sql/hive/src/main/java/org/apache/thrift/transport/TFramedTransport.java deleted file mode 100644 index 4b32108c7d208..0000000000000 --- a/sql/hive/src/main/java/org/apache/thrift/transport/TFramedTransport.java +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.thrift.transport; - - -import org.apache.thrift.TByteArrayOutputStream; -import org.apache.thrift.TConfiguration; - -/** - * This is based on libthrift-0.12.0 {@link org.apache.thrift.transport.TFramedTransport}. - * To fix class of org.apache.thrift.transport.TFramedTransport not found after upgrading libthrift. - * - * TFramedTransport is a buffered TTransport that ensures a fully read message - * every time by preceding messages with a 4-byte frame size. 
- */ -public class TFramedTransport extends TTransport { - - protected static final int DEFAULT_MAX_LENGTH = 16384000; - - private int maxLength_; - - /** - * Underlying transport - */ - private TTransport transport_ = null; - - /** - * Buffer for output - */ - private final TByteArrayOutputStream writeBuffer_ = - new TByteArrayOutputStream(1024); - - /** - * Buffer for input - */ - private final TMemoryInputTransport readBuffer_ = - new TMemoryInputTransport(new byte[0]); - - public static class Factory extends TTransportFactory { - private int maxLength_; - - public Factory() { - maxLength_ = TFramedTransport.DEFAULT_MAX_LENGTH; - } - - public Factory(int maxLength) { - maxLength_ = maxLength; - } - - @Override - public TTransport getTransport(TTransport base) throws TTransportException { - return new TFramedTransport(base, maxLength_); - } - } - - /** - * Constructor wraps around another transport - */ - public TFramedTransport(TTransport transport, int maxLength) throws TTransportException { - transport_ = transport; - maxLength_ = maxLength; - } - - public TFramedTransport(TTransport transport) throws TTransportException { - transport_ = transport; - maxLength_ = TFramedTransport.DEFAULT_MAX_LENGTH; - } - - public void open() throws TTransportException { - transport_.open(); - } - - public boolean isOpen() { - return transport_.isOpen(); - } - - public void close() { - transport_.close(); - } - - public int read(byte[] buf, int off, int len) throws TTransportException { - int got = readBuffer_.read(buf, off, len); - if (got > 0) { - return got; - } - - // Read another frame of data - readFrame(); - - return readBuffer_.read(buf, off, len); - } - - @Override - public byte[] getBuffer() { - return readBuffer_.getBuffer(); - } - - @Override - public int getBufferPosition() { - return readBuffer_.getBufferPosition(); - } - - @Override - public int getBytesRemainingInBuffer() { - return readBuffer_.getBytesRemainingInBuffer(); - } - - @Override - public void consumeBuffer(int len) { - readBuffer_.consumeBuffer(len); - } - - @Override - public TConfiguration getConfiguration() { - return null; - } - - @Override - public void updateKnownMessageSize(long l) throws TTransportException { - - } - - @Override - public void checkReadBytesAvailable(long l) throws TTransportException { - - } - - public void clear() { - readBuffer_.clear(); - } - - private final byte[] i32buf = new byte[4]; - - private void readFrame() throws TTransportException { - transport_.readAll(i32buf, 0, 4); - int size = decodeFrameSize(i32buf); - - if (size < 0) { - close(); - throw new TTransportException(TTransportException.CORRUPTED_DATA, - "Read a negative frame size (" + size + ")!"); - } - - if (size > maxLength_) { - close(); - throw new TTransportException(TTransportException.CORRUPTED_DATA, - "Frame size (" + size + ") larger than max length (" + maxLength_ + ")!"); - } - - byte[] buff = new byte[size]; - transport_.readAll(buff, 0, size); - readBuffer_.reset(buff); - } - - public void write(byte[] buf, int off, int len) throws TTransportException { - writeBuffer_.write(buf, off, len); - } - - @Override - public void flush() throws TTransportException { - byte[] buf = writeBuffer_.get(); - int len = writeBuffer_.len(); - writeBuffer_.reset(); - - encodeFrameSize(len, i32buf); - transport_.write(i32buf, 0, 4); - transport_.write(buf, 0, len); - transport_.flush(); - } - - public static final void encodeFrameSize(final int frameSize, final byte[] buf) { - buf[0] = (byte)(0xff & (frameSize >> 24)); - buf[1] = 
(byte)(0xff & (frameSize >> 16)); - buf[2] = (byte)(0xff & (frameSize >> 8)); - buf[3] = (byte)(0xff & (frameSize)); - } - - public static final int decodeFrameSize(final byte[] buf) { - return - ((buf[0] & 0xff) << 24) | - ((buf[1] & 0xff) << 16) | - ((buf[2] & 0xff) << 8) | - ((buf[3] & 0xff)); - } -} From b141c15402078c90fd15b21fab71826546ff2f1d Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 2 Mar 2022 07:34:38 -0600 Subject: [PATCH 378/513] [SPARK-38342][CORE] Clean up deprecated api usage of Ivy ### What changes were proposed in this pull request? This pr use `Ivy.retrieve(ModuleRevisionId, RetrieveOptions)` instead of deprecated `Ivy.retrieve(ModuleRevisionId, String, RetrieveOptions)` to clean up deprecation compilation warning. The refactor way refer to the Implementation of `RetrieveEngine#retrieve` method as follows: ```java Deprecated public int retrieve(ModuleRevisionId mrid, String destFilePattern, RetrieveOptions options) throws IOException { RetrieveOptions retrieveOptions = new RetrieveOptions(options); retrieveOptions.setDestArtifactPattern(destFilePattern); RetrieveReport result = retrieve(mrid, retrieveOptions); return result.getNbrArtifactsCopied(); } ``` ### Why are the changes needed? Clean up deprecated api usage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35672 from LuciferYang/cleanup-deprecation-ivy. Authored-by: yangjie01 Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index bf972b9dd9be6..dab1474725d9e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1456,9 +1456,9 @@ private[spark] object SparkSubmitUtils extends Logging { throw new RuntimeException(rr.getAllProblemMessages.toString) } // retrieve all resolved dependencies + retrieveOptions.setDestArtifactPattern(packagesDirectory.getAbsolutePath + File.separator + + "[organization]_[artifact]-[revision](-[classifier]).[ext]") ivy.retrieve(rr.getModuleDescriptor.getModuleRevisionId, - packagesDirectory.getAbsolutePath + File.separator + - "[organization]_[artifact]-[revision](-[classifier]).[ext]", retrieveOptions.setConfs(Array(ivyConfName))) resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory) } finally { From f9603287d4a74f4be0486750fdee86386291503e Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 2 Mar 2022 19:41:51 +0300 Subject: [PATCH 379/513] [SPARK-38389][SQL] Add the `DATEDIFF()` and `DATE_DIFF()` aliases for `TIMESTAMPDIFF()` ### What changes were proposed in this pull request? In the PR, I propose to add two aliases for the `TIMESTAMPDIFF()` function introduced by https://github.com/apache/spark/pull/35607: - `DATEDIFF()` - `DATE_DIFF()` ### Why are the changes needed? 1. To make the migration process from other systems to Spark SQL easier. 2. To achieve feature parity with other DBMSs. ### Does this PR introduce _any_ user-facing change? No. The new aliases just extend Spark SQL API. ### How was this patch tested? 1. By running the existing test suites: ``` $ build/sbt "test:testOnly *SQLKeywordSuite" ``` 3. 
and new checks: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z date.sql" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z datetime-legacy.sql" ``` Closes #35709 from MaxGekk/datediff. Authored-by: Max Gekk Signed-off-by: Max Gekk --- docs/sql-ref-ansi-compliance.md | 2 + .../spark/sql/catalyst/parser/SqlBase.g4 | 8 +- .../test/resources/sql-tests/inputs/date.sql | 12 +++ .../sql-tests/results/ansi/date.sql.out | 82 ++++++++++++++++++- .../resources/sql-tests/results/date.sql.out | 82 ++++++++++++++++++- .../sql-tests/results/datetime-legacy.sql.out | 82 ++++++++++++++++++- 6 files changed, 264 insertions(+), 4 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 46bf415a8f012..76462062c210b 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -392,6 +392,8 @@ Below is a list of all the keywords in Spark SQL. |DATABASES|non-reserved|non-reserved|non-reserved| |DATEADD|non-reserved|non-reserved|non-reserved| |DATE_ADD|non-reserved|non-reserved|non-reserved| +|DATEDIFF|non-reserved|non-reserved|non-reserved| +|DATE_DIFF|non-reserved|non-reserved|non-reserved| |DAY|non-reserved|non-reserved|non-reserved| |DBPROPERTIES|non-reserved|non-reserved|non-reserved| |DEFINED|non-reserved|non-reserved|non-reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index ae57b42a1f995..3f5052d51f2c5 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -861,7 +861,7 @@ valueExpression primaryExpression : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER) #currentLike | name=(TIMESTAMPADD | DATEADD | DATE_ADD) '(' unit=identifier ',' unitsAmount=valueExpression ',' timestamp=valueExpression ')' #timestampadd - | TIMESTAMPDIFF '(' unit=identifier ',' startTimestamp=valueExpression ',' endTimestamp=valueExpression ')' #timestampdiff + | name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF) '(' unit=identifier ',' startTimestamp=valueExpression ',' endTimestamp=valueExpression ')' #timestampdiff | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase | CASE value=expression whenClause+ (ELSE elseExpression=expression)? 
END #simpleCase | name=(CAST | TRY_CAST) '(' expression AS dataType ')' #cast @@ -1133,6 +1133,8 @@ ansiNonReserved | DATABASES | DATEADD | DATE_ADD + | DATEDIFF + | DATE_DIFF | DAY | DBPROPERTIES | DEFINED @@ -1383,6 +1385,8 @@ nonReserved | DATABASES | DATEADD | DATE_ADD + | DATEDIFF + | DATE_DIFF | DAY | DBPROPERTIES | DEFINED @@ -1653,6 +1657,8 @@ DATABASE: 'DATABASE'; DATABASES: 'DATABASES'; DATEADD: 'DATEADD'; DATE_ADD: 'DATE_ADD'; +DATEDIFF: 'DATEDIFF'; +DATE_DIFF: 'DATE_DIFF'; DBPROPERTIES: 'DBPROPERTIES'; DEFINED: 'DEFINED'; DELETE: 'DELETE'; diff --git a/sql/core/src/test/resources/sql-tests/inputs/date.sql b/sql/core/src/test/resources/sql-tests/inputs/date.sql index 6fcba1de44dab..4c8d5a7b85a33 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/date.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/date.sql @@ -152,3 +152,15 @@ select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03'); select date_add(MONTH, -1, timestamp'2022-02-25 01:02:03'); select dateadd(QUARTER, 5, date'2022-02-25'); select date_add(YEAR, 1, date'2022-02-25'); + +-- Get the difference between timestamps or dates in the specified units +select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001'); +select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455'); +select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01'); +select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00'); +select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03'); +select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00'); +select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03'); +select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03'); +select date_diff(QUARTER, date'2022-02-25', date'2023-05-25'); +select datediff(YEAR, date'2022-02-25', date'2023-02-25'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 07989aeae17a2..a21512ffe8b7c 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 87 +-- Number of queries: 97 -- !query @@ -740,3 +740,83 @@ select date_add(YEAR, 1, date'2022-02-25') struct -- !query output 2023-02-25 00:00:00 + + +-- !query +select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') +-- !query schema +struct +-- !query output +1001 + + +-- !query +select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01') +-- !query schema +struct +-- !query output +58 + + +-- !query +select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00') +-- !query schema +struct +-- !query output +-100 + + +-- !query +select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00') +-- !query schema +struct +-- !query output +367 + + +-- !query +select date_diff(WEEK, timestamp'2022-02-25 01:02:03', 
timestamp'2022-01-28 01:02:03') +-- !query schema +struct +-- !query output +-4 + + +-- !query +select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select date_diff(QUARTER, date'2022-02-25', date'2023-05-25') +-- !query schema +struct +-- !query output +5 + + +-- !query +select datediff(YEAR, date'2022-02-25', date'2023-02-25') +-- !query schema +struct +-- !query output +1 diff --git a/sql/core/src/test/resources/sql-tests/results/date.sql.out b/sql/core/src/test/resources/sql-tests/results/date.sql.out index e3a2d7d00f6f0..bd32361eaef06 100644 --- a/sql/core/src/test/resources/sql-tests/results/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/date.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 87 +-- Number of queries: 97 -- !query @@ -739,3 +739,83 @@ select date_add(YEAR, 1, date'2022-02-25') struct -- !query output 2023-02-25 00:00:00 + + +-- !query +select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') +-- !query schema +struct +-- !query output +1001 + + +-- !query +select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01') +-- !query schema +struct +-- !query output +58 + + +-- !query +select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00') +-- !query schema +struct +-- !query output +-100 + + +-- !query +select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00') +-- !query schema +struct +-- !query output +367 + + +-- !query +select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') +-- !query schema +struct +-- !query output +-4 + + +-- !query +select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select date_diff(QUARTER, date'2022-02-25', date'2023-05-25') +-- !query schema +struct +-- !query output +5 + + +-- !query +select datediff(YEAR, date'2022-02-25', date'2023-02-25') +-- !query schema +struct +-- !query output +1 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 60752e3fe20dc..8eeed14473fa5 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 184 +-- Number of queries: 194 -- !query @@ -738,6 +738,86 @@ struct 2023-02-25 00:00:00 +-- !query +select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') +-- !query schema +struct +-- !query output +1001 + + +-- !query +select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01') +-- !query schema +struct +-- !query output +58 + + +-- !query +select datediff(MINUTE, date'2022-02-25', 
timestamp'2022-02-24 22:20:00') +-- !query schema +struct +-- !query output +-100 + + +-- !query +select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00') +-- !query schema +struct +-- !query output +367 + + +-- !query +select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') +-- !query schema +struct +-- !query output +-4 + + +-- !query +select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03') +-- !query schema +struct +-- !query output +-1 + + +-- !query +select date_diff(QUARTER, date'2022-02-25', date'2023-05-25') +-- !query schema +struct +-- !query output +5 + + +-- !query +select datediff(YEAR, date'2022-02-25', date'2023-02-25') +-- !query schema +struct +-- !query output +1 + + -- !query select timestamp '2019-01-01\t' -- !query schema From 4d4c0444ac71f177711a5dab79b506f5048d9200 Mon Sep 17 00:00:00 2001 From: Martin Tzvetanov Grigorov Date: Wed, 2 Mar 2022 10:02:13 -0800 Subject: [PATCH 380/513] [SPARK-38392][K8S][TESTS] Add `spark-` prefix to namespaces and `-driver` suffix to drivers during IT ### What changes were proposed in this pull request? There are two small proposals: 1) prefix the name of the temporary k8s namespaces with `"spark-"` so that the output of `kubectl get ns" is more clear. 2) unify the name of the driver pod in non-test and IT tests to always use `-driver` as a suffix. ### Why are the changes needed? At the moment the name of the temporary namespace is just UUID without the `-`s. When one reads the result of `kubectl get ns` it is a bit cryptic to see UUIDs. The names of the driver pods in ITs are not telling me that they are Drivers. In non-test (i.e. production) the driver pod names are suffixed with `-driver`. I propose the same for IT tests. Executor pods always use `-exec-` in their pod names, both in non-test and ITs. ### Does this PR introduce _any_ user-facing change? Yes! Developers who debug IT tests will see more clear names now. ### How was this patch tested? Manually with `kubectl get ns --watch` and `kubectl get po --watch`. Closes #35711 from martin-g/k8s-test-names-improvement. 
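To make the renaming concrete, the shapes of the generated names after this change look roughly like the sketch below; the UUID values are placeholders and the real code is in the diff that follows.

```
import java.util.UUID

// Temporary test namespace: prefixed with "spark-" so it stands out in `kubectl get ns`.
val namespace = "spark-" + UUID.randomUUID().toString.replaceAll("-", "")

// Driver pod used by the integration tests: suffixed with "-driver",
// matching the naming convention of non-test driver pods.
val driverPodName = "spark-test-app-" + UUID.randomUUID().toString.replaceAll("-", "") + "-driver"
```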
Authored-by: Martin Tzvetanov Grigorov Signed-off-by: Dongjoon Hyun --- .../spark/deploy/k8s/integrationtest/KubernetesSuite.scala | 3 ++- .../deploy/k8s/integrationtest/KubernetesTestComponents.scala | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 15ce4874b4035..9faf73fb869fd 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -184,7 +184,8 @@ class KubernetesSuite extends SparkFunSuite protected def setUpTest(): Unit = { appLocator = UUID.randomUUID().toString.replaceAll("-", "") - driverPodName = "spark-test-app-" + UUID.randomUUID().toString.replaceAll("-", "") + driverPodName = "spark-test-app-" + + UUID.randomUUID().toString.replaceAll("-", "") + "-driver" sparkAppConf = kubernetesTestComponents.newSparkAppConf() .set("spark.kubernetes.container.image", image) .set("spark.kubernetes.driver.pod.name", driverPodName) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala index 411857f0229db..4fdb89eab6eb6 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala @@ -37,7 +37,8 @@ private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesCl val namespaceOption = Option(System.getProperty(CONFIG_KEY_KUBE_NAMESPACE)) val hasUserSpecifiedNamespace = namespaceOption.isDefined - val namespace = namespaceOption.getOrElse(UUID.randomUUID().toString.replaceAll("-", "")) + val namespace = namespaceOption.getOrElse("spark-" + + UUID.randomUUID().toString.replaceAll("-", "")) val serviceAccountName = Option(System.getProperty(CONFIG_KEY_KUBE_SVC_ACCOUNT)) .getOrElse("default") From ad5427ebe644fc01a9b4c19a48f902f584245edf Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 2 Mar 2022 11:51:06 -0800 Subject: [PATCH 381/513] [SPARK-36553][ML] KMeans avoid compute auxiliary statistics for large K ### What changes were proposed in this pull request? SPARK-31007 introduce an auxiliary statistics to speed up computation in KMeasn. However, it needs a array of size `k * (k + 1) / 2`, which may cause overflow or OOM when k is too large. So we should skip this optimization in this case. ### Why are the changes needed? avoid overflow or OOM when k is too large (like 50,000) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #35457 from zhengruifeng/kmean_k_limit. 
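The size argument above can be made concrete. The arithmetic below is purely illustrative (using the k = 50,000 figure mentioned in the description) and is not code from the patch.

```
// The auxiliary statistics hold one Double per pair of centers: k * (k + 1) / 2 entries.
val k = 50000

// With Int arithmetic the intermediate product k * (k + 1) already exceeds Int.MaxValue,
// so the computed size wraps around to a negative number.
val overflowed = k * (k + 1) / 2

// Computed safely with Long, it is ~1.25 billion entries, i.e. roughly 10 GB of Doubles.
val entries = k.toLong * (k + 1) / 2   // 1,250,025,000
val approxBytes = entries * 8L         // ~10,000,200,000 bytes
```

This is why the change below only keeps the precomputed statistics when `DistanceMeasure.shouldComputeStatistics(k)` holds (`k < 1000` in the diff) and only computes them locally when `k.toLong * k * numFeatures` stays under 1,000,000.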
Authored-by: Ruifeng Zheng Signed-off-by: huaxingao --- .../mllib/clustering/DistanceMeasure.scala | 23 +++++++++++++++++++ .../spark/mllib/clustering/KMeans.scala | 15 ++++++++---- .../spark/mllib/clustering/KMeansModel.scala | 11 +++++++-- 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/DistanceMeasure.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/DistanceMeasure.scala index 9ac473aabecea..e4c29a789b52f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/DistanceMeasure.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/DistanceMeasure.scala @@ -117,6 +117,24 @@ private[spark] abstract class DistanceMeasure extends Serializable { packedValues } + /** + * @param centers the clustering centers + * @param statistics optional statistics to accelerate the computation, which should not + * change the result. + * @param point given point + * @return the index of the closest center to the given point, as well as the cost. + */ + def findClosest( + centers: Array[VectorWithNorm], + statistics: Option[Array[Double]], + point: VectorWithNorm): (Int, Double) = { + if (statistics.nonEmpty) { + findClosest(centers, statistics.get, point) + } else { + findClosest(centers, point) + } + } + /** * @return the index of the closest center to the given point, as well as the cost. */ @@ -253,6 +271,11 @@ object DistanceMeasure { case _ => false } } + + private[clustering] def shouldComputeStatistics(k: Int): Boolean = k < 1000 + + private[clustering] def shouldComputeStatisticsLocally(k: Int, numFeatures: Int): Boolean = + k.toLong * k * numFeatures < 1000000 } private[spark] class EuclideanDistanceMeasure extends DistanceMeasure { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index 76e2928f12236..c140b1b9e0914 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -269,15 +269,22 @@ class KMeans private ( instr.foreach(_.logNumFeatures(numFeatures)) - val shouldDistributed = centers.length * centers.length * numFeatures.toLong > 1000000L + val shouldComputeStats = + DistanceMeasure.shouldComputeStatistics(centers.length) + val shouldComputeStatsLocally = + DistanceMeasure.shouldComputeStatisticsLocally(centers.length, numFeatures) // Execute iterations of Lloyd's algorithm until converged while (iteration < maxIterations && !converged) { val bcCenters = sc.broadcast(centers) - val stats = if (shouldDistributed) { - distanceMeasureInstance.computeStatisticsDistributedly(sc, bcCenters) + val stats = if (shouldComputeStats) { + if (shouldComputeStatsLocally) { + Some(distanceMeasureInstance.computeStatistics(centers)) + } else { + Some(distanceMeasureInstance.computeStatisticsDistributedly(sc, bcCenters)) + } } else { - distanceMeasureInstance.computeStatistics(centers) + None } val bcStats = sc.broadcast(stats) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala index a24493bb7a8f9..64b352157caf7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala @@ -50,9 +50,16 @@ class KMeansModel (@Since("1.0.0") val clusterCenters: Array[Vector], // TODO: computation of statistics 
may take seconds, so save it to KMeansModel in training @transient private lazy val statistics = if (clusterCenters == null) { - null + None } else { - distanceMeasureInstance.computeStatistics(clusterCentersWithNorm) + val k = clusterCenters.length + val numFeatures = clusterCenters.head.size + if (DistanceMeasure.shouldComputeStatistics(k) && + DistanceMeasure.shouldComputeStatisticsLocally(k, numFeatures)) { + Some(distanceMeasureInstance.computeStatistics(clusterCentersWithNorm)) + } else { + None + } } @Since("2.4.0") From 829d7fb045e47f1ddd43f2645949ea8257ca330d Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 2 Mar 2022 11:57:38 -0800 Subject: [PATCH 382/513] [MINOR][SQL][DOCS] Add more examples to sql-ref-syntax-ddl-create-table-datasource ### What changes were proposed in this pull request? Add more examples to sql-ref-syntax-ddl-create-table-datasource: 1. Create partitioned and bucketed table through CTAS. 2. Create bucketed table through CTAS and CTE ### Why are the changes needed? Improve doc. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual test. Closes #35712 from wangyum/sql-ref-syntax-ddl-create-table-datasource. Authored-by: Yuming Wang Signed-off-by: huaxingao --- ...ql-ref-syntax-ddl-create-table-datasource.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/sql-ref-syntax-ddl-create-table-datasource.md b/docs/sql-ref-syntax-ddl-create-table-datasource.md index ba0516afbbfad..9fa5dcb533a33 100644 --- a/docs/sql-ref-syntax-ddl-create-table-datasource.md +++ b/docs/sql-ref-syntax-ddl-create-table-datasource.md @@ -132,6 +132,23 @@ CREATE TABLE student (id INT, name STRING, age INT) USING CSV PARTITIONED BY (age) CLUSTERED BY (Id) INTO 4 buckets; + +--Create partitioned and bucketed table through CTAS +CREATE TABLE student_partition_bucket + USING parquet + PARTITIONED BY (age) + CLUSTERED BY (id) INTO 4 buckets + AS SELECT * FROM student; + +--Create bucketed table through CTAS and CTE +CREATE TABLE student_bucket + USING parquet + CLUSTERED BY (id) INTO 4 buckets ( + WITH tmpTable AS ( + SELECT * FROM student WHERE id > 100 + ) + SELECT * FROM tmpTable +); ``` ### Related Statements From 226bdec8d99c51a58018f0bd085a51f1907c1e1a Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 2 Mar 2022 12:02:27 -0800 Subject: [PATCH 383/513] [SPARK-38269][CORE][SQL][SS][ML][MLLIB][MESOS][YARN][K8S][EXAMPLES] Clean up redundant type cast ### What changes were proposed in this pull request? This pr aims to clean up redundant type cast in Spark code. ### Why are the changes needed? Code simplification ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GA - Manually build a client, check `org.apache.spark.examples.DriverSubmissionTest` and `org.apache.spark.examples.mllib.LDAExample` passed Closes #35592 from LuciferYang/redundant-cast. 
Authored-by: yangjie01 Signed-off-by: huaxingao --- .../org/apache/spark/util/kvstore/ArrayWrappers.java | 2 +- .../java/org/apache/spark/unsafe/types/ByteArray.java | 2 +- .../java/org/apache/spark/unsafe/types/UTF8String.java | 4 ++-- .../java/org/apache/spark/io/ReadAheadInputStream.java | 2 +- .../spark/deploy/history/FsHistoryProvider.scala | 6 +++--- .../org/apache/spark/deploy/master/ui/MasterPage.scala | 8 ++++---- .../scala/org/apache/spark/metrics/MetricsConfig.scala | 6 +++--- .../apache/spark/rdd/ReliableRDDCheckpointData.scala | 2 +- .../org/apache/spark/resource/ResourceProfile.scala | 2 +- .../main/scala/org/apache/spark/ui/GraphUIData.scala | 2 +- .../org/apache/spark/ui/storage/StoragePage.scala | 2 +- .../scala/org/apache/spark/util/JsonProtocol.scala | 2 +- .../scala/org/apache/spark/util/SizeEstimator.scala | 4 ++-- .../apache/spark/examples/DriverSubmissionTest.scala | 2 +- .../org/apache/spark/examples/mllib/LDAExample.scala | 2 +- .../spark/sql/kafka010/KafkaSourceProvider.scala | 2 +- .../spark/ml/classification/LogisticRegression.scala | 2 +- .../apache/spark/ml/classification/NaiveBayes.scala | 2 +- .../main/scala/org/apache/spark/ml/fpm/FPGrowth.scala | 2 +- .../ml/regression/GeneralizedLinearRegression.scala | 2 +- .../org/apache/spark/ml/tree/impl/RandomForest.scala | 4 ++-- .../scala/org/apache/spark/ml/tree/treeModels.scala | 8 ++++---- .../org/apache/spark/mllib/clustering/LDAModel.scala | 4 ++-- .../spark/mllib/evaluation/MulticlassMetrics.scala | 10 +++++----- .../mllib/linalg/distributed/IndexedRowMatrix.scala | 4 ++-- .../k8s/KubernetesClusterSchedulerBackend.scala | 4 ++-- .../mesos/MesosCoarseGrainedSchedulerBackend.scala | 2 +- .../spark/deploy/yarn/ResourceRequestHelper.scala | 4 ++-- .../org/apache/spark/deploy/yarn/YarnRMClient.scala | 2 +- .../sql/catalyst/rules/QueryExecutionMetering.scala | 2 +- .../parquet/VectorizedParquetRecordReader.java | 2 +- .../sql/execution/adaptive/AdaptiveSparkPlanExec.scala | 2 +- .../sql/execution/datasources/jdbc/JDBCOptions.scala | 2 +- .../sql/execution/ui/StreamingQueryStatusStore.scala | 2 +- .../spark/sql/streaming/StreamingQueryStatus.scala | 2 +- .../streaming/ui/StreamingQueryStatusListener.scala | 2 +- .../thriftserver/ui/HiveThriftServer2Listener.scala | 4 ++-- .../sql/hive/thriftserver/ui/ThriftServerPage.scala | 2 +- .../org/apache/spark/tools/GenerateMIMAIgnore.scala | 2 +- 39 files changed, 61 insertions(+), 61 deletions(-) diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/ArrayWrappers.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/ArrayWrappers.java index 825355ed5d587..6f9487322bb57 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/ArrayWrappers.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/ArrayWrappers.java @@ -200,7 +200,7 @@ public int hashCode() { public int compareTo(ComparableObjectArray other) { int len = Math.min(array.length, other.array.length); for (int i = 0; i < len; i++) { - int diff = ((Comparable) array[i]).compareTo((Comparable) other.array[i]); + int diff = ((Comparable) array[i]).compareTo(other.array[i]); if (diff != 0) { return diff; } diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java index 4126cf5150fa9..aae47aa963201 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java 
@@ -178,7 +178,7 @@ private static void fillWithPattern(byte[] result, int firstPos, int beyondPos, for (int pos = firstPos; pos < beyondPos; pos += pad.length) { final int jMax = Math.min(pad.length, beyondPos - pos); for (int j = 0; j < jMax; ++j) { - result[pos + j] = (byte) pad[j]; + result[pos + j] = pad[j]; } } } diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 04c69f89b1e34..98c61cfd9bb9b 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -369,7 +369,7 @@ public UTF8String toUpperCase() { // fallback return toUpperCaseSlow(); } - int upper = Character.toUpperCase((int) b); + int upper = Character.toUpperCase(b); if (upper > 127) { // fallback return toUpperCaseSlow(); @@ -399,7 +399,7 @@ public UTF8String toLowerCase() { // fallback return toLowerCaseSlow(); } - int lower = Character.toLowerCase((int) b); + int lower = Character.toLowerCase(b); if (lower > 127) { // fallback return toLowerCaseSlow(); diff --git a/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java b/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java index 2e18715b600e0..011fecb315639 100644 --- a/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java +++ b/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java @@ -302,7 +302,7 @@ public int available() throws IOException { stateChangeLock.lock(); // Make sure we have no integer overflow. try { - return (int) Math.min((long) Integer.MAX_VALUE, + return (int) Math.min(Integer.MAX_VALUE, (long) activeBuffer.remaining() + readAheadBuffer.remaining()); } finally { stateChangeLock.unlock(); diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index faa7033a147d9..a2494eb52d0ab 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -144,8 +144,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) val dbPath = Files.createDirectories(new File(path, dir).toPath()).toFile() Utils.chmod700(dbPath) - val metadata = new FsHistoryProviderMetadata(CURRENT_LISTING_VERSION, - AppStatusStore.CURRENT_VERSION, logDir.toString()) + val metadata = FsHistoryProviderMetadata(CURRENT_LISTING_VERSION, + AppStatusStore.CURRENT_VERSION, logDir) try { open(dbPath, metadata, conf) @@ -414,7 +414,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } else { Map() } - Map("Event log directory" -> logDir.toString) ++ safeMode + Map("Event log directory" -> logDir) ++ safeMode } override def start(): Unit = { diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index 2b4d860b92804..a71eb33a2fe1d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala @@ -277,7 +277,7 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { s"if (window.confirm('Are you sure you want to kill application ${app.id} ?')) " + "{ this.parentNode.submit(); return true; } else { return false; }"
    - + (kill)
    @@ -328,7 +328,7 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { s"if (window.confirm('Are you sure you want to kill driver ${driver.id} ?')) " + "{ this.parentNode.submit(); return true; } else { return false; }"
    - + (kill)
    @@ -339,10 +339,10 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { {driver.worker.map(w => if (w.isAlive()) { - {w.id.toString} + {w.id} } else { - w.id.toString + w.id }).getOrElse("None")} {driver.state} diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala index bddd18adc683e..4b53aad6fc48b 100644 --- a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala @@ -107,9 +107,9 @@ private[spark] class MetricsConfig(conf: SparkConf) extends Logging { def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = { val subProperties = new mutable.HashMap[String, Properties] prop.asScala.foreach { kv => - if (regex.findPrefixOf(kv._1.toString).isDefined) { - val regex(prefix, suffix) = kv._1.toString - subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2.toString) + if (regex.findPrefixOf(kv._1).isDefined) { + val regex(prefix, suffix) = kv._1 + subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2) } } subProperties diff --git a/core/src/main/scala/org/apache/spark/rdd/ReliableRDDCheckpointData.scala b/core/src/main/scala/org/apache/spark/rdd/ReliableRDDCheckpointData.scala index 0a26b7b0678eb..0d1bc1425161e 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ReliableRDDCheckpointData.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ReliableRDDCheckpointData.scala @@ -46,7 +46,7 @@ private[spark] class ReliableRDDCheckpointData[T: ClassTag](@transient private v */ def getCheckpointDir: Option[String] = RDDCheckpointData.synchronized { if (isCheckpointed) { - Some(cpDir.toString) + Some(cpDir) } else { None } diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala b/core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala index 339870195044c..087897ff73097 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala @@ -87,7 +87,7 @@ class ResourceProfile( } private[spark] def getPySparkMemory: Option[Long] = { - executorResources.get(ResourceProfile.PYSPARK_MEM).map(_.amount.toLong) + executorResources.get(ResourceProfile.PYSPARK_MEM).map(_.amount) } /* diff --git a/core/src/main/scala/org/apache/spark/ui/GraphUIData.scala b/core/src/main/scala/org/apache/spark/ui/GraphUIData.scala index 87ff677514461..ab8757ff9d1f2 100644 --- a/core/src/main/scala/org/apache/spark/ui/GraphUIData.scala +++ b/core/src/main/scala/org/apache/spark/ui/GraphUIData.scala @@ -102,7 +102,7 @@ private[spark] class GraphUIData( val jsForLabels = operationLabels.toSeq.sorted.mkString("[\"", "\",\"", "\"]") val (maxX, minX, maxY, minY) = if (values != null && values.length > 0) { - val xValues = values.map(_._1.toLong) + val xValues = values.map(_._1) val yValues = values.map(_._2.asScala.toSeq.map(_._2.toLong).sum) (xValues.max, xValues.min, yValues.max, yValues.min) } else { diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala index fb43af357f7b8..c1708c320c5d4 100644 --- a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala @@ -110,7 +110,7 @@ private[ui] class StoragePage(parent: SparkUITab, store: AppStatusStore) extends // 
Don't show the tables if there is no stream block Nil } else { - val sorted = blocks.groupBy(_.name).toSeq.sortBy(_._1.toString) + val sorted = blocks.groupBy(_.name).toSeq.sortBy(_._1)

    Receiver Blocks

    diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala index 4e68ee0ed83cd..f9b6ed37977cb 100644 --- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala @@ -463,7 +463,7 @@ private[spark] object JsonProtocol { case ExecutorLostFailure(executorId, exitCausedByApp, reason) => ("Executor ID" -> executorId) ~ ("Exit Caused By App" -> exitCausedByApp) ~ - ("Loss Reason" -> reason.map(_.toString)) + ("Loss Reason" -> reason) case taskKilled: TaskKilled => val accumUpdates = JArray(taskKilled.accumUpdates.map(accumulableInfoToJson).toList) ("Kill Reason" -> taskKilled.reason) ~ diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index 9ec93077d0a4c..55d13801d4abc 100644 --- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -262,7 +262,7 @@ object SizeEstimator extends Logging { val s2 = sampleArray(array, state, rand, drawn, length) val size = math.min(s1, s2) state.size += math.max(s1, s2) + - (size * ((length - ARRAY_SAMPLE_SIZE) / (ARRAY_SAMPLE_SIZE))).toLong + (size * ((length - ARRAY_SAMPLE_SIZE) / ARRAY_SAMPLE_SIZE)) } } } @@ -282,7 +282,7 @@ object SizeEstimator extends Logging { drawn.add(index) val obj = ScalaRunTime.array_apply(array, index).asInstanceOf[AnyRef] if (obj != null) { - size += SizeEstimator.estimate(obj, state.visited).toLong + size += SizeEstimator.estimate(obj, state.visited) } } size diff --git a/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala b/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala index ed56108f4b624..94fc755e0ca0f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala @@ -41,7 +41,7 @@ object DriverSubmissionTest { env.asScala.filter { case (k, _) => k.contains("SPARK_TEST")}.foreach(println) println("System properties containing spark.test:") - properties.filter { case (k, _) => k.toString.contains("spark.test") }.foreach(println) + properties.filter { case (k, _) => k.contains("spark.test") }.foreach(println) for (i <- 1 until numSecondsToSleep) { println(s"Alive for $i out of $numSecondsToSleep seconds") diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala index afd529c2c4c13..d80f54d18476f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala @@ -169,7 +169,7 @@ object LDAExample { // Print the topics, showing the top-weighted terms for each topic. 
val topicIndices = ldaModel.describeTopics(maxTermsPerTopic = 10) val topics = topicIndices.map { case (terms, termWeights) => - terms.zip(termWeights).map { case (term, weight) => (vocabArray(term.toInt), weight) } + terms.zip(termWeights).map { case (term, weight) => (vocabArray(term), weight) } } println(s"${params.k} topics:") topics.zipWithIndex.foreach { case (topic, i) => diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala index 3747621b36089..de78992533b22 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala @@ -728,7 +728,7 @@ private[kafka010] object KafkaSourceProvider extends Logging { parameters .keySet .filter(_.toLowerCase(Locale.ROOT).startsWith("kafka.")) - .map { k => k.drop(6).toString -> parameters(k) } + .map { k => k.drop(6) -> parameters(k) } .toMap } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 42f88d0dbd3e1..f18b8af1a7fa8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -1334,7 +1334,7 @@ object LogisticRegressionModel extends MLReadable[LogisticRegressionModel] { val dataPath = new Path(path, "data").toString val data = sparkSession.read.format("parquet").load(dataPath) - val model = if (major.toInt < 2 || (major.toInt == 2 && minor.toInt == 0)) { + val model = if (major < 2 || (major == 2 && minor == 0)) { // 2.0 and before val Row(numClasses: Int, numFeatures: Int, intercept: Double, coefficients: Vector) = MLUtils.convertVectorColumnsToML(data, "coefficients") diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala index fd19ec3976e8f..f947268c58515 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala @@ -597,7 +597,7 @@ object NaiveBayesModel extends MLReadable[NaiveBayesModel] { val data = sparkSession.read.parquet(dataPath) val vecConverted = MLUtils.convertVectorColumnsToML(data, "pi") - val model = if (major.toInt < 3) { + val model = if (major < 3) { val Row(pi: Vector, theta: Matrix) = MLUtils.convertMatrixColumnsToML(vecConverted, "theta") .select("pi", "theta") diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala index 8aaa5efdf06c5..465ca6e3cd569 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala @@ -350,7 +350,7 @@ object FPGrowthModel extends MLReadable[FPGrowthModel] { implicit val format = DefaultFormats val metadata = DefaultParamsReader.loadMetadata(path, sc, className) val (major, minor) = VersionUtils.majorMinorVersion(metadata.sparkVersion) - val numTrainingRecords = if (major.toInt < 2 || (major.toInt == 2 && minor.toInt < 4)) { + val numTrainingRecords = if (major < 2 || (major == 2 && minor < 4)) { // 2.3 and before don't store the count 0L } else { diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index f7dfda81d4e6f..73da2af29ef3a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -1576,7 +1576,7 @@ class GeneralizedLinearRegressionTrainingSummary private[regression] ( data.foreach { case strRow: Array[String] => strRow.zipWithIndex.map { case (cell: String, i: Int) => - StringUtils.leftPad(cell.toString, colWidths(i)) + StringUtils.leftPad(cell, colWidths(i)) }.addString(sb, "", " ", "\n") } diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala index b6bc7aaeed628..de6c935cdd157 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala @@ -789,8 +789,8 @@ private[spark] object RandomForest extends Logging with Serializable { val leftImpurity = leftImpurityCalculator.calculate() // Note: This equals 0 if count = 0 val rightImpurity = rightImpurityCalculator.calculate() - val leftWeight = leftCount / totalCount.toDouble - val rightWeight = rightCount / totalCount.toDouble + val leftWeight = leftCount / totalCount + val rightWeight = rightCount / totalCount val gain = impurity - leftWeight * leftImpurity - rightWeight * rightImpurity diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala index 4858189dea825..cc917db98b328 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala @@ -413,8 +413,8 @@ private[ml] object DecisionTreeModelReadWrite { val dataPath = new Path(path, "data").toString var df = sparkSession.read.parquet(dataPath) - val (major, minor) = VersionUtils.majorMinorVersion(metadata.sparkVersion) - if (major.toInt < 3) { + val (major, _) = VersionUtils.majorMinorVersion(metadata.sparkVersion) + if (major < 3) { df = df.withColumn("rawCount", lit(-1L)) } @@ -530,8 +530,8 @@ private[ml] object EnsembleModelReadWrite { val dataPath = new Path(path, "data").toString var df = sparkSession.read.parquet(dataPath) - val (major, minor) = VersionUtils.majorMinorVersion(metadata.sparkVersion) - if (major.toInt < 3) { + val (major, _) = VersionUtils.majorMinorVersion(metadata.sparkVersion) + if (major < 3) { val newNodeDataCol = df.schema("nodeData").dataType match { case StructType(fields) => val cols = fields.map(f => col(s"nodeData.${f.name}")) :+ lit(-1L).as("rawCount") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala index ec952520eb9c0..aa8b6a00a427f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala @@ -766,7 +766,7 @@ class DistributedLDAModel private[clustering] ( @Since("1.3.0") def topicDistributions: RDD[(Long, Vector)] = { graph.vertices.filter(LDA.isDocumentVertex).map { case (docID, topicCounts) => - (docID.toLong, Vectors.fromBreeze(normalize(topicCounts, 1.0))) + (docID, Vectors.fromBreeze(normalize(topicCounts, 1.0))) } } @@ -792,7 +792,7 @@ class DistributedLDAModel 
private[clustering] ( } else { topicCounts(topIndices).toArray } - (docID.toLong, topIndices.toArray, weights) + (docID, topIndices.toArray, weights) } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index 1a91801a9da28..92abe3daeebe0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -149,7 +149,7 @@ class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[_ <: Product]) def precision(label: Double): Double = { val tp = tpByClass(label) val fp = fpByClass.getOrElse(label, 0.0) - if (tp + fp == 0) 0 else tp.toDouble / (tp + fp) + if (tp + fp == 0) 0 else tp / (tp + fp) } /** @@ -199,7 +199,7 @@ class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[_ <: Product]) */ @Since("1.1.0") lazy val weightedFalsePositiveRate: Double = labelCountByClass.map { case (category, count) => - falsePositiveRate(category) * count.toDouble / labelCount + falsePositiveRate(category) * count / labelCount }.sum /** @@ -208,7 +208,7 @@ class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[_ <: Product]) */ @Since("1.1.0") lazy val weightedRecall: Double = labelCountByClass.map { case (category, count) => - recall(category) * count.toDouble / labelCount + recall(category) * count / labelCount }.sum /** @@ -216,7 +216,7 @@ class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[_ <: Product]) */ @Since("1.1.0") lazy val weightedPrecision: Double = labelCountByClass.map { case (category, count) => - precision(category) * count.toDouble / labelCount + precision(category) * count / labelCount }.sum /** @@ -225,7 +225,7 @@ class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[_ <: Product]) */ @Since("1.1.0") def weightedFMeasure(beta: Double): Double = labelCountByClass.map { case (category, count) => - fMeasure(category, beta) * count.toDouble / labelCount + fMeasure(category, beta) * count / labelCount }.sum /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index da5d1650694d6..f92ac0789c952 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -133,11 +133,11 @@ class IndexedRowMatrix @Since("1.0.0") ( val rowInBlock = ir.index % rowsPerBlock ir.vector match { - case SparseVector(size, indices, values) => + case SparseVector(_, indices, values) => indices.zip(values).map { case (index, value) => val blockColumn = index / colsPerBlock val columnInBlock = index % colsPerBlock - ((blockRow.toInt, blockColumn.toInt), (rowInBlock.toInt, Array((value, columnInBlock)))) + ((blockRow.toInt, blockColumn), (rowInBlock.toInt, Array((value, columnInBlock)))) } case DenseVector(values) => values.grouped(colsPerBlock) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala index 1bda9cc67d247..43c6597362e41 100644 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala @@ -96,7 +96,7 @@ private[spark] class KubernetesClusterSchedulerBackend( * @return The application ID */ override def applicationId(): String = { - conf.getOption("spark.app.id").map(_.toString).getOrElse(appId) + conf.getOption("spark.app.id").getOrElse(appId) } override def start(): Unit = { @@ -302,7 +302,7 @@ private[spark] class KubernetesClusterSchedulerBackend( kubernetesClient.pods() .withName(x.podName) .edit({p: Pod => new PodBuilder(p).editMetadata() - .addToLabels(SPARK_EXECUTOR_ID_LABEL, newId.toString) + .addToLabels(SPARK_EXECUTOR_ID_LABEL, newId) .endMetadata() .build()}) } diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala index b7b652d83ffe2..e5a6a5f1ef166 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala @@ -525,7 +525,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( partitionTaskResources(resources, taskCPUs, taskMemory, taskGPUs) val taskBuilder = MesosTaskInfo.newBuilder() - .setTaskId(TaskID.newBuilder().setValue(taskId.toString).build()) + .setTaskId(TaskID.newBuilder().setValue(taskId).build()) .setSlaveId(offer.getSlaveId) .setCommand(createCommand(offer, taskCPUs + extraCoresPerExecutor, taskId)) .setName(s"${sc.appName} $taskId") diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala index 50e822510fd3d..5a5334dc76321 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala @@ -51,14 +51,14 @@ private object ResourceRequestHelper extends Logging { if (splitIndex == -1) { val errorMessage = s"Missing suffix for ${componentName}${key}, you must specify" + s" a suffix - $AMOUNT is currently the only supported suffix." - throw new IllegalArgumentException(errorMessage.toString()) + throw new IllegalArgumentException(errorMessage) } val resourceName = key.substring(0, splitIndex) val resourceSuffix = key.substring(splitIndex + 1) if (!AMOUNT.equals(resourceSuffix)) { val errorMessage = s"Unsupported suffix: $resourceSuffix in: ${componentName}${key}, " + s"only .$AMOUNT is supported." 
- throw new IllegalArgumentException(errorMessage.toString()) + throw new IllegalArgumentException(errorMessage) } (resourceName, value) }.toMap diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala index 2f272be60ba25..842611807db4d 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala @@ -124,7 +124,7 @@ private[spark] class YarnRMClient extends Logging { /** Returns the maximum number of attempts to register the AM. */ def getMaxRegAttempts(sparkConf: SparkConf, yarnConf: YarnConfiguration): Int = { - val sparkMaxAttempts = sparkConf.get(MAX_APP_ATTEMPTS).map(_.toInt) + val sparkMaxAttempts = sparkConf.get(MAX_APP_ATTEMPTS) val yarnMaxAttempts = yarnConf.getInt( YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS) sparkMaxAttempts match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/QueryExecutionMetering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/QueryExecutionMetering.scala index 8efc3593d72f5..b5a5e239b68ba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/QueryExecutionMetering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/QueryExecutionMetering.scala @@ -79,7 +79,7 @@ case class QueryExecutionMetering() { val maxLengthRuleNames = if (map.isEmpty) { 0 } else { - map.keys.map(_.toString.length).max + map.keys.map(_.length).max } val colRuleName = "Rule".padTo(maxLengthRuleNames, " ").mkString diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java index 50056bf4073e9..0e976be2f652e 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java @@ -305,7 +305,7 @@ public boolean nextBatch() throws IOException { if (rowsReturned >= totalRowCount) return false; checkEndOfRowGroup(); - int num = (int) Math.min((long) capacity, totalCountLoadedSoFar - rowsReturned); + int num = (int) Math.min(capacity, totalCountLoadedSoFar - rowsReturned); for (int i = 0; i < columnReaders.length; ++i) { if (columnReaders[i] == null) continue; columnReaders[i].readBatch(num, columnVectors[i]); diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 8b31f1738d237..3ec5aadabfaf4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -699,7 +699,7 @@ case class AdaptiveSparkPlanExec( p.flatMap(_.metrics.values.map(m => SQLPlanMetric(m.name.get, m.id, m.metricType))) } context.session.sparkContext.listenerBus.post(SparkListenerSQLAdaptiveSQLMetricUpdates( - executionId.toLong, newMetrics)) + executionId, newMetrics)) } else { val planDescriptionMode = ExplainMode.fromString(conf.uiExplainMode) 
context.session.sparkContext.listenerBus.post(SparkListenerSQLAdaptiveExecutionUpdate( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala index d081e0ace0e44..ad44048ce9c6f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala @@ -216,7 +216,7 @@ class JDBCOptions( // The principal name of user's keytab file val principal = parameters.getOrElse(JDBC_PRINCIPAL, null) - val tableComment = parameters.getOrElse(JDBC_TABLE_COMMENT, "").toString + val tableComment = parameters.getOrElse(JDBC_TABLE_COMMENT, "") val refreshKrb5Config = parameters.getOrElse(JDBC_REFRESH_KRB5_CONFIG, "false").toBoolean diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryStatusStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryStatusStore.scala index 9eb14a6a63063..6a3b4eeb67275 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryStatusStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryStatusStore.scala @@ -43,7 +43,7 @@ class StreamingQueryStatusStore(store: KVStore) { } private def makeUIData(summary: StreamingQueryData): StreamingQueryUIData = { - val runId = summary.runId.toString + val runId = summary.runId val view = store.view(classOf[StreamingQueryProgressWrapper]) .index("runId").first(runId).last(runId) val recentProgress = KVUtils.viewToSeq(view, Int.MaxValue)(_ => true) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryStatus.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryStatus.scala index 6ca9aacab7247..fe187917ec021 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryStatus.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryStatus.scala @@ -61,7 +61,7 @@ class StreamingQueryStatus protected[sql]( } private[sql] def jsonValue: JValue = { - ("message" -> JString(message.toString)) ~ + ("message" -> JString(message)) ~ ("isDataAvailable" -> JBool(isDataAvailable)) ~ ("isTriggerActive" -> JBool(isTriggerActive)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala index b59ec0477d5d4..55ceab245a968 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala @@ -64,7 +64,7 @@ private[sql] class StreamingQueryStatusListener( .take(numInactiveQueries - inactiveQueryStatusRetention) val runIds = toDelete.map { e => store.delete(e.getClass, e.runId) - e.runId.toString + e.runId } // Delete wrappers in one pass, as deleting them for each summary is slow store.removeAllByIndexValues(classOf[StreamingQueryProgressWrapper], "runId", runIds) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala index 4cf672e3d9d9e..7b2da6970fb86 100644 --- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala @@ -93,7 +93,7 @@ private[thriftserver] class HiveThriftServer2Listener( val execList = executionList.values().asScala.filter(_.groupId == groupId).toSeq if (execList.nonEmpty) { execList.foreach { exec => - exec.jobId += jobId.toString + exec.jobId += jobId updateLiveStore(exec) } } else { @@ -105,7 +105,7 @@ private[thriftserver] class HiveThriftServer2Listener( storeExecInfo.foreach { exec => val liveExec = getOrCreateExecution(exec.execId, exec.statement, exec.sessionId, exec.startTimestamp, exec.userName) - liveExec.jobId += jobId.toString + liveExec.jobId += jobId updateStoreWithTriggerEnabled(liveExec) executionList.remove(liveExec.execId) } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala index 54a40e3990f09..d0378efd646e3 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala @@ -232,7 +232,7 @@ private[ui] class SqlStatsPagedTable( def jobLinks(jobData: Seq[String]): Seq[Node] = { jobData.map { jobId => - [{jobId.toString}] + [{jobId}] } } diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala index a6fee8616df11..ef28095850bad 100644 --- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -69,7 +69,7 @@ object GenerateMIMAIgnore { /* Inner classes defined within a private[spark] class or object are effectively invisible, so we account for them as package private. */ lazy val indirectlyPrivateSpark = { - val maybeOuter = className.toString.takeWhile(_ != '$') + val maybeOuter = className.takeWhile(_ != '$') if (maybeOuter != className) { isPackagePrivate(mirror.classSymbol(Class.forName(maybeOuter, false, classLoader))) || isPackagePrivateModule(mirror.staticModule(maybeOuter)) From 23db9b440ba70f4edf1f4a604f4829e1831ea502 Mon Sep 17 00:00:00 2001 From: weixiuli Date: Wed, 2 Mar 2022 18:00:56 -0600 Subject: [PATCH 384/513] [SPARK-38191][CORE][FOLLOWUP] The staging directory of write job only needs to be initialized once in HadoopMapReduceCommitProtocol ### What changes were proposed in this pull request? This pr follows up the https://github.com/apache/spark/pull/35492, try to use a stagingDir constant instead of the stagingDir method in HadoopMapReduceCommitProtocol. ### Why are the changes needed? In the https://github.com/apache/spark/pull/35492#issuecomment-1054910730 ``` ./build/sbt -mem 4096 -Phadoop-2 "sql/testOnly org.apache.spark.sql.sources.PartitionedWriteSuite -- -z SPARK-27194" ... [info] Cause: org.apache.spark.SparkException: Task not serializable ... [info] Cause: java.io.NotSerializableException: org.apache.hadoop.fs.Path ... ``` It's because org.apache.hadoop.fs.Path is serializable in Hadoop3 but not in Hadoop2. So, we should make the stagingDir transient to avoid that. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? 
Passed `./build/sbt -mem 4096 -Phadoop-2 "sql/testOnly org.apache.spark.sql.sources.PartitionedWriteSuite -- -z SPARK-27194"` Pass the CIs. Closes #35693 from weixiuli/staging-directory. Authored-by: weixiuli Signed-off-by: Sean Owen --- .../spark/internal/io/HadoopMapReduceCommitProtocol.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala index a39e9abd9bdc4..3a24da98ecc24 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala @@ -104,7 +104,7 @@ class HadoopMapReduceCommitProtocol( * The staging directory of this write job. Spark uses it to deal with files with absolute output * path, or writing data into partitioned directory with dynamicPartitionOverwrite=true. */ - protected def stagingDir = getStagingDir(path, jobId) + @transient protected lazy val stagingDir = getStagingDir(path, jobId) protected def setupCommitter(context: TaskAttemptContext): OutputCommitter = { val format = context.getOutputFormatClass.getConstructor().newInstance() From 86e0903bfe8cbe5d451471f902d9727ffc16a5ab Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 3 Mar 2022 00:41:17 -0800 Subject: [PATCH 385/513] [SPARK-38398][K8S][TESTS] Add `priorityClassName` integration test case ### What changes were proposed in this pull request? Apache Spark has been supporting many K8s features via `spark.kubernetes.driver.podTemplateFile` and `spark.kubernetes.executor.podTemplateFile` in an extensible way. This PR aims to add an integration test case for `priorityClassName` pod spec. In this test case, we use one of the K8s built-in priority classes because we want to run this test on heterogenous K8s environments. In addition, `schedule` test tag is added for some esoteric K8s environments without `system-node-critical` priority class or `system-node-critical` with different values. - https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical ``` $ k get priorityclass NAME VALUE GLOBAL-DEFAULT AGE system-cluster-critical 2000000000 false 4h19m system-node-critical 2000001000 false 4h19m ``` ### Why are the changes needed? We don't need to enumerate all K8s spec via `spark.kubernetes.xxx` configurations. PodTemplate can do many things. This example will help the future works for customer schedulers. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the K8s IT. This is tested like the following. ``` $ build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube -Dspark.kubernetes.test.deployMode=docker-for-desktop "kubernetes-integration-tests/test" ... [info] KubernetesSuite: [info] - Run SparkPi with no resources (8 seconds, 866 milliseconds) [info] - Run SparkPi with no resources & statefulset allocation (10 seconds, 700 milliseconds) [info] - Run SparkPi with a very long application name. (8 seconds, 634 milliseconds) [info] - Use SparkLauncher.NO_RESOURCE (8 seconds, 628 milliseconds) [info] - Run SparkPi with a master URL without a scheme. (8 seconds, 626 milliseconds) [info] - Run SparkPi with an argument. (8 seconds, 821 milliseconds) [info] - Run SparkPi with custom labels, annotations, and environment variables. 
(9 seconds, 675 milliseconds) [info] - All pods have the same service account by default (8 seconds, 692 milliseconds) [info] - Run extraJVMOptions check on driver (4 seconds, 599 milliseconds) [info] - Run SparkRemoteFileTest using a remote data file (8 seconds, 767 milliseconds) [info] - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties (14 seconds, 140 milliseconds) [info] - Run SparkPi with env and mount secrets. (19 seconds, 62 milliseconds) [info] - Run PySpark on simple pi.py example (9 seconds, 821 milliseconds) [info] - Run PySpark to test a pyfiles example (11 seconds, 713 milliseconds) [info] - Run PySpark with memory customization (9 seconds, 630 milliseconds) [info] - Run in client mode. (7 seconds, 289 milliseconds) [info] - Start pod creation from template (8 seconds, 720 milliseconds) [info] - SPARK-38398: Schedule pod creation from template (8 seconds, 728 milliseconds) ... ``` Closes #35716 from dongjoon-hyun/SPARK-38398. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../resources/driver-schedule-template.yml | 27 ++++++++++++++++++ .../k8s/integrationtest/KubernetesSuite.scala | 1 + .../integrationtest/PodTemplateSuite.scala | 28 ++++++++++++++++++- 3 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/driver-schedule-template.yml diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/driver-schedule-template.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/driver-schedule-template.yml new file mode 100644 index 0000000000000..22eaa6c13a85d --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/driver-schedule-template.yml @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +apiVersion: v1 +Kind: Pod +metadata: + labels: + template-label-key: driver-template-label-value +spec: + priorityClassName: system-node-critical + containers: + - name: test-driver-container + image: will-be-overwritten + diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 9faf73fb869fd..685149f09d72f 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -611,6 +611,7 @@ class KubernetesSuite extends SparkFunSuite private[spark] object KubernetesSuite { val k8sTestTag = Tag("k8s") val localTestTag = Tag("local") + val schedulingTestTag = Tag("schedule") val rTestTag = Tag("r") val MinikubeTag = Tag("minikube") val SPARK_PI_MAIN_CLASS: String = "org.apache.spark.examples.SparkPi" diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PodTemplateSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PodTemplateSuite.scala index e5a847e7210cb..2cd3bb4c4a7d9 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PodTemplateSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PodTemplateSuite.scala @@ -20,7 +20,7 @@ import java.io.File import io.fabric8.kubernetes.api.model.Pod -import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.k8sTestTag +import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{k8sTestTag, schedulingTestTag} private[spark] trait PodTemplateSuite { k8sSuite: KubernetesSuite => @@ -46,10 +46,36 @@ private[spark] trait PodTemplateSuite { k8sSuite: KubernetesSuite => } ) } + + test("SPARK-38398: Schedule pod creation from template", k8sTestTag, schedulingTestTag) { + sparkAppConf + .set("spark.kubernetes.driver.podTemplateFile", + DRIVER_SCHEDULE_TEMPLATE_FILE.getAbsolutePath) + .set("spark.kubernetes.executor.podTemplateFile", EXECUTOR_TEMPLATE_FILE.getAbsolutePath) + runSparkPiAndVerifyCompletion( + driverPodChecker = (driverPod: Pod) => { + assert(driverPod.getMetadata.getName === driverPodName) + assert(driverPod.getSpec.getContainers.get(0).getImage === image) + assert(driverPod.getSpec.getContainers.get(0).getName === "test-driver-container") + assert(driverPod.getMetadata.getLabels.containsKey(LABEL_KEY)) + assert(driverPod.getMetadata.getLabels.get(LABEL_KEY) === "driver-template-label-value") + assert(driverPod.getSpec.getPriority() === 2000001000) + }, + executorPodChecker = (executorPod: Pod) => { + assert(executorPod.getSpec.getContainers.get(0).getImage === image) + assert(executorPod.getSpec.getContainers.get(0).getName === "test-executor-container") + assert(executorPod.getMetadata.getLabels.containsKey(LABEL_KEY)) + assert(executorPod.getMetadata.getLabels.get(LABEL_KEY) === "executor-template-label-value") + assert(executorPod.getSpec.getPriority() === 0) // When there is no default, 0 is used. 
+ } + ) + } } private[spark] object PodTemplateSuite { val LABEL_KEY = "template-label-key" val DRIVER_TEMPLATE_FILE = new File(getClass.getResource("/driver-template.yml").getFile) + val DRIVER_SCHEDULE_TEMPLATE_FILE = + new File(getClass.getResource("/driver-schedule-template.yml").getFile) val EXECUTOR_TEMPLATE_FILE = new File(getClass.getResource("/executor-template.yml").getFile) } From dfff8d8cd0e747a261bf74374a5797e7c2acaebb Mon Sep 17 00:00:00 2001 From: Yihong He Date: Thu, 3 Mar 2022 20:51:08 +0900 Subject: [PATCH 386/513] [SPARK-38353][PYTHON] Instrument __enter__ and __exit__ magic methods for Pandas API on Spark ### What changes were proposed in this pull request? - Add magic method \_\_enter\_\_ and \_\_exit\_\_ into **the special_function list** ### Why are the changes needed? - Improve the usage data accuracy for **with statement** so that external \_\_enter\_\_ and \_\_exit\_\_ calls are captured instead of internal calls For example, for the code below: ```python pdf = pd.DataFrame( [(0.2, 0.3), (0.0, 0.6), (0.6, 0.0), (0.2, 0.1)], columns=["dogs", "cats"] ) psdf = ps.from_pandas(pdf) with psdf.spark.cache() as cached_df: self.assert_eq(isinstance(cached_df, CachedDataFrame), True) self.assert_eq( repr(cached_df.spark.storage_level), repr(StorageLevel(True, True, False, True)) ) ``` Pandas-on-Spark usage logger records the internal call [self.spark.unpersist()](https://github.com/apache/spark/blob/master/python/pyspark/pandas/frame.py#L12518) since \_\_enter\_\_ and \_\_exit\_\_ methods of [CachedDataFrame](https://github.com/apache/spark/blob/master/python/pyspark/pandas/frame.py#L12492) are not instrumented. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing unit tests Closes #35687 from heyihong/SPARK-38353. Authored-by: Yihong He Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/usage_logging/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pyspark/pandas/usage_logging/__init__.py b/python/pyspark/pandas/usage_logging/__init__.py index b350faf6b9ca5..10fe616264fb6 100644 --- a/python/pyspark/pandas/usage_logging/__init__.py +++ b/python/pyspark/pandas/usage_logging/__init__.py @@ -135,6 +135,8 @@ def attach(logger_module: Union[str, ModuleType]) -> None: "__getitem__", "__setitem__", "__getattr__", + "__enter__", + "__exit__", ] ) From b71d6d07bb84300b343b9a487049d2566d26a5e9 Mon Sep 17 00:00:00 2001 From: Zhen Li Date: Thu, 3 Mar 2022 21:41:32 +0800 Subject: [PATCH 387/513] [SPARK-38378][SQL] Refactoring of the ANTLR grammar definition into separate Parser and Lexer files ### What changes were proposed in this pull request? Separating the mixed parser grammar defined in `SqlBase.g4` into separate parser and lexer grammars. * The parser grammar disallows any literal definitions. Thus all literals are replaced with names defined in the new lexer. * The lexer and parser has to be provided in two files. As ANTLR only allows one grammar per file. ### Why are the changes needed? This gives us a cleaner separation of parser and lexer in the original grammar. It also enables us to use the full power of ANTLR parser and lexer grammars: * Access to lexer specific rules: lexer specific rules (e.g. [Lexer mode](https://github.com/antlr/antlr4/blob/master/doc/lexer-rules.md#lexical-modes)) can be used for new SQL features. * Ability to reuse lexer rules: we can now use inheritance to have multiple lexers sharing a common set of lexical rules. 
* Clear order of tokens: the order the tokens are tokenized by the lexer in the order they appear in the lexer. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. The refactoring should not break any tests. Closes #35701 from zhenlineo/parser-lexer. Authored-by: Zhen Li Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 483 ++++++++++++ .../parser/{SqlBase.g4 => SqlBaseParser.g4} | 687 +++--------------- .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../sql/catalyst/parser/ParseDriver.scala | 4 +- .../spark/sql/catalyst/SQLKeywordSuite.scala | 17 +- 5 files changed, 616 insertions(+), 577 deletions(-) create mode 100644 sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 rename sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/{SqlBase.g4 => SqlBaseParser.g4} (69%) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 new file mode 100644 index 0000000000000..18bb19e9ccf13 --- /dev/null +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -0,0 +1,483 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar. + */ + +lexer grammar SqlBaseLexer; + +@members { + /** + * When true, parser should throw ParseExcetion for unclosed bracketed comment. + */ + public boolean has_unclosed_bracketed_comment = false; + + /** + * Verify whether current token is a valid decimal token (which contains dot). + * Returns true if the character that follows the token is not a digit or letter or underscore. + * + * For example: + * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'. + * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'. + * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'. + * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed + * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+' + * which is not a digit or letter or underscore. + */ + public boolean isValidDecimal() { + int nextChar = _input.LA(1); + if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || + nextChar == '_') { + return false; + } else { + return true; + } + } + + /** + * This method will be called when we see '/*' and try to match it as a bracketed comment. + * If the next character is '+', it should be parsed as hint later, and we cannot match + * it as a bracketed comment. + * + * Returns true if the next character is '+'. 
+ */ + public boolean isHint() { + int nextChar = _input.LA(1); + if (nextChar == '+') { + return true; + } else { + return false; + } + } + + /** + * This method will be called when the character stream ends and try to find out the + * unclosed bracketed comment. + * If the method be called, it means the end of the entire character stream match, + * and we set the flag and fail later. + */ + public void markUnclosedComment() { + has_unclosed_bracketed_comment = true; + } +} + +SEMICOLON: ';'; + +LEFT_PAREN: '('; +RIGHT_PAREN: ')'; +COMMA: ','; +DOT: '.'; +LEFT_BRACKET: '['; +RIGHT_BRACKET: ']'; + +// NOTE: If you add a new token in the list below, you should update the list of keywords +// and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`. + +//============================ +// Start of the keywords list +//============================ +//--SPARK-KEYWORD-LIST-START +ADD: 'ADD'; +AFTER: 'AFTER'; +ALL: 'ALL'; +ALTER: 'ALTER'; +ANALYZE: 'ANALYZE'; +AND: 'AND'; +ANTI: 'ANTI'; +ANY: 'ANY'; +ARCHIVE: 'ARCHIVE'; +ARRAY: 'ARRAY'; +AS: 'AS'; +ASC: 'ASC'; +AT: 'AT'; +AUTHORIZATION: 'AUTHORIZATION'; +BETWEEN: 'BETWEEN'; +BOTH: 'BOTH'; +BUCKET: 'BUCKET'; +BUCKETS: 'BUCKETS'; +BY: 'BY'; +CACHE: 'CACHE'; +CASCADE: 'CASCADE'; +CASE: 'CASE'; +CAST: 'CAST'; +CATALOG: 'CATALOG'; +CATALOGS: 'CATALOGS'; +CHANGE: 'CHANGE'; +CHECK: 'CHECK'; +CLEAR: 'CLEAR'; +CLUSTER: 'CLUSTER'; +CLUSTERED: 'CLUSTERED'; +CODEGEN: 'CODEGEN'; +COLLATE: 'COLLATE'; +COLLECTION: 'COLLECTION'; +COLUMN: 'COLUMN'; +COLUMNS: 'COLUMNS'; +COMMENT: 'COMMENT'; +COMMIT: 'COMMIT'; +COMPACT: 'COMPACT'; +COMPACTIONS: 'COMPACTIONS'; +COMPUTE: 'COMPUTE'; +CONCATENATE: 'CONCATENATE'; +CONSTRAINT: 'CONSTRAINT'; +COST: 'COST'; +CREATE: 'CREATE'; +CROSS: 'CROSS'; +CUBE: 'CUBE'; +CURRENT: 'CURRENT'; +CURRENT_DATE: 'CURRENT_DATE'; +CURRENT_TIME: 'CURRENT_TIME'; +CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; +CURRENT_USER: 'CURRENT_USER'; +DAY: 'DAY'; +DATA: 'DATA'; +DATABASE: 'DATABASE'; +DATABASES: 'DATABASES'; +DATEADD: 'DATEADD'; +DATE_ADD: 'DATE_ADD'; +DATEDIFF: 'DATEDIFF'; +DATE_DIFF: 'DATE_DIFF'; +DBPROPERTIES: 'DBPROPERTIES'; +DEFINED: 'DEFINED'; +DELETE: 'DELETE'; +DELIMITED: 'DELIMITED'; +DESC: 'DESC'; +DESCRIBE: 'DESCRIBE'; +DFS: 'DFS'; +DIRECTORIES: 'DIRECTORIES'; +DIRECTORY: 'DIRECTORY'; +DISTINCT: 'DISTINCT'; +DISTRIBUTE: 'DISTRIBUTE'; +DIV: 'DIV'; +DROP: 'DROP'; +ELSE: 'ELSE'; +END: 'END'; +ESCAPE: 'ESCAPE'; +ESCAPED: 'ESCAPED'; +EXCEPT: 'EXCEPT'; +EXCHANGE: 'EXCHANGE'; +EXISTS: 'EXISTS'; +EXPLAIN: 'EXPLAIN'; +EXPORT: 'EXPORT'; +EXTENDED: 'EXTENDED'; +EXTERNAL: 'EXTERNAL'; +EXTRACT: 'EXTRACT'; +FALSE: 'FALSE'; +FETCH: 'FETCH'; +FIELDS: 'FIELDS'; +FILTER: 'FILTER'; +FILEFORMAT: 'FILEFORMAT'; +FIRST: 'FIRST'; +FOLLOWING: 'FOLLOWING'; +FOR: 'FOR'; +FOREIGN: 'FOREIGN'; +FORMAT: 'FORMAT'; +FORMATTED: 'FORMATTED'; +FROM: 'FROM'; +FULL: 'FULL'; +FUNCTION: 'FUNCTION'; +FUNCTIONS: 'FUNCTIONS'; +GLOBAL: 'GLOBAL'; +GRANT: 'GRANT'; +GROUP: 'GROUP'; +GROUPING: 'GROUPING'; +HAVING: 'HAVING'; +HOUR: 'HOUR'; +IF: 'IF'; +IGNORE: 'IGNORE'; +IMPORT: 'IMPORT'; +IN: 'IN'; +INDEX: 'INDEX'; +INDEXES: 'INDEXES'; +INNER: 'INNER'; +INPATH: 'INPATH'; +INPUTFORMAT: 'INPUTFORMAT'; +INSERT: 'INSERT'; +INTERSECT: 'INTERSECT'; +INTERVAL: 'INTERVAL'; +INTO: 'INTO'; +IS: 'IS'; +ITEMS: 'ITEMS'; +JOIN: 'JOIN'; +KEYS: 'KEYS'; +LAST: 'LAST'; +LATERAL: 'LATERAL'; +LAZY: 'LAZY'; +LEADING: 'LEADING'; +LEFT: 'LEFT'; +LIKE: 'LIKE'; +ILIKE: 'ILIKE'; +LIMIT: 'LIMIT'; +LINES: 'LINES'; +LIST: 'LIST'; +LOAD: 'LOAD'; +LOCAL: 'LOCAL'; +LOCATION: 'LOCATION'; +LOCK: 'LOCK'; 
+LOCKS: 'LOCKS'; +LOGICAL: 'LOGICAL'; +MACRO: 'MACRO'; +MAP: 'MAP'; +MATCHED: 'MATCHED'; +MERGE: 'MERGE'; +MINUTE: 'MINUTE'; +MONTH: 'MONTH'; +MSCK: 'MSCK'; +NAMESPACE: 'NAMESPACE'; +NAMESPACES: 'NAMESPACES'; +NATURAL: 'NATURAL'; +NO: 'NO'; +NOT: 'NOT' | '!'; +NULL: 'NULL'; +NULLS: 'NULLS'; +OF: 'OF'; +ON: 'ON'; +ONLY: 'ONLY'; +OPTION: 'OPTION'; +OPTIONS: 'OPTIONS'; +OR: 'OR'; +ORDER: 'ORDER'; +OUT: 'OUT'; +OUTER: 'OUTER'; +OUTPUTFORMAT: 'OUTPUTFORMAT'; +OVER: 'OVER'; +OVERLAPS: 'OVERLAPS'; +OVERLAY: 'OVERLAY'; +OVERWRITE: 'OVERWRITE'; +PARTITION: 'PARTITION'; +PARTITIONED: 'PARTITIONED'; +PARTITIONS: 'PARTITIONS'; +PERCENTILE_CONT: 'PERCENTILE_CONT'; +PERCENTLIT: 'PERCENT'; +PIVOT: 'PIVOT'; +PLACING: 'PLACING'; +POSITION: 'POSITION'; +PRECEDING: 'PRECEDING'; +PRIMARY: 'PRIMARY'; +PRINCIPALS: 'PRINCIPALS'; +PROPERTIES: 'PROPERTIES'; +PURGE: 'PURGE'; +QUERY: 'QUERY'; +RANGE: 'RANGE'; +RECORDREADER: 'RECORDREADER'; +RECORDWRITER: 'RECORDWRITER'; +RECOVER: 'RECOVER'; +REDUCE: 'REDUCE'; +REFERENCES: 'REFERENCES'; +REFRESH: 'REFRESH'; +RENAME: 'RENAME'; +REPAIR: 'REPAIR'; +REPEATABLE: 'REPEATABLE'; +REPLACE: 'REPLACE'; +RESET: 'RESET'; +RESPECT: 'RESPECT'; +RESTRICT: 'RESTRICT'; +REVOKE: 'REVOKE'; +RIGHT: 'RIGHT'; +RLIKE: 'RLIKE' | 'REGEXP'; +ROLE: 'ROLE'; +ROLES: 'ROLES'; +ROLLBACK: 'ROLLBACK'; +ROLLUP: 'ROLLUP'; +ROW: 'ROW'; +ROWS: 'ROWS'; +SECOND: 'SECOND'; +SCHEMA: 'SCHEMA'; +SCHEMAS: 'SCHEMAS'; +SELECT: 'SELECT'; +SEMI: 'SEMI'; +SEPARATED: 'SEPARATED'; +SERDE: 'SERDE'; +SERDEPROPERTIES: 'SERDEPROPERTIES'; +SESSION_USER: 'SESSION_USER'; +SET: 'SET'; +SETMINUS: 'MINUS'; +SETS: 'SETS'; +SHOW: 'SHOW'; +SKEWED: 'SKEWED'; +SOME: 'SOME'; +SORT: 'SORT'; +SORTED: 'SORTED'; +START: 'START'; +STATISTICS: 'STATISTICS'; +STORED: 'STORED'; +STRATIFY: 'STRATIFY'; +STRUCT: 'STRUCT'; +SUBSTR: 'SUBSTR'; +SUBSTRING: 'SUBSTRING'; +SYNC: 'SYNC'; +SYSTEM_TIME: 'SYSTEM_TIME'; +SYSTEM_VERSION: 'SYSTEM_VERSION'; +TABLE: 'TABLE'; +TABLES: 'TABLES'; +TABLESAMPLE: 'TABLESAMPLE'; +TBLPROPERTIES: 'TBLPROPERTIES'; +TEMPORARY: 'TEMPORARY' | 'TEMP'; +TERMINATED: 'TERMINATED'; +THEN: 'THEN'; +TIME: 'TIME'; +TIMESTAMP: 'TIMESTAMP'; +TIMESTAMPADD: 'TIMESTAMPADD'; +TIMESTAMPDIFF: 'TIMESTAMPDIFF'; +TO: 'TO'; +TOUCH: 'TOUCH'; +TRAILING: 'TRAILING'; +TRANSACTION: 'TRANSACTION'; +TRANSACTIONS: 'TRANSACTIONS'; +TRANSFORM: 'TRANSFORM'; +TRIM: 'TRIM'; +TRUE: 'TRUE'; +TRUNCATE: 'TRUNCATE'; +TRY_CAST: 'TRY_CAST'; +TYPE: 'TYPE'; +UNARCHIVE: 'UNARCHIVE'; +UNBOUNDED: 'UNBOUNDED'; +UNCACHE: 'UNCACHE'; +UNION: 'UNION'; +UNIQUE: 'UNIQUE'; +UNKNOWN: 'UNKNOWN'; +UNLOCK: 'UNLOCK'; +UNSET: 'UNSET'; +UPDATE: 'UPDATE'; +USE: 'USE'; +USER: 'USER'; +USING: 'USING'; +VALUES: 'VALUES'; +VERSION: 'VERSION'; +VIEW: 'VIEW'; +VIEWS: 'VIEWS'; +WHEN: 'WHEN'; +WHERE: 'WHERE'; +WINDOW: 'WINDOW'; +WITH: 'WITH'; +WITHIN: 'WITHIN'; +YEAR: 'YEAR'; +ZONE: 'ZONE'; +//--SPARK-KEYWORD-LIST-END +//============================ +// End of the keywords list +//============================ + +EQ : '=' | '=='; +NSEQ: '<=>'; +NEQ : '<>'; +NEQJ: '!='; +LT : '<'; +LTE : '<=' | '!>'; +GT : '>'; +GTE : '>=' | '!<'; + +PLUS: '+'; +MINUS: '-'; +ASTERISK: '*'; +SLASH: '/'; +PERCENT: '%'; +TILDE: '~'; +AMPERSAND: '&'; +PIPE: '|'; +CONCAT_PIPE: '||'; +HAT: '^'; +COLON: ':'; +ARROW: '->'; +HENT_START: '/*+'; +HENT_END: '*/'; + +STRING + : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' + | '"' ( ~('"'|'\\') | ('\\' .) 
)* '"' + | 'R\'' (~'\'')* '\'' + | 'R"'(~'"')* '"' + ; + +BIGINT_LITERAL + : DIGIT+ 'L' + ; + +SMALLINT_LITERAL + : DIGIT+ 'S' + ; + +TINYINT_LITERAL + : DIGIT+ 'Y' + ; + +INTEGER_VALUE + : DIGIT+ + ; + +EXPONENT_VALUE + : DIGIT+ EXPONENT + | DECIMAL_DIGITS EXPONENT {isValidDecimal()}? + ; + +DECIMAL_VALUE + : DECIMAL_DIGITS {isValidDecimal()}? + ; + +FLOAT_LITERAL + : DIGIT+ EXPONENT? 'F' + | DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}? + ; + +DOUBLE_LITERAL + : DIGIT+ EXPONENT? 'D' + | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}? + ; + +BIGDECIMAL_LITERAL + : DIGIT+ EXPONENT? 'BD' + | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? + ; + +IDENTIFIER + : (LETTER | DIGIT | '_')+ + ; + +BACKQUOTED_IDENTIFIER + : '`' ( ~'`' | '``' )* '`' + ; + +fragment DECIMAL_DIGITS + : DIGIT+ '.' DIGIT* + | '.' DIGIT+ + ; + +fragment EXPONENT + : 'E' [+-]? DIGIT+ + ; + +fragment DIGIT + : [0-9] + ; + +fragment LETTER + : [A-Z] + ; + +SIMPLE_COMMENT + : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN) + ; + +BRACKETED_COMMENT + : '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(HIDDEN) + ; + +WS + : [ \r\n\t]+ -> channel(HIDDEN) + ; + +// Catch-all for anything we can't recognize. +// We use this to be able to ignore and recover all the text +// when splitting statements with DelimiterLexer +UNRECOGNIZED + : . + ; diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 similarity index 69% rename from sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 rename to sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 3f5052d51f2c5..76a00331edf76 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -14,9 +14,11 @@ * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar. */ -grammar SqlBase; +parser grammar SqlBaseParser; -@parser::members { +options { tokenVocab = SqlBaseLexer; } + +@members { /** * When false, INTERSECT is given the greater precedence over the other set * operations (UNION, EXCEPT and MINUS) as per the SQL standard. @@ -35,63 +37,8 @@ grammar SqlBase; public boolean SQL_standard_keyword_behavior = false; } -@lexer::members { - /** - * When true, parser should throw ParseExcetion for unclosed bracketed comment. - */ - public boolean has_unclosed_bracketed_comment = false; - - /** - * Verify whether current token is a valid decimal token (which contains dot). - * Returns true if the character that follows the token is not a digit or letter or underscore. - * - * For example: - * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'. - * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'. - * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'. - * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed - * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+' - * which is not a digit or letter or underscore. 
- */ - public boolean isValidDecimal() { - int nextChar = _input.LA(1); - if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || - nextChar == '_') { - return false; - } else { - return true; - } - } - - /** - * This method will be called when we see '/*' and try to match it as a bracketed comment. - * If the next character is '+', it should be parsed as hint later, and we cannot match - * it as a bracketed comment. - * - * Returns true if the next character is '+'. - */ - public boolean isHint() { - int nextChar = _input.LA(1); - if (nextChar == '+') { - return true; - } else { - return false; - } - } - - /** - * This method will be called when the character stream ends and try to find out the - * unclosed bracketed comment. - * If the method be called, it means the end of the entire character stream match, - * and we set the flag and fail later. - */ - public void markUnclosedComment() { - has_unclosed_bracketed_comment = true; - } -} - singleStatement - : statement ';'* EOF + : statement SEMICOLON* EOF ; singleExpression @@ -136,7 +83,7 @@ statement (RESTRICT | CASCADE)? #dropNamespace | SHOW namespaces ((FROM | IN) multipartIdentifier)? (LIKE? pattern=STRING)? #showNamespaces - | createTableHeader ('(' colTypeList ')')? tableProvider? + | createTableHeader (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider? createTableClauses (AS? query)? #createTable | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier @@ -146,7 +93,7 @@ statement createFileFormat | locationSpec | (TBLPROPERTIES tableProps=propertyList))* #createTableLike - | replaceTableHeader ('(' colTypeList ')')? tableProvider? + | replaceTableHeader (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider? createTableClauses (AS? query)? #replaceTable | ANALYZE TABLE multipartIdentifier partitionSpec? COMPUTE STATISTICS @@ -158,13 +105,13 @@ statement columns=qualifiedColTypeWithPositionList #addTableColumns | ALTER TABLE multipartIdentifier ADD (COLUMN | COLUMNS) - '(' columns=qualifiedColTypeWithPositionList ')' #addTableColumns + LEFT_PAREN columns=qualifiedColTypeWithPositionList RIGHT_PAREN #addTableColumns | ALTER TABLE table=multipartIdentifier RENAME COLUMN from=multipartIdentifier TO to=errorCapturingIdentifier #renameTableColumn | ALTER TABLE multipartIdentifier DROP (COLUMN | COLUMNS) - '(' columns=multipartIdentifierList ')' #dropTableColumns + LEFT_PAREN columns=multipartIdentifierList RIGHT_PAREN #dropTableColumns | ALTER TABLE multipartIdentifier DROP (COLUMN | COLUMNS) columns=multipartIdentifierList #dropTableColumns | ALTER (TABLE | VIEW) from=multipartIdentifier @@ -181,7 +128,8 @@ statement colName=multipartIdentifier colType colPosition? #hiveChangeColumn | ALTER TABLE table=multipartIdentifier partitionSpec? REPLACE COLUMNS - '(' columns=qualifiedColTypeWithPositionList ')' #hiveReplaceColumns + LEFT_PAREN columns=qualifiedColTypeWithPositionList + RIGHT_PAREN #hiveReplaceColumns | ALTER TABLE multipartIdentifier (partitionSpec)? SET SERDE STRING (WITH SERDEPROPERTIES propertyList)? #setTableSerDe | ALTER TABLE multipartIdentifier (partitionSpec)? @@ -191,7 +139,7 @@ statement | ALTER TABLE multipartIdentifier from=partitionSpec RENAME TO to=partitionSpec #renameTablePartition | ALTER (TABLE | VIEW) multipartIdentifier - DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE? #dropTablePartitions + DROP (IF EXISTS)? partitionSpec (COMMA partitionSpec)* PURGE? #dropTablePartitions | ALTER TABLE multipartIdentifier (partitionSpec)? 
SET locationSpec #setTableLocation | ALTER TABLE multipartIdentifier RECOVER PARTITIONS #recoverPartitions @@ -205,12 +153,12 @@ statement (TBLPROPERTIES propertyList))* AS query #createView | CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW - tableIdentifier ('(' colTypeList ')')? tableProvider + tableIdentifier (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider (OPTIONS propertyList)? #createTempViewUsing | ALTER VIEW multipartIdentifier AS? query #alterViewQuery | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF NOT EXISTS)? multipartIdentifier AS className=STRING - (USING resource (',' resource)*)? #createFunction + (USING resource (COMMA resource)*)? #createFunction | DROP TEMPORARY? FUNCTION (IF EXISTS)? multipartIdentifier #dropFunction | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)? statement #explain @@ -219,7 +167,7 @@ statement | SHOW TABLE EXTENDED ((FROM | IN) ns=multipartIdentifier)? LIKE pattern=STRING partitionSpec? #showTableExtended | SHOW TBLPROPERTIES table=multipartIdentifier - ('(' key=propertyKey ')')? #showTblProperties + (LEFT_PAREN key=propertyKey RIGHT_PAREN)? #showTblProperties | SHOW COLUMNS (FROM | IN) table=multipartIdentifier ((FROM | IN) ns=multipartIdentifier)? #showColumns | SHOW VIEWS ((FROM | IN) multipartIdentifier)? @@ -264,7 +212,7 @@ statement | RESET .*? #resetConfiguration | CREATE INDEX (IF NOT EXISTS)? identifier ON TABLE? multipartIdentifier (USING indexType=identifier)? - '(' columns=multipartIdentifierPropertyList ')' + LEFT_PAREN columns=multipartIdentifierPropertyList RIGHT_PAREN (OPTIONS options=propertyList)? #createIndex | DROP INDEX (IF EXISTS)? identifier ON TABLE? multipartIdentifier #dropIndex | unsupportedHiveNativeCommands .*? #failNativeCommand @@ -369,7 +317,7 @@ partitionSpecLocation ; partitionSpec - : PARTITION '(' partitionVal (',' partitionVal)* ')' + : PARTITION LEFT_PAREN partitionVal (COMMA partitionVal)* RIGHT_PAREN ; partitionVal @@ -397,15 +345,15 @@ describeFuncName ; describeColName - : nameParts+=identifier ('.' nameParts+=identifier)* + : nameParts+=identifier (DOT nameParts+=identifier)* ; ctes - : WITH namedQuery (',' namedQuery)* + : WITH namedQuery (COMMA namedQuery)* ; namedQuery - : name=errorCapturingIdentifier (columnAliases=identifierList)? AS? '(' query ')' + : name=errorCapturingIdentifier (columnAliases=identifierList)? AS? LEFT_PAREN query RIGHT_PAREN ; tableProvider @@ -425,7 +373,7 @@ createTableClauses ; propertyList - : '(' property (',' property)* ')' + : LEFT_PAREN property (COMMA property)* RIGHT_PAREN ; property @@ -433,7 +381,7 @@ property ; propertyKey - : identifier ('.' identifier)* + : identifier (DOT identifier)* | STRING ; @@ -445,11 +393,11 @@ propertyValue ; constantList - : '(' constant (',' constant)* ')' + : LEFT_PAREN constant (COMMA constant)* RIGHT_PAREN ; nestedConstantList - : '(' constantList (',' constantList)* ')' + : LEFT_PAREN constantList (COMMA constantList)* RIGHT_PAREN ; createFileFormat @@ -477,17 +425,17 @@ dmlStatementNoWith | UPDATE multipartIdentifier tableAlias setClause whereClause? #updateTable | MERGE INTO target=multipartIdentifier targetAlias=tableAlias USING (source=multipartIdentifier | - '(' sourceQuery=query')') sourceAlias=tableAlias + LEFT_PAREN sourceQuery=query RIGHT_PAREN) sourceAlias=tableAlias ON mergeCondition=booleanExpression matchedClause* notMatchedClause* #mergeIntoTable ; queryOrganization - : (ORDER BY order+=sortItem (',' order+=sortItem)*)? - (CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)? 
- (DISTRIBUTE BY distributeBy+=expression (',' distributeBy+=expression)*)? - (SORT BY sort+=sortItem (',' sort+=sortItem)*)? + : (ORDER BY order+=sortItem (COMMA order+=sortItem)*)? + (CLUSTER BY clusterBy+=expression (COMMA clusterBy+=expression)*)? + (DISTRIBUTE BY distributeBy+=expression (COMMA distributeBy+=expression)*)? + (SORT BY sort+=sortItem (COMMA sort+=sortItem)*)? windowClause? (LIMIT (ALL | limit=expression))? ; @@ -511,7 +459,7 @@ queryPrimary | fromStatement #fromStmt | TABLE multipartIdentifier #table | inlineTable #inlineTableDefault1 - | '(' query ')' #subquery + | LEFT_PAREN query RIGHT_PAREN #subquery ; sortItem @@ -553,13 +501,13 @@ querySpecification ; transformClause - : (SELECT kind=TRANSFORM '(' setQuantifier? expressionSeq ')' + : (SELECT kind=TRANSFORM LEFT_PAREN setQuantifier? expressionSeq RIGHT_PAREN | kind=MAP setQuantifier? expressionSeq | kind=REDUCE setQuantifier? expressionSeq) inRowFormat=rowFormat? (RECORDWRITER recordWriter=STRING)? USING script=STRING - (AS (identifierSeq | colTypeList | ('(' (identifierSeq | colTypeList) ')')))? + (AS (identifierSeq | colTypeList | (LEFT_PAREN (identifierSeq | colTypeList) RIGHT_PAREN)))? outRowFormat=rowFormat? (RECORDREADER recordReader=STRING)? ; @@ -587,12 +535,12 @@ matchedAction notMatchedAction : INSERT ASTERISK - | INSERT '(' columns=multipartIdentifierList ')' - VALUES '(' expression (',' expression)* ')' + | INSERT LEFT_PAREN columns=multipartIdentifierList RIGHT_PAREN + VALUES LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN ; assignmentList - : assignment (',' assignment)* + : assignment (COMMA assignment)* ; assignment @@ -608,16 +556,16 @@ havingClause ; hint - : '/*+' hintStatements+=hintStatement (','? hintStatements+=hintStatement)* '*/' + : HENT_START hintStatements+=hintStatement (COMMA? hintStatements+=hintStatement)* HENT_END ; hintStatement : hintName=identifier - | hintName=identifier '(' parameters+=primaryExpression (',' parameters+=primaryExpression)* ')' + | hintName=identifier LEFT_PAREN parameters+=primaryExpression (COMMA parameters+=primaryExpression)* RIGHT_PAREN ; fromClause - : FROM relation (',' relation)* lateralView* pivotClause? + : FROM relation (COMMA relation)* lateralView* pivotClause? ; temporalClause @@ -627,11 +575,11 @@ temporalClause aggregationClause : GROUP BY groupingExpressionsWithGroupingAnalytics+=groupByClause - (',' groupingExpressionsWithGroupingAnalytics+=groupByClause)* - | GROUP BY groupingExpressions+=expression (',' groupingExpressions+=expression)* ( + (COMMA groupingExpressionsWithGroupingAnalytics+=groupByClause)* + | GROUP BY groupingExpressions+=expression (COMMA groupingExpressions+=expression)* ( WITH kind=ROLLUP | WITH kind=CUBE - | kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')')? + | kind=GROUPING SETS LEFT_PAREN groupingSet (COMMA groupingSet)* RIGHT_PAREN)? ; groupByClause @@ -640,8 +588,8 @@ groupByClause ; groupingAnalytics - : (ROLLUP | CUBE) '(' groupingSet (',' groupingSet)* ')' - | GROUPING SETS '(' groupingElement (',' groupingElement)* ')' + : (ROLLUP | CUBE) LEFT_PAREN groupingSet (COMMA groupingSet)* RIGHT_PAREN + | GROUPING SETS LEFT_PAREN groupingElement (COMMA groupingElement)* RIGHT_PAREN ; groupingElement @@ -650,17 +598,17 @@ groupingElement ; groupingSet - : '(' (expression (',' expression)*)? ')' + : LEFT_PAREN (expression (COMMA expression)*)? 
RIGHT_PAREN | expression ; pivotClause - : PIVOT '(' aggregates=namedExpressionSeq FOR pivotColumn IN '(' pivotValues+=pivotValue (',' pivotValues+=pivotValue)* ')' ')' + : PIVOT LEFT_PAREN aggregates=namedExpressionSeq FOR pivotColumn IN LEFT_PAREN pivotValues+=pivotValue (COMMA pivotValues+=pivotValue)* RIGHT_PAREN RIGHT_PAREN ; pivotColumn : identifiers+=identifier - | '(' identifiers+=identifier (',' identifiers+=identifier)* ')' + | LEFT_PAREN identifiers+=identifier (COMMA identifiers+=identifier)* RIGHT_PAREN ; pivotValue @@ -668,7 +616,7 @@ pivotValue ; lateralView - : LATERAL VIEW (OUTER)? qualifiedName '(' (expression (',' expression)*)? ')' tblName=identifier (AS? colName+=identifier (',' colName+=identifier)*)? + : LATERAL VIEW (OUTER)? qualifiedName LEFT_PAREN (expression (COMMA expression)*)? RIGHT_PAREN tblName=identifier (AS? colName+=identifier (COMMA colName+=identifier)*)? ; setQuantifier @@ -701,27 +649,27 @@ joinCriteria ; sample - : TABLESAMPLE '(' sampleMethod? ')' (REPEATABLE '('seed=INTEGER_VALUE')')? + : TABLESAMPLE LEFT_PAREN sampleMethod? RIGHT_PAREN (REPEATABLE LEFT_PAREN seed=INTEGER_VALUE RIGHT_PAREN)? ; sampleMethod : negativeSign=MINUS? percentage=(INTEGER_VALUE | DECIMAL_VALUE) PERCENTLIT #sampleByPercentile | expression ROWS #sampleByRows | sampleType=BUCKET numerator=INTEGER_VALUE OUT OF denominator=INTEGER_VALUE - (ON (identifier | qualifiedName '(' ')'))? #sampleByBucket + (ON (identifier | qualifiedName LEFT_PAREN RIGHT_PAREN))? #sampleByBucket | bytes=expression #sampleByBytes ; identifierList - : '(' identifierSeq ')' + : LEFT_PAREN identifierSeq RIGHT_PAREN ; identifierSeq - : ident+=errorCapturingIdentifier (',' ident+=errorCapturingIdentifier)* + : ident+=errorCapturingIdentifier (COMMA ident+=errorCapturingIdentifier)* ; orderedIdentifierList - : '(' orderedIdentifier (',' orderedIdentifier)* ')' + : LEFT_PAREN orderedIdentifier (COMMA orderedIdentifier)* RIGHT_PAREN ; orderedIdentifier @@ -729,7 +677,7 @@ orderedIdentifier ; identifierCommentList - : '(' identifierComment (',' identifierComment)* ')' + : LEFT_PAREN identifierComment (COMMA identifierComment)* RIGHT_PAREN ; identifierComment @@ -738,19 +686,19 @@ identifierComment relationPrimary : multipartIdentifier temporalClause? - sample? tableAlias #tableName - | '(' query ')' sample? tableAlias #aliasedQuery - | '(' relation ')' sample? tableAlias #aliasedRelation - | inlineTable #inlineTableDefault2 - | functionTable #tableValuedFunction + sample? tableAlias #tableName + | LEFT_PAREN query RIGHT_PAREN sample? tableAlias #aliasedQuery + | LEFT_PAREN relation RIGHT_PAREN sample? tableAlias #aliasedRelation + | inlineTable #inlineTableDefault2 + | functionTable #tableValuedFunction ; inlineTable - : VALUES expression (',' expression)* tableAlias + : VALUES expression (COMMA expression)* tableAlias ; functionTable - : funcName=functionName '(' (expression (',' expression)*)? ')' tableAlias + : funcName=functionName LEFT_PAREN (expression (COMMA expression)*)? RIGHT_PAREN tableAlias ; tableAlias @@ -768,15 +716,15 @@ rowFormat ; multipartIdentifierList - : multipartIdentifier (',' multipartIdentifier)* + : multipartIdentifier (COMMA multipartIdentifier)* ; multipartIdentifier - : parts+=errorCapturingIdentifier ('.' 
parts+=errorCapturingIdentifier)* + : parts+=errorCapturingIdentifier (DOT parts+=errorCapturingIdentifier)* ; multipartIdentifierPropertyList - : multipartIdentifierProperty (',' multipartIdentifierProperty)* + : multipartIdentifierProperty (COMMA multipartIdentifierProperty)* ; multipartIdentifierProperty @@ -784,11 +732,11 @@ multipartIdentifierProperty ; tableIdentifier - : (db=errorCapturingIdentifier '.')? table=errorCapturingIdentifier + : (db=errorCapturingIdentifier DOT)? table=errorCapturingIdentifier ; functionIdentifier - : (db=errorCapturingIdentifier '.')? function=errorCapturingIdentifier + : (db=errorCapturingIdentifier DOT)? function=errorCapturingIdentifier ; namedExpression @@ -796,11 +744,11 @@ namedExpression ; namedExpressionSeq - : namedExpression (',' namedExpression)* + : namedExpression (COMMA namedExpression)* ; partitionFieldList - : '(' fields+=partitionField (',' fields+=partitionField)* ')' + : LEFT_PAREN fields+=partitionField (COMMA fields+=partitionField)* RIGHT_PAREN ; partitionField @@ -809,9 +757,9 @@ partitionField ; transform - : qualifiedName #identityTransform + : qualifiedName #identityTransform | transformName=identifier - '(' argument+=transformArgument (',' argument+=transformArgument)* ')' #applyTransform + LEFT_PAREN argument+=transformArgument (COMMA argument+=transformArgument)* RIGHT_PAREN #applyTransform ; transformArgument @@ -824,12 +772,12 @@ expression ; expressionSeq - : expression (',' expression)* + : expression (COMMA expression)* ; booleanExpression : NOT booleanExpression #logicalNot - | EXISTS '(' query ')' #exists + | EXISTS LEFT_PAREN query RIGHT_PAREN #exists | valueExpression predicate? #predicated | left=booleanExpression operator=AND right=booleanExpression #logicalBinary | left=booleanExpression operator=OR right=booleanExpression #logicalBinary @@ -837,10 +785,10 @@ booleanExpression predicate : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression - | NOT? kind=IN '(' expression (',' expression)* ')' - | NOT? kind=IN '(' query ')' + | NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN + | NOT? kind=IN LEFT_PAREN query RIGHT_PAREN | NOT? kind=RLIKE pattern=valueExpression - | NOT? kind=(LIKE | ILIKE) quantifier=(ANY | SOME | ALL) ('('')' | '(' expression (',' expression)* ')') + | NOT? kind=(LIKE | ILIKE) quantifier=(ANY | SOME | ALL) (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN) | NOT? kind=(LIKE | ILIKE) pattern=valueExpression (ESCAPE escapeChar=STRING)? | IS NOT? kind=NULL | IS NOT? kind=(TRUE | FALSE | UNKNOWN) @@ -860,38 +808,38 @@ valueExpression primaryExpression : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER) #currentLike - | name=(TIMESTAMPADD | DATEADD | DATE_ADD) '(' unit=identifier ',' unitsAmount=valueExpression ',' timestamp=valueExpression ')' #timestampadd - | name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF) '(' unit=identifier ',' startTimestamp=valueExpression ',' endTimestamp=valueExpression ')' #timestampdiff + | name=(TIMESTAMPADD | DATEADD | DATE_ADD) LEFT_PAREN unit=identifier COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN #timestampadd + | name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF) LEFT_PAREN unit=identifier COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase | CASE value=expression whenClause+ (ELSE elseExpression=expression)? 
END #simpleCase - | name=(CAST | TRY_CAST) '(' expression AS dataType ')' #cast - | STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct - | FIRST '(' expression (IGNORE NULLS)? ')' #first - | LAST '(' expression (IGNORE NULLS)? ')' #last - | POSITION '(' substr=valueExpression IN str=valueExpression ')' #position + | name=(CAST | TRY_CAST) LEFT_PAREN expression AS dataType RIGHT_PAREN #cast + | STRUCT LEFT_PAREN (argument+=namedExpression (COMMA argument+=namedExpression)*)? RIGHT_PAREN #struct + | FIRST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #first + | LAST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #last + | POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression RIGHT_PAREN #position | constant #constantDefault | ASTERISK #star - | qualifiedName '.' ASTERISK #star - | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor - | '(' query ')' #subqueryExpression - | functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')' - (FILTER '(' WHERE where=booleanExpression ')')? + | qualifiedName DOT ASTERISK #star + | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN #rowConstructor + | LEFT_PAREN query RIGHT_PAREN #subqueryExpression + | functionName LEFT_PAREN (setQuantifier? argument+=expression (COMMA argument+=expression)*)? RIGHT_PAREN + (FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)? (nullsOption=(IGNORE | RESPECT) NULLS)? ( OVER windowSpec)? #functionCall - | identifier '->' expression #lambda - | '(' identifier (',' identifier)+ ')' '->' expression #lambda - | value=primaryExpression '[' index=valueExpression ']' #subscript + | identifier ARROW expression #lambda + | LEFT_PAREN identifier (COMMA identifier)+ RIGHT_PAREN ARROW expression #lambda + | value=primaryExpression LEFT_BRACKET index=valueExpression RIGHT_BRACKET #subscript | identifier #columnReference - | base=primaryExpression '.' fieldName=identifier #dereference - | '(' expression ')' #parenthesizedExpression - | EXTRACT '(' field=identifier FROM source=valueExpression ')' #extract - | (SUBSTR | SUBSTRING) '(' str=valueExpression (FROM | ',') pos=valueExpression - ((FOR | ',') len=valueExpression)? ')' #substring - | TRIM '(' trimOption=(BOTH | LEADING | TRAILING)? (trimStr=valueExpression)? - FROM srcStr=valueExpression ')' #trim - | OVERLAY '(' input=valueExpression PLACING replace=valueExpression - FROM position=valueExpression (FOR length=valueExpression)? ')' #overlay - | PERCENTILE_CONT '(' percentage=valueExpression ')' - WITHIN GROUP '(' ORDER BY sortItem ')' #percentile + | base=primaryExpression DOT fieldName=identifier #dereference + | LEFT_PAREN expression RIGHT_PAREN #parenthesizedExpression + | EXTRACT LEFT_PAREN field=identifier FROM source=valueExpression RIGHT_PAREN #extract + | (SUBSTR | SUBSTRING) LEFT_PAREN str=valueExpression (FROM | COMMA) pos=valueExpression + ((FOR | COMMA) len=valueExpression)? RIGHT_PAREN #substring + | TRIM LEFT_PAREN trimOption=(BOTH | LEADING | TRAILING)? (trimStr=valueExpression)? + FROM srcStr=valueExpression RIGHT_PAREN #trim + | OVERLAY LEFT_PAREN input=valueExpression PLACING replace=valueExpression + FROM position=valueExpression (FOR length=valueExpression)? 
RIGHT_PAREN #overlay + | PERCENTILE_CONT LEFT_PAREN percentage=valueExpression RIGHT_PAREN + WITHIN GROUP LEFT_PAREN ORDER BY sortItem RIGHT_PAREN #percentile ; constant @@ -948,17 +896,18 @@ colPosition ; dataType - : complex=ARRAY '<' dataType '>' #complexDataType - | complex=MAP '<' dataType ',' dataType '>' #complexDataType - | complex=STRUCT ('<' complexColTypeList? '>' | NEQ) #complexDataType + : complex=ARRAY LT dataType GT #complexDataType + | complex=MAP LT dataType COMMA dataType GT #complexDataType + | complex=STRUCT (LT complexColTypeList? GT | NEQ) #complexDataType | INTERVAL from=(YEAR | MONTH) (TO to=MONTH)? #yearMonthIntervalDataType | INTERVAL from=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))? #dayTimeIntervalDataType - | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')? #primitiveDataType + | identifier (LEFT_PAREN INTEGER_VALUE + (COMMA INTEGER_VALUE)* RIGHT_PAREN)? #primitiveDataType ; qualifiedColTypeWithPositionList - : qualifiedColTypeWithPosition (',' qualifiedColTypeWithPosition)* + : qualifiedColTypeWithPosition (COMMA qualifiedColTypeWithPosition)* ; qualifiedColTypeWithPosition @@ -966,7 +915,7 @@ qualifiedColTypeWithPosition ; colTypeList - : colType (',' colType)* + : colType (COMMA colType)* ; colType @@ -974,11 +923,11 @@ colType ; complexColTypeList - : complexColType (',' complexColType)* + : complexColType (COMMA complexColType)* ; complexColType - : identifier ':'? dataType (NOT NULL)? commentSpec? + : identifier COLON? dataType (NOT NULL)? commentSpec? ; whenClause @@ -986,7 +935,7 @@ whenClause ; windowClause - : WINDOW namedWindow (',' namedWindow)* + : WINDOW namedWindow (COMMA namedWindow)* ; namedWindow @@ -994,14 +943,14 @@ namedWindow ; windowSpec - : name=errorCapturingIdentifier #windowRef - | '('name=errorCapturingIdentifier')' #windowRef - | '(' - ( CLUSTER BY partition+=expression (',' partition+=expression)* - | ((PARTITION | DISTRIBUTE) BY partition+=expression (',' partition+=expression)*)? - ((ORDER | SORT) BY sortItem (',' sortItem)*)?) + : name=errorCapturingIdentifier #windowRef + | LEFT_PAREN name=errorCapturingIdentifier RIGHT_PAREN #windowRef + | LEFT_PAREN + ( CLUSTER BY partition+=expression (COMMA partition+=expression)* + | ((PARTITION | DISTRIBUTE) BY partition+=expression (COMMA partition+=expression)*)? + ((ORDER | SORT) BY sortItem (COMMA sortItem)*)?) windowFrame? - ')' #windowDef + RIGHT_PAREN #windowDef ; windowFrame @@ -1018,7 +967,7 @@ frameBound ; qualifiedNameList - : qualifiedName (',' qualifiedName)* + : qualifiedName (COMMA qualifiedName)* ; functionName @@ -1029,7 +978,7 @@ functionName ; qualifiedName - : identifier ('.' identifier)* + : identifier (DOT identifier)* ; // this rule is used for explicitly capturing wrong identifiers such as test-table, which should actually be `test-table` @@ -1592,401 +1541,3 @@ nonReserved | ZONE //--DEFAULT-NON-RESERVED-END ; - -// NOTE: If you add a new token in the list below, you should update the list of keywords -// and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`. 
- -//============================ -// Start of the keywords list -//============================ -//--SPARK-KEYWORD-LIST-START -ADD: 'ADD'; -AFTER: 'AFTER'; -ALL: 'ALL'; -ALTER: 'ALTER'; -ANALYZE: 'ANALYZE'; -AND: 'AND'; -ANTI: 'ANTI'; -ANY: 'ANY'; -ARCHIVE: 'ARCHIVE'; -ARRAY: 'ARRAY'; -AS: 'AS'; -ASC: 'ASC'; -AT: 'AT'; -AUTHORIZATION: 'AUTHORIZATION'; -BETWEEN: 'BETWEEN'; -BOTH: 'BOTH'; -BUCKET: 'BUCKET'; -BUCKETS: 'BUCKETS'; -BY: 'BY'; -CACHE: 'CACHE'; -CASCADE: 'CASCADE'; -CASE: 'CASE'; -CAST: 'CAST'; -CATALOG: 'CATALOG'; -CATALOGS: 'CATALOGS'; -CHANGE: 'CHANGE'; -CHECK: 'CHECK'; -CLEAR: 'CLEAR'; -CLUSTER: 'CLUSTER'; -CLUSTERED: 'CLUSTERED'; -CODEGEN: 'CODEGEN'; -COLLATE: 'COLLATE'; -COLLECTION: 'COLLECTION'; -COLUMN: 'COLUMN'; -COLUMNS: 'COLUMNS'; -COMMENT: 'COMMENT'; -COMMIT: 'COMMIT'; -COMPACT: 'COMPACT'; -COMPACTIONS: 'COMPACTIONS'; -COMPUTE: 'COMPUTE'; -CONCATENATE: 'CONCATENATE'; -CONSTRAINT: 'CONSTRAINT'; -COST: 'COST'; -CREATE: 'CREATE'; -CROSS: 'CROSS'; -CUBE: 'CUBE'; -CURRENT: 'CURRENT'; -CURRENT_DATE: 'CURRENT_DATE'; -CURRENT_TIME: 'CURRENT_TIME'; -CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; -CURRENT_USER: 'CURRENT_USER'; -DAY: 'DAY'; -DATA: 'DATA'; -DATABASE: 'DATABASE'; -DATABASES: 'DATABASES'; -DATEADD: 'DATEADD'; -DATE_ADD: 'DATE_ADD'; -DATEDIFF: 'DATEDIFF'; -DATE_DIFF: 'DATE_DIFF'; -DBPROPERTIES: 'DBPROPERTIES'; -DEFINED: 'DEFINED'; -DELETE: 'DELETE'; -DELIMITED: 'DELIMITED'; -DESC: 'DESC'; -DESCRIBE: 'DESCRIBE'; -DFS: 'DFS'; -DIRECTORIES: 'DIRECTORIES'; -DIRECTORY: 'DIRECTORY'; -DISTINCT: 'DISTINCT'; -DISTRIBUTE: 'DISTRIBUTE'; -DIV: 'DIV'; -DROP: 'DROP'; -ELSE: 'ELSE'; -END: 'END'; -ESCAPE: 'ESCAPE'; -ESCAPED: 'ESCAPED'; -EXCEPT: 'EXCEPT'; -EXCHANGE: 'EXCHANGE'; -EXISTS: 'EXISTS'; -EXPLAIN: 'EXPLAIN'; -EXPORT: 'EXPORT'; -EXTENDED: 'EXTENDED'; -EXTERNAL: 'EXTERNAL'; -EXTRACT: 'EXTRACT'; -FALSE: 'FALSE'; -FETCH: 'FETCH'; -FIELDS: 'FIELDS'; -FILTER: 'FILTER'; -FILEFORMAT: 'FILEFORMAT'; -FIRST: 'FIRST'; -FOLLOWING: 'FOLLOWING'; -FOR: 'FOR'; -FOREIGN: 'FOREIGN'; -FORMAT: 'FORMAT'; -FORMATTED: 'FORMATTED'; -FROM: 'FROM'; -FULL: 'FULL'; -FUNCTION: 'FUNCTION'; -FUNCTIONS: 'FUNCTIONS'; -GLOBAL: 'GLOBAL'; -GRANT: 'GRANT'; -GROUP: 'GROUP'; -GROUPING: 'GROUPING'; -HAVING: 'HAVING'; -HOUR: 'HOUR'; -IF: 'IF'; -IGNORE: 'IGNORE'; -IMPORT: 'IMPORT'; -IN: 'IN'; -INDEX: 'INDEX'; -INDEXES: 'INDEXES'; -INNER: 'INNER'; -INPATH: 'INPATH'; -INPUTFORMAT: 'INPUTFORMAT'; -INSERT: 'INSERT'; -INTERSECT: 'INTERSECT'; -INTERVAL: 'INTERVAL'; -INTO: 'INTO'; -IS: 'IS'; -ITEMS: 'ITEMS'; -JOIN: 'JOIN'; -KEYS: 'KEYS'; -LAST: 'LAST'; -LATERAL: 'LATERAL'; -LAZY: 'LAZY'; -LEADING: 'LEADING'; -LEFT: 'LEFT'; -LIKE: 'LIKE'; -ILIKE: 'ILIKE'; -LIMIT: 'LIMIT'; -LINES: 'LINES'; -LIST: 'LIST'; -LOAD: 'LOAD'; -LOCAL: 'LOCAL'; -LOCATION: 'LOCATION'; -LOCK: 'LOCK'; -LOCKS: 'LOCKS'; -LOGICAL: 'LOGICAL'; -MACRO: 'MACRO'; -MAP: 'MAP'; -MATCHED: 'MATCHED'; -MERGE: 'MERGE'; -MINUTE: 'MINUTE'; -MONTH: 'MONTH'; -MSCK: 'MSCK'; -NAMESPACE: 'NAMESPACE'; -NAMESPACES: 'NAMESPACES'; -NATURAL: 'NATURAL'; -NO: 'NO'; -NOT: 'NOT' | '!'; -NULL: 'NULL'; -NULLS: 'NULLS'; -OF: 'OF'; -ON: 'ON'; -ONLY: 'ONLY'; -OPTION: 'OPTION'; -OPTIONS: 'OPTIONS'; -OR: 'OR'; -ORDER: 'ORDER'; -OUT: 'OUT'; -OUTER: 'OUTER'; -OUTPUTFORMAT: 'OUTPUTFORMAT'; -OVER: 'OVER'; -OVERLAPS: 'OVERLAPS'; -OVERLAY: 'OVERLAY'; -OVERWRITE: 'OVERWRITE'; -PARTITION: 'PARTITION'; -PARTITIONED: 'PARTITIONED'; -PARTITIONS: 'PARTITIONS'; -PERCENTILE_CONT: 'PERCENTILE_CONT'; -PERCENTLIT: 'PERCENT'; -PIVOT: 'PIVOT'; -PLACING: 'PLACING'; -POSITION: 'POSITION'; -PRECEDING: 'PRECEDING'; 
-PRIMARY: 'PRIMARY'; -PRINCIPALS: 'PRINCIPALS'; -PROPERTIES: 'PROPERTIES'; -PURGE: 'PURGE'; -QUERY: 'QUERY'; -RANGE: 'RANGE'; -RECORDREADER: 'RECORDREADER'; -RECORDWRITER: 'RECORDWRITER'; -RECOVER: 'RECOVER'; -REDUCE: 'REDUCE'; -REFERENCES: 'REFERENCES'; -REFRESH: 'REFRESH'; -RENAME: 'RENAME'; -REPAIR: 'REPAIR'; -REPEATABLE: 'REPEATABLE'; -REPLACE: 'REPLACE'; -RESET: 'RESET'; -RESPECT: 'RESPECT'; -RESTRICT: 'RESTRICT'; -REVOKE: 'REVOKE'; -RIGHT: 'RIGHT'; -RLIKE: 'RLIKE' | 'REGEXP'; -ROLE: 'ROLE'; -ROLES: 'ROLES'; -ROLLBACK: 'ROLLBACK'; -ROLLUP: 'ROLLUP'; -ROW: 'ROW'; -ROWS: 'ROWS'; -SECOND: 'SECOND'; -SCHEMA: 'SCHEMA'; -SCHEMAS: 'SCHEMAS'; -SELECT: 'SELECT'; -SEMI: 'SEMI'; -SEPARATED: 'SEPARATED'; -SERDE: 'SERDE'; -SERDEPROPERTIES: 'SERDEPROPERTIES'; -SESSION_USER: 'SESSION_USER'; -SET: 'SET'; -SETMINUS: 'MINUS'; -SETS: 'SETS'; -SHOW: 'SHOW'; -SKEWED: 'SKEWED'; -SOME: 'SOME'; -SORT: 'SORT'; -SORTED: 'SORTED'; -START: 'START'; -STATISTICS: 'STATISTICS'; -STORED: 'STORED'; -STRATIFY: 'STRATIFY'; -STRUCT: 'STRUCT'; -SUBSTR: 'SUBSTR'; -SUBSTRING: 'SUBSTRING'; -SYNC: 'SYNC'; -SYSTEM_TIME: 'SYSTEM_TIME'; -SYSTEM_VERSION: 'SYSTEM_VERSION'; -TABLE: 'TABLE'; -TABLES: 'TABLES'; -TABLESAMPLE: 'TABLESAMPLE'; -TBLPROPERTIES: 'TBLPROPERTIES'; -TEMPORARY: 'TEMPORARY' | 'TEMP'; -TERMINATED: 'TERMINATED'; -THEN: 'THEN'; -TIME: 'TIME'; -TIMESTAMP: 'TIMESTAMP'; -TIMESTAMPADD: 'TIMESTAMPADD'; -TIMESTAMPDIFF: 'TIMESTAMPDIFF'; -TO: 'TO'; -TOUCH: 'TOUCH'; -TRAILING: 'TRAILING'; -TRANSACTION: 'TRANSACTION'; -TRANSACTIONS: 'TRANSACTIONS'; -TRANSFORM: 'TRANSFORM'; -TRIM: 'TRIM'; -TRUE: 'TRUE'; -TRUNCATE: 'TRUNCATE'; -TRY_CAST: 'TRY_CAST'; -TYPE: 'TYPE'; -UNARCHIVE: 'UNARCHIVE'; -UNBOUNDED: 'UNBOUNDED'; -UNCACHE: 'UNCACHE'; -UNION: 'UNION'; -UNIQUE: 'UNIQUE'; -UNKNOWN: 'UNKNOWN'; -UNLOCK: 'UNLOCK'; -UNSET: 'UNSET'; -UPDATE: 'UPDATE'; -USE: 'USE'; -USER: 'USER'; -USING: 'USING'; -VALUES: 'VALUES'; -VERSION: 'VERSION'; -VIEW: 'VIEW'; -VIEWS: 'VIEWS'; -WHEN: 'WHEN'; -WHERE: 'WHERE'; -WINDOW: 'WINDOW'; -WITH: 'WITH'; -WITHIN: 'WITHIN'; -YEAR: 'YEAR'; -ZONE: 'ZONE'; -//--SPARK-KEYWORD-LIST-END -//============================ -// End of the keywords list -//============================ - -EQ : '=' | '=='; -NSEQ: '<=>'; -NEQ : '<>'; -NEQJ: '!='; -LT : '<'; -LTE : '<=' | '!>'; -GT : '>'; -GTE : '>=' | '!<'; - -PLUS: '+'; -MINUS: '-'; -ASTERISK: '*'; -SLASH: '/'; -PERCENT: '%'; -TILDE: '~'; -AMPERSAND: '&'; -PIPE: '|'; -CONCAT_PIPE: '||'; -HAT: '^'; - -STRING - : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' - | '"' ( ~('"'|'\\') | ('\\' .) )* '"' - | 'R\'' (~'\'')* '\'' - | 'R"'(~'"')* '"' - ; - -BIGINT_LITERAL - : DIGIT+ 'L' - ; - -SMALLINT_LITERAL - : DIGIT+ 'S' - ; - -TINYINT_LITERAL - : DIGIT+ 'Y' - ; - -INTEGER_VALUE - : DIGIT+ - ; - -EXPONENT_VALUE - : DIGIT+ EXPONENT - | DECIMAL_DIGITS EXPONENT {isValidDecimal()}? - ; - -DECIMAL_VALUE - : DECIMAL_DIGITS {isValidDecimal()}? - ; - -FLOAT_LITERAL - : DIGIT+ EXPONENT? 'F' - | DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}? - ; - -DOUBLE_LITERAL - : DIGIT+ EXPONENT? 'D' - | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}? - ; - -BIGDECIMAL_LITERAL - : DIGIT+ EXPONENT? 'BD' - | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? - ; - -IDENTIFIER - : (LETTER | DIGIT | '_')+ - ; - -BACKQUOTED_IDENTIFIER - : '`' ( ~'`' | '``' )* '`' - ; - -fragment DECIMAL_DIGITS - : DIGIT+ '.' DIGIT* - | '.' DIGIT+ - ; - -fragment EXPONENT - : 'E' [+-]? DIGIT+ - ; - -fragment DIGIT - : [0-9] - ; - -fragment LETTER - : [A-Z] - ; - -SIMPLE_COMMENT - : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? 
-> channel(HIDDEN) - ; - -BRACKETED_COMMENT - : '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(HIDDEN) - ; - -WS - : [ \r\n\t]+ -> channel(HIDDEN) - ; - -// Catch-all for anything we can't recognize. -// We use this to be able to ignore and recover all the text -// when splitting statements with DelimiterLexer -UNRECOGNIZED - : . - ; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 47c03aa316ed6..2e56df7ba7bf5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -54,7 +54,7 @@ import org.apache.spark.util.random.RandomSampler * The AstBuilder converts an ANTLR4 ParseTree into a catalyst Expression, LogicalPlan or * TableIdentifier. */ -class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logging { +class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper with Logging { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import ParserUtils._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 1057c78f3c282..22532ed2ec305 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -275,7 +275,7 @@ class ParseException( /** * The post-processor validates & cleans-up the parse tree during the parse process. */ -case object PostProcessor extends SqlBaseBaseListener { +case object PostProcessor extends SqlBaseParserBaseListener { /** Throws error message when exiting a explicitly captured wrong identifier rule */ override def exitErrorIdent(ctx: SqlBaseParser.ErrorIdentContext): Unit = { @@ -319,7 +319,7 @@ case object PostProcessor extends SqlBaseBaseListener { * The post-processor checks the unclosed bracketed comment. 
*/ case class UnclosedCommentProcessor( - command: String, tokenStream: CommonTokenStream) extends SqlBaseBaseListener { + command: String, tokenStream: CommonTokenStream) extends SqlBaseParserBaseListener { override def exitSingleDataType(ctx: SqlBaseParser.SingleDataTypeContext): Unit = { checkUnclosedComment(tokenStream, command) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala index e9d30156951d2..0c1c9d5bfeeaf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala @@ -30,11 +30,15 @@ import org.apache.spark.sql.catalyst.util.fileToString trait SQLKeywordUtils extends SparkFunSuite with SQLHelper { val sqlSyntaxDefs = { - val sqlBasePath = { + val sqlBaseParserPath = getWorkspaceFilePath("sql", "catalyst", "src", "main", "antlr4", "org", - "apache", "spark", "sql", "catalyst", "parser", "SqlBase.g4").toFile - } - fileToString(sqlBasePath).split("\n") + "apache", "spark", "sql", "catalyst", "parser", "SqlBaseParser.g4").toFile + + val sqlBaseLexerPath = + getWorkspaceFilePath("sql", "catalyst", "src", "main", "antlr4", "org", + "apache", "spark", "sql", "catalyst", "parser", "SqlBaseLexer.g4").toFile + + (fileToString(sqlBaseParserPath) + fileToString(sqlBaseLexerPath)).split("\n") } // each element is an array of 4 string: the keyword name, reserve or not in Spark ANSI mode, @@ -67,8 +71,9 @@ trait SQLKeywordUtils extends SparkFunSuite with SQLHelper { } } } - assert(keywords.nonEmpty && startTagFound && parseFinished, "cannot extract keywords from " + - s"the `SqlBase.g4` file, so please check if the start/end tags (`$startTag` and `$endTag`) " + + assert(keywords.nonEmpty && startTagFound && parseFinished, + "cannot extract keywords from the `SqlBaseParser.g4` or `SqlBaseLexer.g4` file, " + + s"so please check if the start/end tags (`$startTag` and `$endTag`) " + "are placed correctly in the file.") keywords.toSet } From b81d90b98c45f7a6c53a4ea434cc45ef6d9ba150 Mon Sep 17 00:00:00 2001 From: Bo Zhang Date: Thu, 3 Mar 2022 21:52:19 +0800 Subject: [PATCH 388/513] [SPARK-38312][CORE] Use error class in GraphiteSink ### What changes were proposed in this pull request? This change is to refactor exceptions thrown in GraphiteSink to use error class framework. ### Why are the changes needed? This is to follow the error class framework. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit tests. Closes #35643 from bozhang2820/error-class. Authored-by: Bo Zhang Signed-off-by: Wenchen Fan --- .../main/resources/error/error-classes.json | 6 +++ .../apache/spark/errors/SparkCoreErrors.scala | 10 +++++ .../spark/metrics/sink/GraphiteSink.scala | 7 ++-- .../metrics/sink/GraphiteSinkSuite.scala | 40 ++++++++++++++++++- 4 files changed, 59 insertions(+), 4 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 48812b95f7129..cbdfe5f991374 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -43,6 +43,12 @@ "FAILED_SET_ORIGINAL_PERMISSION_BACK" : { "message" : [ "Failed to set original permission %s back to the created path: %s. 
Exception: %s" ] }, + "GRAPHITE_SINK_INVALID_PROTOCOL" : { + "message" : [ "Invalid Graphite protocol: %s" ] + }, + "GRAPHITE_SINK_PROPERTY_MISSING" : { + "message" : [ "Graphite sink requires '%s' property." ] + }, "GROUPING_COLUMN_MISMATCH" : { "message" : [ "Column of grouping (%s) can't be found in grouping columns %s" ], "sqlState" : "42000" diff --git a/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala b/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala index 95925deca0c30..aecef8ed2d63d 100644 --- a/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala +++ b/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala @@ -315,4 +315,14 @@ object SparkCoreErrors { def failToGetNonShuffleBlockError(blockId: BlockId, e: Throwable): Throwable = { new SparkException(s"Failed to get block $blockId, which is not a shuffle block", e) } + + def graphiteSinkInvalidProtocolError(invalidProtocol: String): Throwable = { + new SparkException(errorClass = "GRAPHITE_SINK_INVALID_PROTOCOL", + messageParameters = Array(invalidProtocol), cause = null) + } + + def graphiteSinkPropertyMissingError(missingProperty: String): Throwable = { + new SparkException(errorClass = "GRAPHITE_SINK_PROPERTY_MISSING", + messageParameters = Array(missingProperty), cause = null) + } } diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala index 1c59e191db531..13460954c061c 100644 --- a/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala @@ -23,6 +23,7 @@ import java.util.concurrent.TimeUnit import com.codahale.metrics.{Metric, MetricFilter, MetricRegistry} import com.codahale.metrics.graphite.{Graphite, GraphiteReporter, GraphiteUDP} +import org.apache.spark.errors.SparkCoreErrors import org.apache.spark.metrics.MetricsSystem private[spark] class GraphiteSink( @@ -42,11 +43,11 @@ private[spark] class GraphiteSink( def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GRAPHITE_KEY_HOST).isDefined) { - throw new Exception("Graphite sink requires 'host' property.") + throw SparkCoreErrors.graphiteSinkPropertyMissingError("host") } if (!propertyToOption(GRAPHITE_KEY_PORT).isDefined) { - throw new Exception("Graphite sink requires 'port' property.") + throw SparkCoreErrors.graphiteSinkPropertyMissingError("port") } val host = propertyToOption(GRAPHITE_KEY_HOST).get @@ -69,7 +70,7 @@ private[spark] class GraphiteSink( val graphite = propertyToOption(GRAPHITE_KEY_PROTOCOL).map(_.toLowerCase(Locale.ROOT)) match { case Some("udp") => new GraphiteUDP(host, port) case Some("tcp") | None => new Graphite(host, port) - case Some(p) => throw new Exception(s"Invalid Graphite protocol: $p") + case Some(p) => throw SparkCoreErrors.graphiteSinkInvalidProtocolError(p) } val filter = propertyToOption(GRAPHITE_KEY_REGEX) match { diff --git a/core/src/test/scala/org/apache/spark/metrics/sink/GraphiteSinkSuite.scala b/core/src/test/scala/org/apache/spark/metrics/sink/GraphiteSinkSuite.scala index cf34121fe73dc..3a6e7f4c12472 100644 --- a/core/src/test/scala/org/apache/spark/metrics/sink/GraphiteSinkSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/sink/GraphiteSinkSuite.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import com.codahale.metrics._ -import org.apache.spark.SparkFunSuite +import 
org.apache.spark.{SparkException, SparkFunSuite} class GraphiteSinkSuite extends SparkFunSuite { @@ -79,4 +79,42 @@ class GraphiteSinkSuite extends SparkFunSuite { assert(metricKeys.equals(filteredMetricKeys), "Should contain only metrics matches regex filter") } + + test("GraphiteSink without host") { + val props = new Properties + props.put("port", "54321") + val registry = new MetricRegistry + + val e = intercept[SparkException] { + new GraphiteSink(props, registry) + } + assert(e.getErrorClass === "GRAPHITE_SINK_PROPERTY_MISSING") + assert(e.getMessage === "Graphite sink requires 'host' property.") + } + + test("GraphiteSink without port") { + val props = new Properties + props.put("host", "127.0.0.1") + val registry = new MetricRegistry + + val e = intercept[SparkException] { + new GraphiteSink(props, registry) + } + assert(e.getErrorClass === "GRAPHITE_SINK_PROPERTY_MISSING") + assert(e.getMessage === "Graphite sink requires 'port' property.") + } + + test("GraphiteSink with invalid protocol") { + val props = new Properties + props.put("host", "127.0.0.1") + props.put("port", "54321") + props.put("protocol", "http") + val registry = new MetricRegistry + + val e = intercept[SparkException] { + new GraphiteSink(props, registry) + } + assert(e.getErrorClass === "GRAPHITE_SINK_INVALID_PROTOCOL") + assert(e.getMessage === "Invalid Graphite protocol: http") + } } From 34618a7ef6a1c25e9ba2f91382c8bfc22b581dbf Mon Sep 17 00:00:00 2001 From: Martin Tzvetanov Grigorov Date: Thu, 3 Mar 2022 08:28:59 -0600 Subject: [PATCH 389/513] [SPARK-38351][TESTS] Don't use deprecate symbol API in test classes ### What changes were proposed in this pull request? Replace symbols like `'abc` with the more verbose `Symbol("abc") in the test code. ### Why are the changes needed? Building with Scala 2.13 produces a lot of warnings like the following ones: ``` [warn] /home/runner/work/spark/spark/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala:562:11: [deprecation | origin= | version=2.13.0] symbol literal is deprecated; use Symbol("d") instead [warn] 'd.cast("string"), [warn] ^ [warn] /home/runner/work/spark/spark/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala:563:11: [deprecation | origin= | version=2.13.0] symbol literal is deprecated; use Symbol("e") instead [warn] 'e.cast("string")).collect()) ``` This should make it easier to upgrade to Scala 3 later. ### Does this PR introduce _any_ user-facing change? No! The PR touches only test classes! ### How was this patch tested? The build at CI must be green! Closes #35560 from martin-g/dont-use-deprecate-symbol-api. 
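For illustration only (not part of this patch): a minimal sketch of the mechanical rewrite applied across the test classes. The session, DataFrame, and column names below are hypothetical and exist only to make the snippet self-contained; only the `'key` -> `Symbol("key")` substitution mirrors the actual changes in this commit.

```scala
import org.apache.spark.sql.SparkSession

object SymbolLiteralDemo {
  def main(args: Array[String]): Unit = {
    // Hypothetical local session and data, used only to make the sketch runnable.
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("symbol-literal-demo")
      .getOrCreate()
    import spark.implicits._

    val df = Seq((1, "a"), (2, "b")).toDF("key", "value")

    // Before: symbol literals, deprecated since Scala 2.13 (emits the warnings shown above).
    df.select('key, 'value.cast("string")).show()

    // After: explicit Symbol(...) calls, the form used throughout the refactored tests.
    df.select(Symbol("key"), Symbol("value").cast("string")).show()

    spark.stop()
  }
}
```
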
Authored-by: Martin Tzvetanov Grigorov Signed-off-by: Sean Owen --- .../spark/sql/ColumnExpressionSuite.scala | 108 +++++----- .../sql/DataFrameSessionWindowingSuite.scala | 8 +- .../org/apache/spark/sql/ExplainSuite.scala | 5 +- .../spark/sql/FileBasedDataSourceSuite.scala | 67 +++--- .../org/apache/spark/sql/FileScanSuite.scala | 9 +- .../org/apache/spark/sql/JoinSuite.scala | 8 +- .../apache/spark/sql/JsonFunctionsSuite.scala | 2 +- .../apache/spark/sql/MathFunctionsSuite.scala | 36 ++-- .../org/apache/spark/sql/SQLQuerySuite.scala | 8 +- .../spark/sql/StatisticsCollectionSuite.scala | 6 +- .../scala/org/apache/spark/sql/UDFSuite.scala | 6 +- .../spark/sql/UserDefinedTypeSuite.scala | 9 +- ...SourceV2DataFrameSessionCatalogSuite.scala | 2 +- .../DataSourceV2DataFrameSuite.scala | 2 +- .../sql/connector/DataSourceV2Suite.scala | 95 +++++---- .../SupportsCatalogOptionsSuite.scala | 6 +- .../AggregatingAccumulatorSuite.scala | 6 +- .../BaseScriptTransformationSuite.scala | 28 +-- .../execution/CoGroupedIteratorSuite.scala | 11 +- .../sql/execution/GroupedIteratorSuite.scala | 6 +- .../spark/sql/execution/PlannerSuite.scala | 31 +-- .../execution/RemoveRedundantSortsSuite.scala | 16 +- .../spark/sql/execution/SQLViewSuite.scala | 3 +- .../spark/sql/execution/SortSuite.scala | 23 ++- .../sql/execution/SparkSqlParserSuite.scala | 20 +- .../SubExprEliminationBenchmark.scala | 4 +- .../TakeOrderedAndProjectSuite.scala | 2 +- .../execution/WholeStageCodegenSuite.scala | 8 +- .../adaptive/AdaptiveQueryExecSuite.scala | 64 +++--- .../execution/benchmark/RangeBenchmark.scala | 2 +- .../columnar/InMemoryColumnarQuerySuite.scala | 2 +- .../execution/command/DDLParserSuite.scala | 8 +- .../sql/execution/command/DDLSuite.scala | 4 +- .../datasources/DataSourceStrategySuite.scala | 22 +- .../datasources/DataSourceSuite.scala | 3 +- .../datasources/FileFormatWriterSuite.scala | 7 +- .../datasources/FileSourceStrategySuite.scala | 12 +- .../datasources/SchemaPruningSuite.scala | 6 +- .../execution/datasources/csv/CSVSuite.scala | 2 +- .../datasources/json/JsonBenchmark.scala | 2 +- .../datasources/noop/NoopStreamSuite.scala | 2 +- .../datasources/noop/NoopSuite.scala | 2 +- .../datasources/orc/OrcQuerySuite.scala | 28 +-- .../parquet/ParquetFilterSuite.scala | 14 +- .../datasources/parquet/ParquetIOSuite.scala | 12 +- .../ParquetPartitionDiscoverySuite.scala | 3 +- .../parquet/ParquetQuerySuite.scala | 8 +- .../parquet/ParquetSchemaSuite.scala | 3 +- .../v2/DataSourceV2StrategySuite.scala | 2 +- .../exchange/ValidateRequirementsSuite.scala | 20 +- .../execution/joins/BroadcastJoinSuite.scala | 4 +- .../execution/metric/SQLMetricsSuite.scala | 29 +-- .../streaming/MicroBatchExecutionSuite.scala | 8 +- .../sources/ConsoleWriteSupportSuite.scala | 2 +- .../sources/ForeachWriterSuite.scala | 8 +- .../RatePerMicroBatchProviderSuite.scala | 4 +- .../sources/RateStreamProviderSuite.scala | 2 +- .../RocksDBStateStoreIntegrationSuite.scala | 8 +- .../ui/SQLAppStatusListenerSuite.scala | 6 +- .../internal/ExecutorSideSQLConfSuite.scala | 2 +- .../sql/sources/DataSourceAnalysisSuite.scala | 18 +- .../streaming/EventTimeWatermarkSuite.scala | 56 ++--- .../spark/sql/streaming/StreamSuite.scala | 2 +- .../streaming/StreamingAggregationSuite.scala | 28 +-- .../StreamingDeduplicationSuite.scala | 4 +- .../sql/streaming/StreamingJoinSuite.scala | 191 ++++++++++-------- ...StreamingQueryStatusAndProgressSuite.scala | 2 +- .../sql/streaming/StreamingQuerySuite.scala | 4 +- .../StreamingSessionWindowSuite.scala | 6 +- 
.../continuous/ContinuousSuite.scala | 10 +- .../test/DataStreamReaderWriterSuite.scala | 5 +- .../sql/test/DataFrameReaderWriterSuite.scala | 3 +- .../status/api/v1/sql/SqlResourceSuite.scala | 4 +- 73 files changed, 624 insertions(+), 545 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index fe56bcb99117e..baf46b3c54c55 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -934,14 +934,14 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("SPARK-37646: lit") { assert(lit($"foo") == $"foo") - assert(lit('foo) == $"foo") + assert(lit(Symbol("foo")) == $"foo") assert(lit(1) == Column(Literal(1))) assert(lit(null) == Column(Literal(null, NullType))) } test("typedLit") { assert(typedLit($"foo") == $"foo") - assert(typedLit('foo) == $"foo") + assert(typedLit(Symbol("foo")) == $"foo") assert(typedLit(1) == Column(Literal(1))) assert(typedLit[String](null) == Column(Literal(null, StringType))) @@ -1029,17 +1029,17 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should throw an exception if any intermediate structs don't exist") { intercept[AnalysisException] { - structLevel2.withColumn("a", 'a.withField("x.b", lit(2))) + structLevel2.withColumn("a", Symbol("a").withField("x.b", lit(2))) }.getMessage should include("No such struct field x in a") intercept[AnalysisException] { - structLevel3.withColumn("a", 'a.withField("a.x.b", lit(2))) + structLevel3.withColumn("a", Symbol("a").withField("a.x.b", lit(2))) }.getMessage should include("No such struct field x in a") } test("withField should throw an exception if intermediate field is not a struct") { intercept[AnalysisException] { - structLevel1.withColumn("a", 'a.withField("b.a", lit(2))) + structLevel1.withColumn("a", Symbol("a").withField("b.a", lit(2))) }.getMessage should include("struct argument should be struct type, got: int") } @@ -1053,7 +1053,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("a", structType, nullable = false))), nullable = false)))) - structLevel2.withColumn("a", 'a.withField("a.b", lit(2))) + structLevel2.withColumn("a", Symbol("a").withField("a.b", lit(2))) }.getMessage should include("Ambiguous reference to fields") } @@ -1072,7 +1072,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should add field to struct") { checkAnswer( - structLevel1.withColumn("a", 'a.withField("d", lit(4))), + structLevel1.withColumn("a", Symbol("a").withField("d", lit(4))), Row(Row(1, null, 3, 4)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1113,7 +1113,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should add null field to struct") { checkAnswer( - structLevel1.withColumn("a", 'a.withField("d", lit(null).cast(IntegerType))), + structLevel1.withColumn("a", Symbol("a").withField("d", lit(null).cast(IntegerType))), Row(Row(1, null, 3, null)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1126,7 +1126,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should add multiple fields to struct") { checkAnswer( - structLevel1.withColumn("a", 'a.withField("d", lit(4)).withField("e", lit(5))), + structLevel1.withColumn("a", 
Symbol("a").withField("d", lit(4)).withField("e", lit(5))), Row(Row(1, null, 3, 4, 5)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1140,7 +1140,8 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should add multiple fields to nullable struct") { checkAnswer( - nullableStructLevel1.withColumn("a", 'a.withField("d", lit(4)).withField("e", lit(5))), + nullableStructLevel1.withColumn("a", Symbol("a") + .withField("d", lit(4)).withField("e", lit(5))), Row(null) :: Row(Row(1, null, 3, 4, 5)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1154,8 +1155,8 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should add field to nested struct") { Seq( - structLevel2.withColumn("a", 'a.withField("a.d", lit(4))), - structLevel2.withColumn("a", 'a.withField("a", $"a.a".withField("d", lit(4)))) + structLevel2.withColumn("a", Symbol("a").withField("a.d", lit(4))), + structLevel2.withColumn("a", Symbol("a").withField("a", $"a.a".withField("d", lit(4)))) ).foreach { df => checkAnswer( df, @@ -1216,7 +1217,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should add field to deeply nested struct") { checkAnswer( - structLevel3.withColumn("a", 'a.withField("a.a.d", lit(4))), + structLevel3.withColumn("a", Symbol("a").withField("a.a.d", lit(4))), Row(Row(Row(Row(1, null, 3, 4)))) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1233,7 +1234,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace field in struct") { checkAnswer( - structLevel1.withColumn("a", 'a.withField("b", lit(2))), + structLevel1.withColumn("a", Symbol("a").withField("b", lit(2))), Row(Row(1, 2, 3)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1245,7 +1246,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace field in nullable struct") { checkAnswer( - nullableStructLevel1.withColumn("a", 'a.withField("b", lit("foo"))), + nullableStructLevel1.withColumn("a", Symbol("a").withField("b", lit("foo"))), Row(null) :: Row(Row(1, "foo", 3)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1271,7 +1272,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace field with null value in struct") { checkAnswer( - structLevel1.withColumn("a", 'a.withField("c", lit(null).cast(IntegerType))), + structLevel1.withColumn("a", Symbol("a").withField("c", lit(null).cast(IntegerType))), Row(Row(1, null, null)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1283,7 +1284,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace multiple fields in struct") { checkAnswer( - structLevel1.withColumn("a", 'a.withField("a", lit(10)).withField("b", lit(20))), + structLevel1.withColumn("a", Symbol("a").withField("a", lit(10)).withField("b", lit(20))), Row(Row(10, 20, 3)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1295,7 +1296,8 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace multiple fields in nullable struct") { checkAnswer( - nullableStructLevel1.withColumn("a", 'a.withField("a", lit(10)).withField("b", lit(20))), + nullableStructLevel1.withColumn("a", Symbol("a").withField("a", lit(10)) + .withField("b", lit(20))), Row(null) :: Row(Row(10, 20, 3)) :: Nil, StructType(Seq( StructField("a", 
StructType(Seq( @@ -1308,7 +1310,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace field in nested struct") { Seq( structLevel2.withColumn("a", $"a".withField("a.b", lit(2))), - structLevel2.withColumn("a", 'a.withField("a", $"a.a".withField("b", lit(2)))) + structLevel2.withColumn("a", Symbol("a").withField("a", $"a.a".withField("b", lit(2)))) ).foreach { df => checkAnswer( df, @@ -1389,7 +1391,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - structLevel1.withColumn("a", 'a.withField("b", lit(100))), + structLevel1.withColumn("a", Symbol("a").withField("b", lit(100))), Row(Row(1, 100, 100)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1401,7 +1403,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace fields in struct in given order") { checkAnswer( - structLevel1.withColumn("a", 'a.withField("b", lit(2)).withField("b", lit(20))), + structLevel1.withColumn("a", Symbol("a").withField("b", lit(2)).withField("b", lit(20))), Row(Row(1, 20, 3)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1413,7 +1415,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should add field and then replace same field in struct") { checkAnswer( - structLevel1.withColumn("a", 'a.withField("d", lit(4)).withField("d", lit(5))), + structLevel1.withColumn("a", Symbol("a").withField("d", lit(4)).withField("d", lit(5))), Row(Row(1, null, 3, 5)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1437,7 +1439,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - df.withColumn("a", 'a.withField("`a.b`.`e.f`", lit(2))), + df.withColumn("a", Symbol("a").withField("`a.b`.`e.f`", lit(2))), Row(Row(Row(1, 2, 3))) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1449,7 +1451,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) intercept[AnalysisException] { - df.withColumn("a", 'a.withField("a.b.e.f", lit(2))) + df.withColumn("a", Symbol("a").withField("a.b.e.f", lit(2))) }.getMessage should include("No such struct field a in a.b") } @@ -1464,7 +1466,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace field in struct even if casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.withField("A", lit(2))), + mixedCaseStructLevel1.withColumn("a", Symbol("a").withField("A", lit(2))), Row(Row(2, 1)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1473,7 +1475,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.withField("b", lit(2))), + mixedCaseStructLevel1.withColumn("a", Symbol("a").withField("b", lit(2))), Row(Row(1, 2)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1486,7 +1488,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should add field to struct because casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.withField("A", lit(2))), + mixedCaseStructLevel1.withColumn("a", Symbol("a").withField("A", lit(2))), Row(Row(1, 1, 2)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( 
@@ -1496,7 +1498,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.withField("b", lit(2))), + mixedCaseStructLevel1.withColumn("a", Symbol("a").withField("b", lit(2))), Row(Row(1, 1, 2)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1524,7 +1526,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace nested field in struct even if casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { checkAnswer( - mixedCaseStructLevel2.withColumn("a", 'a.withField("A.a", lit(2))), + mixedCaseStructLevel2.withColumn("a", Symbol("a").withField("A.a", lit(2))), Row(Row(Row(2, 1), Row(1, 1))) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1539,7 +1541,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - mixedCaseStructLevel2.withColumn("a", 'a.withField("b.a", lit(2))), + mixedCaseStructLevel2.withColumn("a", Symbol("a").withField("b.a", lit(2))), Row(Row(Row(1, 1), Row(2, 1))) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1558,11 +1560,11 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should throw an exception because casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { intercept[AnalysisException] { - mixedCaseStructLevel2.withColumn("a", 'a.withField("A.a", lit(2))) + mixedCaseStructLevel2.withColumn("a", Symbol("a").withField("A.a", lit(2))) }.getMessage should include("No such struct field A in a, B") intercept[AnalysisException] { - mixedCaseStructLevel2.withColumn("a", 'a.withField("b.a", lit(2))) + mixedCaseStructLevel2.withColumn("a", Symbol("a").withField("b.a", lit(2))) }.getMessage should include("No such struct field b in a, B") } } @@ -1769,17 +1771,17 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should throw an exception if any intermediate structs don't exist") { intercept[AnalysisException] { - structLevel2.withColumn("a", 'a.dropFields("x.b")) + structLevel2.withColumn("a", Symbol("a").dropFields("x.b")) }.getMessage should include("No such struct field x in a") intercept[AnalysisException] { - structLevel3.withColumn("a", 'a.dropFields("a.x.b")) + structLevel3.withColumn("a", Symbol("a").dropFields("a.x.b")) }.getMessage should include("No such struct field x in a") } test("dropFields should throw an exception if intermediate field is not a struct") { intercept[AnalysisException] { - structLevel1.withColumn("a", 'a.dropFields("b.a")) + structLevel1.withColumn("a", Symbol("a").dropFields("b.a")) }.getMessage should include("struct argument should be struct type, got: int") } @@ -1793,13 +1795,13 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("a", structType, nullable = false))), nullable = false)))) - structLevel2.withColumn("a", 'a.dropFields("a.b")) + structLevel2.withColumn("a", Symbol("a").dropFields("a.b")) }.getMessage should include("Ambiguous reference to fields") } test("dropFields should drop field in struct") { checkAnswer( - structLevel1.withColumn("a", 'a.dropFields("b")), + structLevel1.withColumn("a", Symbol("a").dropFields("b")), Row(Row(1, 3)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1822,7 +1824,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should drop multiple fields in 
struct") { Seq( structLevel1.withColumn("a", $"a".dropFields("b", "c")), - structLevel1.withColumn("a", 'a.dropFields("b").dropFields("c")) + structLevel1.withColumn("a", Symbol("a").dropFields("b").dropFields("c")) ).foreach { df => checkAnswer( df, @@ -1836,7 +1838,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should throw an exception if no fields will be left in struct") { intercept[AnalysisException] { - structLevel1.withColumn("a", 'a.dropFields("a", "b", "c")) + structLevel1.withColumn("a", Symbol("a").dropFields("a", "b", "c")) }.getMessage should include("cannot drop all fields in struct") } @@ -1860,7 +1862,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should drop field in nested struct") { checkAnswer( - structLevel2.withColumn("a", 'a.dropFields("a.b")), + structLevel2.withColumn("a", Symbol("a").dropFields("a.b")), Row(Row(Row(1, 3))) :: Nil, StructType( Seq(StructField("a", StructType(Seq( @@ -1873,7 +1875,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should drop multiple fields in nested struct") { checkAnswer( - structLevel2.withColumn("a", 'a.dropFields("a.b", "a.c")), + structLevel2.withColumn("a", Symbol("a").dropFields("a.b", "a.c")), Row(Row(Row(1))) :: Nil, StructType( Seq(StructField("a", StructType(Seq( @@ -1910,7 +1912,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should drop field in deeply nested struct") { checkAnswer( - structLevel3.withColumn("a", 'a.dropFields("a.a.b")), + structLevel3.withColumn("a", Symbol("a").dropFields("a.a.b")), Row(Row(Row(Row(1, 3)))) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1934,7 +1936,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - structLevel1.withColumn("a", 'a.dropFields("b")), + structLevel1.withColumn("a", Symbol("a").dropFields("b")), Row(Row(1)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1945,7 +1947,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should drop field in struct even if casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.dropFields("A")), + mixedCaseStructLevel1.withColumn("a", Symbol("a").dropFields("A")), Row(Row(1)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1953,7 +1955,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.dropFields("b")), + mixedCaseStructLevel1.withColumn("a", Symbol("a").dropFields("b")), Row(Row(1)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1965,7 +1967,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should not drop field in struct because casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.dropFields("A")), + mixedCaseStructLevel1.withColumn("a", Symbol("a").dropFields("A")), Row(Row(1, 1)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1974,7 +1976,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - mixedCaseStructLevel1.withColumn("a", 'a.dropFields("b")), + mixedCaseStructLevel1.withColumn("a", Symbol("a").dropFields("b")), Row(Row(1, 
1)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -1987,7 +1989,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should drop nested field in struct even if casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { checkAnswer( - mixedCaseStructLevel2.withColumn("a", 'a.dropFields("A.a")), + mixedCaseStructLevel2.withColumn("a", Symbol("a").dropFields("A.a")), Row(Row(Row(1), Row(1, 1))) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -2001,7 +2003,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - mixedCaseStructLevel2.withColumn("a", 'a.dropFields("b.a")), + mixedCaseStructLevel2.withColumn("a", Symbol("a").dropFields("b.a")), Row(Row(Row(1, 1), Row(1))) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -2019,18 +2021,18 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("dropFields should throw an exception because casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { intercept[AnalysisException] { - mixedCaseStructLevel2.withColumn("a", 'a.dropFields("A.a")) + mixedCaseStructLevel2.withColumn("a", Symbol("a").dropFields("A.a")) }.getMessage should include("No such struct field A in a, B") intercept[AnalysisException] { - mixedCaseStructLevel2.withColumn("a", 'a.dropFields("b.a")) + mixedCaseStructLevel2.withColumn("a", Symbol("a").dropFields("b.a")) }.getMessage should include("No such struct field b in a, B") } } test("dropFields should drop only fields that exist") { checkAnswer( - structLevel1.withColumn("a", 'a.dropFields("d")), + structLevel1.withColumn("a", Symbol("a").dropFields("d")), Row(Row(1, null, 3)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( @@ -2040,7 +2042,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) checkAnswer( - structLevel1.withColumn("a", 'a.dropFields("b", "d")), + structLevel1.withColumn("a", Symbol("a").dropFields("b", "d")), Row(Row(1, 3)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala index 076b64cde8c66..376fa2e95a8e2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala @@ -83,7 +83,7 @@ class DataFrameSessionWindowingSuite extends QueryTest with SharedSparkSession // key "b" => (19:39:27 ~ 19:39:37) checkAnswer( - df.groupBy(session_window($"time", "10 seconds"), 'id) + df.groupBy(session_window($"time", "10 seconds"), Symbol("id")) .agg(count("*").as("counts"), sum("value").as("sum")) .orderBy($"session_window.start".asc) .selectExpr("CAST(session_window.start AS STRING)", "CAST(session_window.end AS STRING)", @@ -113,7 +113,7 @@ class DataFrameSessionWindowingSuite extends QueryTest with SharedSparkSession // key "b" => (19:39:27 ~ 19:39:37) checkAnswer( - df.groupBy(session_window($"time", "10 seconds"), 'id) + df.groupBy(session_window($"time", "10 seconds"), Symbol("id")) .agg(count("*").as("counts"), sum_distinct(col("value")).as("sum")) .orderBy($"session_window.start".asc) .selectExpr("CAST(session_window.start AS STRING)", "CAST(session_window.end AS STRING)", @@ -142,7 +142,7 @@ class DataFrameSessionWindowingSuite extends QueryTest with SharedSparkSession // 
key "b" => (19:39:27 ~ 19:39:37) checkAnswer( - df.groupBy(session_window($"time", "10 seconds"), 'id) + df.groupBy(session_window($"time", "10 seconds"), Symbol("id")) .agg(sum_distinct(col("value")).as("sum"), sum_distinct(col("value2")).as("sum2")) .orderBy($"session_window.start".asc) .selectExpr("CAST(session_window.start AS STRING)", "CAST(session_window.end AS STRING)", @@ -171,7 +171,7 @@ class DataFrameSessionWindowingSuite extends QueryTest with SharedSparkSession // b => (19:39:27 ~ 19:39:37), (19:39:39 ~ 19:39:55) checkAnswer( - df.groupBy(session_window($"time", "10 seconds"), 'id) + df.groupBy(session_window($"time", "10 seconds"), Symbol("id")) .agg(count("*").as("counts"), sum("value").as("sum")) .orderBy($"session_window.start".asc) .selectExpr("CAST(session_window.start AS STRING)", "CAST(session_window.end AS STRING)", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index a5403ec548d7e..3659f20fb6ec2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -593,7 +593,7 @@ class ExplainSuiteAE extends ExplainSuiteHelper with EnableAdaptiveExecutionSuit } test("SPARK-35884: Explain should only display one plan before AQE takes effect") { - val df = (0 to 10).toDF("id").where('id > 5) + val df = (0 to 10).toDF("id").where(Symbol("id") > 5) val modes = Seq(SimpleMode, ExtendedMode, CostMode, FormattedMode) modes.foreach { mode => checkKeywordsExistsInExplain(df, mode, "AdaptiveSparkPlan") @@ -608,7 +608,8 @@ class ExplainSuiteAE extends ExplainSuiteHelper with EnableAdaptiveExecutionSuit test("SPARK-35884: Explain formatted with subquery") { withTempView("t1", "t2") { - spark.range(100).select('id % 10 as "key", 'id as "value").createOrReplaceTempView("t1") + spark.range(100).select(Symbol("id") % 10 as "key", Symbol("id") as "value") + .createOrReplaceTempView("t1") spark.range(10).createOrReplaceTempView("t2") val query = """ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 8024f24e2eb13..11886f80f9455 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -967,52 +967,57 @@ class FileBasedDataSourceSuite extends QueryTest // cases when value == MAX var v = Short.MaxValue - checkPushedFilters(format, df.where('id > v.toInt), Array(), noScan = true) - checkPushedFilters(format, df.where('id >= v.toInt), Array(sources.IsNotNull("id"), - sources.EqualTo("id", v))) - checkPushedFilters(format, df.where('id === v.toInt), Array(sources.IsNotNull("id"), - sources.EqualTo("id", v))) - checkPushedFilters(format, df.where('id <=> v.toInt), + checkPushedFilters(format, df.where(Symbol("id") > v.toInt), Array(), noScan = true) + checkPushedFilters(format, df.where(Symbol("id") >= v.toInt), + Array(sources.IsNotNull("id"), sources.EqualTo("id", v))) + checkPushedFilters(format, df.where(Symbol("id") === v.toInt), + Array(sources.IsNotNull("id"), sources.EqualTo("id", v))) + checkPushedFilters(format, df.where(Symbol("id") <=> v.toInt), Array(sources.EqualNullSafe("id", v))) - checkPushedFilters(format, df.where('id <= v.toInt), Array(sources.IsNotNull("id"))) - checkPushedFilters(format, df.where('id < v.toInt), Array(sources.IsNotNull("id"), - 
sources.Not(sources.EqualTo("id", v)))) + checkPushedFilters(format, df.where(Symbol("id") <= v.toInt), + Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(Symbol("id") < v.toInt), + Array(sources.IsNotNull("id"), sources.Not(sources.EqualTo("id", v)))) // cases when value > MAX var v1: Int = positiveInt - checkPushedFilters(format, df.where('id > v1), Array(), noScan = true) - checkPushedFilters(format, df.where('id >= v1), Array(), noScan = true) - checkPushedFilters(format, df.where('id === v1), Array(), noScan = true) - checkPushedFilters(format, df.where('id <=> v1), Array(), noScan = true) - checkPushedFilters(format, df.where('id <= v1), Array(sources.IsNotNull("id"))) - checkPushedFilters(format, df.where('id < v1), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(Symbol("id") > v1), Array(), noScan = true) + checkPushedFilters(format, df.where(Symbol("id") >= v1), Array(), noScan = true) + checkPushedFilters(format, df.where(Symbol("id") === v1), Array(), noScan = true) + checkPushedFilters(format, df.where(Symbol("id") <=> v1), Array(), noScan = true) + checkPushedFilters(format, df.where(Symbol("id") <= v1), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(Symbol("id") < v1), Array(sources.IsNotNull("id"))) // cases when value = MIN v = Short.MinValue - checkPushedFilters(format, df.where(lit(v.toInt) < 'id), Array(sources.IsNotNull("id"), - sources.Not(sources.EqualTo("id", v)))) - checkPushedFilters(format, df.where(lit(v.toInt) <= 'id), Array(sources.IsNotNull("id"))) - checkPushedFilters(format, df.where(lit(v.toInt) === 'id), Array(sources.IsNotNull("id"), + checkPushedFilters(format, df.where(lit(v.toInt) < Symbol("id")), + Array(sources.IsNotNull("id"), sources.Not(sources.EqualTo("id", v)))) + checkPushedFilters(format, df.where(lit(v.toInt) <= Symbol("id")), + Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(lit(v.toInt) === Symbol("id")), + Array(sources.IsNotNull("id"), sources.EqualTo("id", v))) - checkPushedFilters(format, df.where(lit(v.toInt) <=> 'id), + checkPushedFilters(format, df.where(lit(v.toInt) <=> Symbol("id")), Array(sources.EqualNullSafe("id", v))) - checkPushedFilters(format, df.where(lit(v.toInt) >= 'id), Array(sources.IsNotNull("id"), - sources.EqualTo("id", v))) - checkPushedFilters(format, df.where(lit(v.toInt) > 'id), Array(), noScan = true) + checkPushedFilters(format, df.where(lit(v.toInt) >= Symbol("id")), + Array(sources.IsNotNull("id"), sources.EqualTo("id", v))) + checkPushedFilters(format, df.where(lit(v.toInt) > Symbol("id")), Array(), noScan = true) // cases when value < MIN v1 = negativeInt - checkPushedFilters(format, df.where(lit(v1) < 'id), Array(sources.IsNotNull("id"))) - checkPushedFilters(format, df.where(lit(v1) <= 'id), Array(sources.IsNotNull("id"))) - checkPushedFilters(format, df.where(lit(v1) === 'id), Array(), noScan = true) - checkPushedFilters(format, df.where(lit(v1) >= 'id), Array(), noScan = true) - checkPushedFilters(format, df.where(lit(v1) > 'id), Array(), noScan = true) + checkPushedFilters(format, df.where(lit(v1) < Symbol("id")), + Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(lit(v1) <= Symbol("id")), + Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(lit(v1) === Symbol("id")), Array(), noScan = true) + checkPushedFilters(format, df.where(lit(v1) >= Symbol("id")), Array(), noScan = true) + checkPushedFilters(format, df.where(lit(v1) > Symbol("id")), Array(), noScan = true) // cases when 
value is within range (MIN, MAX) - checkPushedFilters(format, df.where('id > 30), Array(sources.IsNotNull("id"), + checkPushedFilters(format, df.where(Symbol("id") > 30), Array(sources.IsNotNull("id"), sources.GreaterThan("id", 30))) - checkPushedFilters(format, df.where(lit(100) >= 'id), Array(sources.IsNotNull("id"), - sources.LessThanOrEqual("id", 100))) + checkPushedFilters(format, df.where(lit(100) >= Symbol("id")), + Array(sources.IsNotNull("id"), sources.LessThanOrEqual("id", 100))) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileScanSuite.scala index 14b59ba23d09f..ce98fd27350a8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileScanSuite.scala @@ -85,10 +85,11 @@ trait FileScanSuiteBase extends SharedSparkSession { val options = new CaseInsensitiveStringMap(ImmutableMap.copyOf(optionsMap)) val optionsNotEqual = new CaseInsensitiveStringMap(ImmutableMap.copyOf(ImmutableMap.of("key2", "value2"))) - val partitionFilters = Seq(And(IsNull('data.int), LessThan('data.int, 0))) - val partitionFiltersNotEqual = Seq(And(IsNull('data.int), LessThan('data.int, 1))) - val dataFilters = Seq(And(IsNull('data.int), LessThan('data.int, 0))) - val dataFiltersNotEqual = Seq(And(IsNull('data.int), LessThan('data.int, 1))) + val partitionFilters = Seq(And(IsNull(Symbol("data").int), LessThan(Symbol("data").int, 0))) + val partitionFiltersNotEqual = Seq(And(IsNull(Symbol("data").int), + LessThan(Symbol("data").int, 1))) + val dataFilters = Seq(And(IsNull(Symbol("data").int), LessThan(Symbol("data").int, 0))) + val dataFiltersNotEqual = Seq(And(IsNull(Symbol("data").int), LessThan(Symbol("data").int, 1))) scanBuilders.foreach { case (name, scanBuilder, exclusions) => test(s"SPARK-33482: Test $name equals") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 77493afe43145..ec6c863b8183f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -183,7 +183,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan test("inner join where, one match per row") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { checkAnswer( - upperCaseData.join(lowerCaseData).where('n === 'N), + upperCaseData.join(lowerCaseData).where(Symbol("n") === 'N), Seq( Row(1, "A", 1, "a"), Row(2, "B", 2, "b"), @@ -404,8 +404,8 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan test("full outer join") { withTempView("`left`", "`right`") { - upperCaseData.where('N <= 4).createOrReplaceTempView("`left`") - upperCaseData.where('N >= 3).createOrReplaceTempView("`right`") + upperCaseData.where(Symbol("N") <= 4).createOrReplaceTempView("`left`") + upperCaseData.where(Symbol("N") >= 3).createOrReplaceTempView("`right`") val left = UnresolvedRelation(TableIdentifier("left")) val right = UnresolvedRelation(TableIdentifier("right")) @@ -623,7 +623,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan testData.createOrReplaceTempView("B") testData2.createOrReplaceTempView("C") testData3.createOrReplaceTempView("D") - upperCaseData.where('N >= 3).createOrReplaceTempView("`right`") + upperCaseData.where(Symbol("N") >= 3).createOrReplaceTempView("`right`") val cartesianQueries = Seq( /** The following should error 
out since there is no explicit cross join */ "SELECT * FROM testData inner join testData2", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 6661b58b8f522..e18c087a26279 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -403,7 +403,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { test("SPARK-24709: infers schemas of json strings and pass them to from_json") { val in = Seq("""{"a": [1, 2, 3]}""").toDS() - val out = in.select(from_json('value, schema_of_json("""{"a": [1]}""")) as "parsed") + val out = in.select(from_json(Symbol("value"), schema_of_json("""{"a": [1]}""")) as "parsed") val expected = StructType(StructField( "parsed", StructType(StructField( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index f3bff7389ee74..ab52cb98208f2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -47,12 +47,12 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { c: Column => Column, f: T => U): Unit = { checkAnswer( - doubleData.select(c('a)), + doubleData.select(c(Symbol("a"))), (1 to 10).map(n => Row(f((n * 0.2 - 1).asInstanceOf[T]))) ) checkAnswer( - doubleData.select(c('b)), + doubleData.select(c(Symbol("b"))), (1 to 10).map(n => Row(f((-n * 0.2 + 1).asInstanceOf[T]))) ) @@ -65,13 +65,13 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { private def testOneToOneNonNegativeMathFunction(c: Column => Column, f: Double => Double): Unit = { checkAnswer( - nnDoubleData.select(c('a)), + nnDoubleData.select(c(Symbol("a"))), (1 to 10).map(n => Row(f(n * 0.1))) ) if (f(-1) === StrictMath.log1p(-1)) { checkAnswer( - nnDoubleData.select(c('b)), + nnDoubleData.select(c(Symbol("b"))), (1 to 9).map(n => Row(f(n * -0.1))) :+ Row(null) ) } @@ -87,12 +87,12 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { d: (Column, Double) => Column, f: (Double, Double) => Double): Unit = { checkAnswer( - nnDoubleData.select(c('a, 'a)), + nnDoubleData.select(c('a, Symbol("a"))), nnDoubleData.collect().toSeq.map(r => Row(f(r.getDouble(0), r.getDouble(0)))) ) checkAnswer( - nnDoubleData.select(c('a, 'b)), + nnDoubleData.select(c('a, Symbol("b"))), nnDoubleData.collect().toSeq.map(r => Row(f(r.getDouble(0), r.getDouble(1)))) ) @@ -109,7 +109,7 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { val nonNull = nullDoubles.collect().toSeq.filter(r => r.get(0) != null) checkAnswer( - nullDoubles.select(c('a, 'a)).orderBy('a.asc), + nullDoubles.select(c('a, Symbol("a"))).orderBy(Symbol("a").asc), Row(null) +: nonNull.map(r => Row(f(r.getDouble(0), r.getDouble(0)))) ) } @@ -255,7 +255,7 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("factorial") { val df = (0 to 5).map(i => (i, i)).toDF("a", "b") checkAnswer( - df.select(factorial('a)), + df.select(factorial(Symbol("a"))), Seq(Row(1), Row(1), Row(2), Row(6), Row(24), Row(120)) ) checkAnswer( @@ -271,11 +271,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("round/bround/ceil/floor") { val df = Seq(5, 55, 555).map(Tuple1(_)).toDF("a") checkAnswer( - df.select(round('a), round('a, -1), 
round('a, -2)), + df.select(round(Symbol("a")), round('a, -1), round('a, -2)), Seq(Row(5, 10, 0), Row(55, 60, 100), Row(555, 560, 600)) ) checkAnswer( - df.select(bround('a), bround('a, -1), bround('a, -2)), + df.select(bround(Symbol("a")), bround('a, -1), bround('a, -2)), Seq(Row(5, 0, 0), Row(55, 60, 100), Row(555, 560, 600)) ) checkAnswer( @@ -343,11 +343,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("round/bround/ceil/floor with data frame from a local Seq of Product") { val df = spark.createDataFrame(Seq(Tuple1(BigDecimal("5.9")))).toDF("value") checkAnswer( - df.withColumn("value_rounded", round('value)), + df.withColumn("value_rounded", round(Symbol("value"))), Seq(Row(BigDecimal("5.9"), BigDecimal("6"))) ) checkAnswer( - df.withColumn("value_brounded", bround('value)), + df.withColumn("value_brounded", bround(Symbol("value"))), Seq(Row(BigDecimal("5.9"), BigDecimal("6"))) ) checkAnswer( @@ -423,10 +423,10 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("hex") { val data = Seq((28, -28, 100800200404L, "hello")).toDF("a", "b", "c", "d") - checkAnswer(data.select(hex('a)), Seq(Row("1C"))) - checkAnswer(data.select(hex('b)), Seq(Row("FFFFFFFFFFFFFFE4"))) - checkAnswer(data.select(hex('c)), Seq(Row("177828FED4"))) - checkAnswer(data.select(hex('d)), Seq(Row("68656C6C6F"))) + checkAnswer(data.select(hex(Symbol("a"))), Seq(Row("1C"))) + checkAnswer(data.select(hex(Symbol("b"))), Seq(Row("FFFFFFFFFFFFFFE4"))) + checkAnswer(data.select(hex(Symbol("c"))), Seq(Row("177828FED4"))) + checkAnswer(data.select(hex(Symbol("d"))), Seq(Row("68656C6C6F"))) checkAnswer(data.selectExpr("hex(a)"), Seq(Row("1C"))) checkAnswer(data.selectExpr("hex(b)"), Seq(Row("FFFFFFFFFFFFFFE4"))) checkAnswer(data.selectExpr("hex(c)"), Seq(Row("177828FED4"))) @@ -436,8 +436,8 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("unhex") { val data = Seq(("1C", "737472696E67")).toDF("a", "b") - checkAnswer(data.select(unhex('a)), Row(Array[Byte](28.toByte))) - checkAnswer(data.select(unhex('b)), Row("string".getBytes(StandardCharsets.UTF_8))) + checkAnswer(data.select(unhex(Symbol("a"))), Row(Array[Byte](28.toByte))) + checkAnswer(data.select(unhex(Symbol("b"))), Row("string".getBytes(StandardCharsets.UTF_8))) checkAnswer(data.selectExpr("unhex(a)"), Row(Array[Byte](28.toByte))) checkAnswer(data.selectExpr("unhex(b)"), Row("string".getBytes(StandardCharsets.UTF_8))) checkAnswer(data.selectExpr("""unhex("##")"""), Row(null)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 326ea314ec68e..c28dde9cea09a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3066,15 +3066,17 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark val df = spark.read.format(format).load(dir.getCanonicalPath) checkPushedFilters( format, - df.where(('id < 2 and 's.contains("foo")) or ('id > 10 and 's.contains("bar"))), + df.where((Symbol("id") < 2 and Symbol("s").contains("foo")) or + (Symbol("id") > 10 and Symbol("s").contains("bar"))), Array(sources.Or(sources.LessThan("id", 2), sources.GreaterThan("id", 10)))) checkPushedFilters( format, - df.where('s.contains("foo") or ('id > 10 and 's.contains("bar"))), + df.where(Symbol("s").contains("foo") or + (Symbol("id") > 10 and Symbol("s").contains("bar"))), Array.empty) checkPushedFilters( 
format, - df.where('id < 2 and not('id > 10 and 's.contains("bar"))), + df.where(Symbol("id") < 2 and not(Symbol("id") > 10 and Symbol("s").contains("bar"))), Array(sources.IsNotNull("id"), sources.LessThan("id", 2))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 57fc49ddc8131..c37309d97acae 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -407,9 +407,9 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared withTable("TBL1", "TBL") { import org.apache.spark.sql.functions._ val df = spark.range(1000L).select('id, - 'id * 2 as "FLD1", - 'id * 12 as "FLD2", - lit(null).cast(DoubleType) + 'id as "fld3") + Symbol("id") * 2 as "FLD1", + Symbol("id") * 12 as "FLD2", + lit(null).cast(DoubleType) + Symbol("id") as "fld3") df.write .mode(SaveMode.Overwrite) .bucketBy(10, "id", "FLD1", "FLD2") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index d100cad89fcc1..e651459394fd9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -424,7 +424,7 @@ class UDFSuite extends QueryTest with SharedSparkSession { ("N", Integer.valueOf(3), null)).toDF("a", "b", "c") val udf1 = udf((a: String, b: Int, c: Any) => a + b + c) - val df = input.select(udf1('a, 'b, 'c)) + val df = input.select(udf1(Symbol("a"), 'b, 'c)) checkAnswer(df, Seq(Row("null1x"), Row(null), Row("N3null"))) // test Java UDF. Java UDF can't have primitive inputs, as it's generic typed. 
@@ -554,7 +554,7 @@ class UDFSuite extends QueryTest with SharedSparkSession { spark.udf.register("buildLocalDateInstantType", udf((d: LocalDate, i: Instant) => LocalDateInstantType(d, i))) checkAnswer(df.selectExpr(s"buildLocalDateInstantType(d, i) as di") - .select('di.cast(StringType)), + .select(Symbol("di").cast(StringType)), Row(s"{$expectedDate, $expectedInstant}") :: Nil) // test null cases @@ -584,7 +584,7 @@ class UDFSuite extends QueryTest with SharedSparkSession { spark.udf.register("buildTimestampInstantType", udf((t: Timestamp, i: Instant) => TimestampInstantType(t, i))) checkAnswer(df.selectExpr("buildTimestampInstantType(t, i) as ti") - .select('ti.cast(StringType)), + .select(Symbol("ti").cast(StringType)), Row(s"{$expectedTimestamp, $expectedInstant}")) // test null cases diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala index cc52b6d8a14a7..729312c3e5912 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala @@ -82,14 +82,14 @@ class UserDefinedTypeSuite extends QueryTest with SharedSparkSession with Parque } test("register user type: MyDenseVector for MyLabeledPoint") { - val labels: RDD[Double] = pointsRDD.select('label).rdd.map { case Row(v: Double) => v } + val labels: RDD[Double] = pointsRDD.select(Symbol("label")).rdd.map { case Row(v: Double) => v } val labelsArrays: Array[Double] = labels.collect() assert(labelsArrays.size === 2) assert(labelsArrays.contains(1.0)) assert(labelsArrays.contains(0.0)) val features: RDD[TestUDT.MyDenseVector] = - pointsRDD.select('features).rdd.map { case Row(v: TestUDT.MyDenseVector) => v } + pointsRDD.select(Symbol("features")).rdd.map { case Row(v: TestUDT.MyDenseVector) => v } val featuresArrays: Array[TestUDT.MyDenseVector] = features.collect() assert(featuresArrays.size === 2) assert(featuresArrays.contains(new TestUDT.MyDenseVector(Array(0.1, 1.0)))) @@ -137,8 +137,9 @@ class UserDefinedTypeSuite extends QueryTest with SharedSparkSession with Parque val df = Seq((1, vec)).toDF("int", "vec") assert(vec === df.collect()(0).getAs[TestUDT.MyDenseVector](1)) assert(vec === df.take(1)(0).getAs[TestUDT.MyDenseVector](1)) - checkAnswer(df.limit(1).groupBy('int).agg(first('vec)), Row(1, vec)) - checkAnswer(df.orderBy('int).limit(1).groupBy('int).agg(first('vec)), Row(1, vec)) + checkAnswer(df.limit(1).groupBy(Symbol("int")).agg(first(Symbol("vec"))), Row(1, vec)) + checkAnswer(df.orderBy(Symbol("int")).limit(1).groupBy(Symbol("int")) + .agg(first(Symbol("vec"))), Row(1, vec)) } test("UDTs with JSON") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala index 3edc4b9502064..05aafceb36ec7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala @@ -210,7 +210,7 @@ private [connector] trait SessionCatalogTest[T <: Table, Catalog <: TestV2Sessio verifyTable(t1, df) // Check that appends are by name - df.select('data, 'id).write.format(v2Format).mode("append").saveAsTable(t1) + df.select(Symbol("data"), Symbol("id")).write.format(v2Format).mode("append").saveAsTable(t1) verifyTable(t1, df.union(df)) 
} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala index dd810a70d1585..03dcfcf7ddc7d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala @@ -93,7 +93,7 @@ class DataSourceV2DataFrameSuite assert(spark.table(t1).count() === 0) // appends are by name not by position - df.select('data, 'id).write.mode("append").saveAsTable(t1) + df.select(Symbol("data"), Symbol("id")).write.mode("append").saveAsTable(t1) checkAnswer(spark.table(t1), df) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala index fd3c69eff5652..23164edddaeed 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala @@ -80,8 +80,8 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS withClue(cls.getName) { val df = spark.read.format(cls.getName).load() checkAnswer(df, (0 until 10).map(i => Row(i, -i))) - checkAnswer(df.select('j), (0 until 10).map(i => Row(-i))) - checkAnswer(df.filter('i > 5), (6 until 10).map(i => Row(i, -i))) + checkAnswer(df.select(Symbol("j")), (0 until 10).map(i => Row(-i))) + checkAnswer(df.filter(Symbol("i") > 5), (6 until 10).map(i => Row(i, -i))) } } } @@ -92,7 +92,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS val df = spark.read.format(cls.getName).load() checkAnswer(df, (0 until 10).map(i => Row(i, -i))) - val q1 = df.select('j) + val q1 = df.select(Symbol("j")) checkAnswer(q1, (0 until 10).map(i => Row(-i))) if (cls == classOf[AdvancedDataSourceV2]) { val batch = getBatch(q1) @@ -104,7 +104,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch.requiredSchema.fieldNames === Seq("j")) } - val q2 = df.filter('i > 3) + val q2 = df.filter(Symbol("i") > 3) checkAnswer(q2, (4 until 10).map(i => Row(i, -i))) if (cls == classOf[AdvancedDataSourceV2]) { val batch = getBatch(q2) @@ -116,7 +116,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch.requiredSchema.fieldNames === Seq("i", "j")) } - val q3 = df.select('i).filter('i > 6) + val q3 = df.select(Symbol("i")).filter(Symbol("i") > 6) checkAnswer(q3, (7 until 10).map(i => Row(i))) if (cls == classOf[AdvancedDataSourceV2]) { val batch = getBatch(q3) @@ -128,16 +128,16 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch.requiredSchema.fieldNames === Seq("i")) } - val q4 = df.select('j).filter('j < -10) + val q4 = df.select(Symbol("j")).filter(Symbol("j") < -10) checkAnswer(q4, Nil) if (cls == classOf[AdvancedDataSourceV2]) { val batch = getBatch(q4) - // 'j < 10 is not supported by the testing data source. + // Symbol("j") < 10 is not supported by the testing data source. assert(batch.filters.isEmpty) assert(batch.requiredSchema.fieldNames === Seq("j")) } else { val batch = getJavaBatch(q4) - // 'j < 10 is not supported by the testing data source. + // Symbol("j") < 10 is not supported by the testing data source. 
assert(batch.filters.isEmpty) assert(batch.requiredSchema.fieldNames === Seq("j")) } @@ -152,7 +152,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS val df = spark.read.format(cls.getName).load() checkAnswer(df, (0 until 10).map(i => Row(i, -i))) - val q1 = df.select('j) + val q1 = df.select(Symbol("j")) checkAnswer(q1, (0 until 10).map(i => Row(-i))) if (cls == classOf[AdvancedDataSourceV2WithV2Filter]) { val batch = getBatchWithV2Filter(q1) @@ -164,7 +164,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch.requiredSchema.fieldNames === Seq("j")) } - val q2 = df.filter('i > 3) + val q2 = df.filter(Symbol("i") > 3) checkAnswer(q2, (4 until 10).map(i => Row(i, -i))) if (cls == classOf[AdvancedDataSourceV2WithV2Filter]) { val batch = getBatchWithV2Filter(q2) @@ -176,7 +176,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch.requiredSchema.fieldNames === Seq("i", "j")) } - val q3 = df.select('i).filter('i > 6) + val q3 = df.select(Symbol("i")).filter(Symbol("i") > 6) checkAnswer(q3, (7 until 10).map(i => Row(i))) if (cls == classOf[AdvancedDataSourceV2WithV2Filter]) { val batch = getBatchWithV2Filter(q3) @@ -188,16 +188,16 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch.requiredSchema.fieldNames === Seq("i")) } - val q4 = df.select('j).filter('j < -10) + val q4 = df.select(Symbol("j")).filter(Symbol("j") < -10) checkAnswer(q4, Nil) if (cls == classOf[AdvancedDataSourceV2WithV2Filter]) { val batch = getBatchWithV2Filter(q4) - // 'j < 10 is not supported by the testing data source. + // Symbol("j") < 10 is not supported by the testing data source. assert(batch.filters.isEmpty) assert(batch.requiredSchema.fieldNames === Seq("j")) } else { val batch = getJavaBatchWithV2Filter(q4) - // 'j < 10 is not supported by the testing data source. + // Symbol("j") < 10 is not supported by the testing data source. 
assert(batch.filters.isEmpty) assert(batch.requiredSchema.fieldNames === Seq("j")) } @@ -210,8 +210,8 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS withClue(cls.getName) { val df = spark.read.format(cls.getName).load() checkAnswer(df, (0 until 90).map(i => Row(i, -i))) - checkAnswer(df.select('j), (0 until 90).map(i => Row(-i))) - checkAnswer(df.filter('i > 50), (51 until 90).map(i => Row(i, -i))) + checkAnswer(df.select(Symbol("j")), (0 until 90).map(i => Row(-i))) + checkAnswer(df.filter(Symbol("i") > 50), (51 until 90).map(i => Row(i, -i))) } } } @@ -235,12 +235,12 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS "supports external metadata") { withTempDir { dir => val cls = classOf[SupportsExternalMetadataWritableDataSource].getName - spark.range(10).select('id as 'i, -'id as 'j).write.format(cls) - .option("path", dir.getCanonicalPath).mode("append").save() + spark.range(10).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls).option("path", dir.getCanonicalPath).mode("append").save() val schema = new StructType().add("i", "long").add("j", "long") checkAnswer( spark.read.format(cls).option("path", dir.getCanonicalPath).schema(schema).load(), - spark.range(10).select('id, -'id)) + spark.range(10).select(Symbol("id"), -Symbol("id"))) } } @@ -251,25 +251,25 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS val df = spark.read.format(cls.getName).load() checkAnswer(df, Seq(Row(1, 4), Row(1, 4), Row(3, 6), Row(2, 6), Row(4, 2), Row(4, 2))) - val groupByColA = df.groupBy('i).agg(sum('j)) + val groupByColA = df.groupBy(Symbol("i")).agg(sum(Symbol("j"))) checkAnswer(groupByColA, Seq(Row(1, 8), Row(2, 6), Row(3, 6), Row(4, 4))) assert(collectFirst(groupByColA.queryExecution.executedPlan) { case e: ShuffleExchangeExec => e }.isEmpty) - val groupByColAB = df.groupBy('i, 'j).agg(count("*")) + val groupByColAB = df.groupBy(Symbol("i"), Symbol("j")).agg(count("*")) checkAnswer(groupByColAB, Seq(Row(1, 4, 2), Row(2, 6, 1), Row(3, 6, 1), Row(4, 2, 2))) assert(collectFirst(groupByColAB.queryExecution.executedPlan) { case e: ShuffleExchangeExec => e }.isEmpty) - val groupByColB = df.groupBy('j).agg(sum('i)) + val groupByColB = df.groupBy(Symbol("j")).agg(sum(Symbol("i"))) checkAnswer(groupByColB, Seq(Row(2, 8), Row(4, 2), Row(6, 5))) assert(collectFirst(groupByColB.queryExecution.executedPlan) { case e: ShuffleExchangeExec => e }.isDefined) - val groupByAPlusB = df.groupBy('i + 'j).agg(count("*")) + val groupByAPlusB = df.groupBy(Symbol("i") + Symbol("j")).agg(count("*")) checkAnswer(groupByAPlusB, Seq(Row(5, 2), Row(6, 2), Row(8, 1), Row(9, 1))) assert(collectFirst(groupByAPlusB.queryExecution.executedPlan) { case e: ShuffleExchangeExec => e @@ -307,37 +307,43 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS val path = file.getCanonicalPath assert(spark.read.format(cls.getName).option("path", path).load().collect().isEmpty) - spark.range(10).select('id as 'i, -'id as 'j).write.format(cls.getName) + spark.range(10).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls.getName) .option("path", path).mode("append").save() checkAnswer( spark.read.format(cls.getName).option("path", path).load(), - spark.range(10).select('id, -'id)) + spark.range(10).select(Symbol("id"), -Symbol("id"))) // default save mode is ErrorIfExists intercept[AnalysisException] { - spark.range(10).select('id as 'i, -'id as 
'j).write.format(cls.getName) + spark.range(10).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls.getName) .option("path", path).save() } - spark.range(10).select('id as 'i, -'id as 'j).write.mode("append").format(cls.getName) + spark.range(10).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.mode("append").format(cls.getName) .option("path", path).save() checkAnswer( spark.read.format(cls.getName).option("path", path).load(), - spark.range(10).union(spark.range(10)).select('id, -'id)) + spark.range(10).union(spark.range(10)).select(Symbol("id"), -Symbol("id"))) - spark.range(5).select('id as 'i, -'id as 'j).write.format(cls.getName) + spark.range(5).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls.getName) .option("path", path).mode("overwrite").save() checkAnswer( spark.read.format(cls.getName).option("path", path).load(), - spark.range(5).select('id, -'id)) + spark.range(5).select(Symbol("id"), -Symbol("id"))) val e = intercept[AnalysisException] { - spark.range(5).select('id as 'i, -'id as 'j).write.format(cls.getName) + spark.range(5).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls.getName) .option("path", path).mode("ignore").save() } assert(e.message.contains("please use Append or Overwrite modes instead")) val e2 = intercept[AnalysisException] { - spark.range(5).select('id as 'i, -'id as 'j).write.format(cls.getName) + spark.range(5).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls.getName) .option("path", path).mode("error").save() } assert(e2.getMessage.contains("please use Append or Overwrite modes instead")) @@ -354,7 +360,8 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS } } // this input data will fail to read middle way. 
- val input = spark.range(15).select(failingUdf('id).as('i)).select('i, -'i as 'j) + val input = spark.range(15).select(failingUdf(Symbol("id")).as(Symbol("i"))) + .select(Symbol("i"), -Symbol("i") as Symbol("j")) val e3 = intercept[SparkException] { input.write.format(cls.getName).option("path", path).mode("overwrite").save() } @@ -374,11 +381,13 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(spark.read.format(cls.getName).option("path", path).load().collect().isEmpty) val numPartition = 6 - spark.range(0, 10, 1, numPartition).select('id as 'i, -'id as 'j).write.format(cls.getName) + spark.range(0, 10, 1, numPartition) + .select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls.getName) .mode("append").option("path", path).save() checkAnswer( spark.read.format(cls.getName).option("path", path).load(), - spark.range(10).select('id, -'id)) + spark.range(10).select(Symbol("id"), -Symbol("id"))) assert(SimpleCounter.getCounter == numPartition, "method onDataWriterCommit should be called as many as the number of partitions") @@ -395,7 +404,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS test("SPARK-23301: column pruning with arbitrary expressions") { val df = spark.read.format(classOf[AdvancedDataSourceV2].getName).load() - val q1 = df.select('i + 1) + val q1 = df.select(Symbol("i") + 1) checkAnswer(q1, (1 until 11).map(i => Row(i))) val batch1 = getBatch(q1) assert(batch1.requiredSchema.fieldNames === Seq("i")) @@ -406,14 +415,14 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS assert(batch2.requiredSchema.isEmpty) // 'j === 1 can't be pushed down, but we should still be able do column pruning - val q3 = df.filter('j === -1).select('j * 2) + val q3 = df.filter(Symbol("j") === -1).select(Symbol("j") * 2) checkAnswer(q3, Row(-2)) val batch3 = getBatch(q3) assert(batch3.filters.isEmpty) assert(batch3.requiredSchema.fieldNames === Seq("j")) // column pruning should work with other operators. 
- val q4 = df.sort('i).limit(1).select('i + 1) + val q4 = df.sort(Symbol("i")).limit(1).select(Symbol("i") + 1) checkAnswer(q4, Row(1)) val batch4 = getBatch(q4) assert(batch4.requiredSchema.fieldNames === Seq("i")) @@ -435,7 +444,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS val df = spark.read.format(classOf[AdvancedDataSourceV2].getName).load() checkCanonicalizedOutput(df, 2, 2) - checkCanonicalizedOutput(df.select('i), 2, 1) + checkCanonicalizedOutput(df.select(Symbol("i")), 2, 1) } test("SPARK-25425: extra options should override sessions options during reading") { @@ -474,7 +483,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS withTempView("t1") { val t2 = spark.read.format(classOf[SimpleDataSourceV2].getName).load() Seq(2, 3).toDF("a").createTempView("t1") - val df = t2.where("i < (select max(a) from t1)").select('i) + val df = t2.where("i < (select max(a) from t1)").select(Symbol("i")) val subqueries = stripAQEPlan(df.queryExecution.executedPlan).collect { case p => p.subqueries }.flatten @@ -493,8 +502,8 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS Seq(classOf[AdvancedDataSourceV2], classOf[JavaAdvancedDataSourceV2]).foreach { cls => withClue(cls.getName) { val df = spark.read.format(cls.getName).load() - val q1 = df.select('i).filter('i > 6) - val q2 = df.select('i).filter('i > 5) + val q1 = df.select(Symbol("i")).filter(Symbol("i") > 6) + val q2 = df.select(Symbol("i")).filter(Symbol("i") > 5) val scan1 = getScanExec(q1) val scan2 = getScanExec(q2) assert(!scan1.equals(scan2)) @@ -507,7 +516,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS withClue(cls.getName) { val df = spark.read.format(cls.getName).load() // before SPARK-33267 below query just threw NPE - df.select('i).where("i in (1, null)").collect() + df.select(Symbol("i")).where("i in (1, null)").collect() } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala index 9cb524c2c3822..473f679b4b99d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala @@ -75,7 +75,7 @@ class SupportsCatalogOptionsSuite extends QueryTest with SharedSparkSession with saveMode: SaveMode, withCatalogOption: Option[String], partitionBy: Seq[String]): Unit = { - val df = spark.range(10).withColumn("part", 'id % 5) + val df = spark.range(10).withColumn("part", Symbol("id") % 5) val dfw = df.write.format(format).mode(saveMode).option("name", "t1") withCatalogOption.foreach(cName => dfw.option("catalog", cName)) dfw.partitionBy(partitionBy: _*).save() @@ -140,7 +140,7 @@ class SupportsCatalogOptionsSuite extends QueryTest with SharedSparkSession with test("Ignore mode if table exists - session catalog") { sql(s"create table t1 (id bigint) using $format") - val df = spark.range(10).withColumn("part", 'id % 5) + val df = spark.range(10).withColumn("part", Symbol("id") % 5) val dfw = df.write.format(format).mode(SaveMode.Ignore).option("name", "t1") dfw.save() @@ -152,7 +152,7 @@ class SupportsCatalogOptionsSuite extends QueryTest with SharedSparkSession with test("Ignore mode if table exists - testcat catalog") { sql(s"create table $catalogName.t1 (id bigint) using $format") - val df = spark.range(10).withColumn("part", 
'id % 5) + val df = spark.range(10).withColumn("part", Symbol("id") % 5) val dfw = df.write.format(format).mode(SaveMode.Ignore).option("name", "t1") dfw.option("catalog", catalogName).save() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala index a33b9fad7ff4f..06fc2022c01ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala @@ -35,9 +35,9 @@ class AggregatingAccumulatorSuite extends SparkFunSuite with SharedSparkSession with ExpressionEvalHelper { - private val a = 'a.long - private val b = 'b.string - private val c = 'c.double + private val a = Symbol("a").long + private val b = Symbol("b").string + private val c = Symbol("c").double private val inputAttributes = Seq(a, b, c) private def str(s: String): UTF8String = UTF8String.fromString(s) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index f774c4504bb43..09a880a706b0f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -133,8 +133,8 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU """.stripMargin) checkAnswer(query, identity, df.select( - 'a.cast("string"), - 'b.cast("string"), + Symbol("a").cast("string"), + Symbol("b").cast("string"), 'c.cast("string"), 'd.cast("string"), 'e.cast("string")).collect()) @@ -164,7 +164,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU 'b.cast("string").as("value")).collect()) checkAnswer( - df.select('a, 'b), + df.select(Symbol("a"), Symbol("b")), (child: SparkPlan) => createScriptTransformationExec( script = "cat", output = Seq( @@ -178,7 +178,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU 'b.cast("string").as("value")).collect()) checkAnswer( - df.select('a), + df.select(Symbol("a")), (child: SparkPlan) => createScriptTransformationExec( script = "cat", output = Seq( @@ -242,7 +242,8 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU child = child, ioschema = serde ), - df.select('a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j).collect()) + df.select(Symbol("a"), Symbol("b"), Symbol("c"), Symbol("d"), Symbol("e"), + Symbol("f"), Symbol("g"), Symbol("h"), Symbol("i"), Symbol("j")).collect()) } } } @@ -282,7 +283,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU child = child, ioschema = defaultIOSchema ), - df.select('a, 'b, 'c, 'd, 'e).collect()) + df.select(Symbol("a"), Symbol("b"), Symbol("c"), Symbol("d"), Symbol("e")).collect()) } } @@ -304,7 +305,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU |USING 'cat' AS (a timestamp, b date) |FROM v """.stripMargin) - checkAnswer(query, identity, df.select('a, 'b).collect()) + checkAnswer(query, identity, df.select(Symbol("a"), Symbol("b")).collect()) } } } @@ -379,7 +380,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) checkAnswer( - df.select('a, 'b), + 
df.select(Symbol("a"), Symbol("b")), (child: SparkPlan) => createScriptTransformationExec( script = "cat", output = Seq( @@ -452,10 +453,10 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU (Array(6, 7, 8), Array(Array(6, 7), Array(8)), Map("c" -> 3), Map("d" -> Array("e", "f"))) ).toDF("a", "b", "c", "d") - .select('a, 'b, 'c, 'd, - struct('a, 'b).as("e"), - struct('a, 'd).as("f"), - struct(struct('a, 'b), struct('a, 'd)).as("g") + .select(Symbol("a"), Symbol("b"), Symbol("c"), Symbol("d"), + struct(Symbol("a"), Symbol("b")).as("e"), + struct(Symbol("a"), Symbol("d")).as("f"), + struct(struct(Symbol("a"), Symbol("b")), struct(Symbol("a"), Symbol("d"))).as("g") ) checkAnswer( @@ -483,7 +484,8 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU child = child, ioschema = defaultIOSchema ), - df.select('a, 'b, 'c, 'd, 'e, 'f, 'g).collect()) + df.select(Symbol("a"), Symbol("b"), Symbol("c"), Symbol("d"), Symbol("e"), + Symbol("f"), Symbol("g")).collect()) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/CoGroupedIteratorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/CoGroupedIteratorSuite.scala index 4ff96e6574cac..e4f17eb60108d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/CoGroupedIteratorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/CoGroupedIteratorSuite.scala @@ -26,9 +26,11 @@ class CoGroupedIteratorSuite extends SparkFunSuite with ExpressionEvalHelper { test("basic") { val leftInput = Seq(create_row(1, "a"), create_row(1, "b"), create_row(2, "c")).iterator val rightInput = Seq(create_row(1, 2L), create_row(2, 3L), create_row(3, 4L)).iterator - val leftGrouped = GroupedIterator(leftInput, Seq('i.int.at(0)), Seq('i.int, 's.string)) - val rightGrouped = GroupedIterator(rightInput, Seq('i.int.at(0)), Seq('i.int, 'l.long)) - val cogrouped = new CoGroupedIterator(leftGrouped, rightGrouped, Seq('i.int)) + val leftGrouped = GroupedIterator(leftInput, Seq(Symbol("i").int.at(0)), + Seq(Symbol("i").int, Symbol("s").string)) + val rightGrouped = GroupedIterator(rightInput, Seq(Symbol("i").int.at(0)), + Seq(Symbol("i").int, Symbol("l").long)) + val cogrouped = new CoGroupedIterator(leftGrouped, rightGrouped, Seq(Symbol("i").int)) val result = cogrouped.map { case (key, leftData, rightData) => @@ -52,7 +54,8 @@ class CoGroupedIteratorSuite extends SparkFunSuite with ExpressionEvalHelper { test("SPARK-11393: respect the fact that GroupedIterator.hasNext is not idempotent") { val leftInput = Seq(create_row(2, "a")).iterator val rightInput = Seq(create_row(1, 2L)).iterator - val leftGrouped = GroupedIterator(leftInput, Seq('i.int.at(0)), Seq('i.int, 's.string)) + val leftGrouped = GroupedIterator(leftInput, Seq(Symbol("i").int.at(0)), + Seq(Symbol("i").int, Symbol("s").string)) val rightGrouped = GroupedIterator(rightInput, Seq('i.int.at(0)), Seq('i.int, 'l.long)) val cogrouped = new CoGroupedIterator(leftGrouped, rightGrouped, Seq('i.int)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/GroupedIteratorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/GroupedIteratorSuite.scala index 4b2a2b439c89e..06c51cee02019 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/GroupedIteratorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/GroupedIteratorSuite.scala @@ -32,7 +32,7 @@ class GroupedIteratorSuite extends SparkFunSuite { val fromRow = encoder.createDeserializer() val 
input = Seq(Row(1, "a"), Row(1, "b"), Row(2, "c")) val grouped = GroupedIterator(input.iterator.map(toRow), - Seq('i.int.at(0)), schema.toAttributes) + Seq(Symbol("i").int.at(0)), schema.toAttributes) val result = grouped.map { case (key, data) => @@ -59,7 +59,7 @@ class GroupedIteratorSuite extends SparkFunSuite { Row(3, 2L, "e")) val grouped = GroupedIterator(input.iterator.map(toRow), - Seq('i.int.at(0), 'l.long.at(1)), schema.toAttributes) + Seq(Symbol("i").int.at(0), Symbol("l").long.at(1)), schema.toAttributes) val result = grouped.map { case (key, data) => @@ -80,7 +80,7 @@ class GroupedIteratorSuite extends SparkFunSuite { val toRow = encoder.createSerializer() val input = Seq(Row(1, "a"), Row(1, "b"), Row(2, "c")) val grouped = GroupedIterator(input.iterator.map(toRow), - Seq('i.int.at(0)), schema.toAttributes) + Seq(Symbol("i").int.at(0)), schema.toAttributes) assert(grouped.length == 2) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 2ab1b6d4963a5..dfc1b70cf4a5d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -59,18 +59,21 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("count is partially aggregated") { - val query = testData.groupBy('value).agg(count('key)).queryExecution.analyzed + val query = testData.groupBy(Symbol("value")).agg(count(Symbol("key"))).queryExecution.analyzed testPartialAggregationPlan(query) } test("count distinct is partially aggregated") { - val query = testData.groupBy('value).agg(count_distinct('key)).queryExecution.analyzed + val query = testData.groupBy(Symbol("value")).agg(count_distinct(Symbol("key"))) + .queryExecution.analyzed testPartialAggregationPlan(query) } test("mixed aggregates are partially aggregated") { val query = - testData.groupBy('value).agg(count('value), count_distinct('key)).queryExecution.analyzed + testData.groupBy(Symbol("value")) + .agg(count(Symbol("value")), count_distinct(Symbol("key"))) + .queryExecution.analyzed testPartialAggregationPlan(query) } @@ -193,45 +196,47 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("efficient terminal limit -> sort should use TakeOrderedAndProject") { - val query = testData.select('key, 'value).sort('key).limit(2) + val query = testData.select(Symbol("key"), Symbol("value")).sort(Symbol("key")).limit(2) val planned = query.queryExecution.executedPlan assert(planned.isInstanceOf[execution.TakeOrderedAndProjectExec]) - assert(planned.output === testData.select('key, 'value).logicalPlan.output) + assert(planned.output === testData.select(Symbol("key"), Symbol("value")).logicalPlan.output) } test("terminal limit -> project -> sort should use TakeOrderedAndProject") { - val query = testData.select('key, 'value).sort('key).select('value, 'key).limit(2) + val query = testData.select(Symbol("key"), Symbol("value")).sort(Symbol("key")) + .select(Symbol("value"), Symbol("key")).limit(2) val planned = query.queryExecution.executedPlan assert(planned.isInstanceOf[execution.TakeOrderedAndProjectExec]) - assert(planned.output === testData.select('value, 'key).logicalPlan.output) + assert(planned.output === testData.select(Symbol("value"), Symbol("key")).logicalPlan.output) } test("terminal limits that are not handled by TakeOrderedAndProject should use CollectLimit") { - val query = 
testData.select('value).limit(2) + val query = testData.select(Symbol("value")).limit(2) val planned = query.queryExecution.sparkPlan assert(planned.isInstanceOf[CollectLimitExec]) - assert(planned.output === testData.select('value).logicalPlan.output) + assert(planned.output === testData.select(Symbol("value")).logicalPlan.output) } test("TakeOrderedAndProject can appear in the middle of plans") { - val query = testData.select('key, 'value).sort('key).limit(2).filter('key === 3) + val query = testData.select(Symbol("key"), Symbol("value")) + .sort(Symbol("key")).limit(2).filter('key === 3) val planned = query.queryExecution.executedPlan assert(planned.find(_.isInstanceOf[TakeOrderedAndProjectExec]).isDefined) } test("CollectLimit can appear in the middle of a plan when caching is used") { - val query = testData.select('key, 'value).limit(2).cache() + val query = testData.select(Symbol("key"), Symbol("value")).limit(2).cache() val planned = query.queryExecution.optimizedPlan.asInstanceOf[InMemoryRelation] assert(planned.cachedPlan.isInstanceOf[CollectLimitExec]) } test("TakeOrderedAndProjectExec appears only when number of limit is below the threshold.") { withSQLConf(SQLConf.TOP_K_SORT_FALLBACK_THRESHOLD.key -> "1000") { - val query0 = testData.select('value).orderBy('key).limit(100) + val query0 = testData.select(Symbol("value")).orderBy(Symbol("key")).limit(100) val planned0 = query0.queryExecution.executedPlan assert(planned0.find(_.isInstanceOf[TakeOrderedAndProjectExec]).isDefined) - val query1 = testData.select('value).orderBy('key).limit(2000) + val query1 = testData.select(Symbol("value")).orderBy(Symbol("key")).limit(2000) val planned1 = query1.queryExecution.executedPlan assert(planned1.find(_.isInstanceOf[TakeOrderedAndProjectExec]).isEmpty) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala index 751078d08fda9..21702b6cf582c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala @@ -51,7 +51,7 @@ abstract class RemoveRedundantSortsSuiteBase test("remove redundant sorts with limit") { withTempView("t") { - spark.range(100).select('id as "key").createOrReplaceTempView("t") + spark.range(100).select(Symbol("id") as "key").createOrReplaceTempView("t") val query = """ |SELECT key FROM @@ -64,8 +64,8 @@ abstract class RemoveRedundantSortsSuiteBase test("remove redundant sorts with broadcast hash join") { withTempView("t1", "t2") { - spark.range(1000).select('id as "key").createOrReplaceTempView("t1") - spark.range(1000).select('id as "key").createOrReplaceTempView("t2") + spark.range(1000).select(Symbol("id") as "key").createOrReplaceTempView("t1") + spark.range(1000).select(Symbol("id") as "key").createOrReplaceTempView("t2") val queryTemplate = """ |SELECT /*+ BROADCAST(%s) */ t1.key FROM @@ -100,8 +100,8 @@ abstract class RemoveRedundantSortsSuiteBase test("remove redundant sorts with sort merge join") { withTempView("t1", "t2") { - spark.range(1000).select('id as "key").createOrReplaceTempView("t1") - spark.range(1000).select('id as "key").createOrReplaceTempView("t2") + spark.range(1000).select(Symbol("id") as "key").createOrReplaceTempView("t1") + spark.range(1000).select(Symbol("id") as "key").createOrReplaceTempView("t2") val query = """ |SELECT /*+ MERGE(t1) */ t1.key FROM | (SELECT key FROM t1 WHERE 
key > 10 ORDER BY key DESC LIMIT 10) t1 @@ -123,8 +123,8 @@ abstract class RemoveRedundantSortsSuiteBase test("cached sorted data doesn't need to be re-sorted") { withSQLConf(SQLConf.REMOVE_REDUNDANT_SORTS_ENABLED.key -> "true") { - val df = spark.range(1000).select('id as "key").sort('key.desc).cache() - val resorted = df.sort('key.desc) + val df = spark.range(1000).select(Symbol("id") as "key").sort(Symbol("key").desc).cache() + val resorted = df.sort(Symbol("key").desc) val sortedAsc = df.sort('key.asc) checkNumSorts(df, 0) checkNumSorts(resorted, 0) @@ -140,7 +140,7 @@ abstract class RemoveRedundantSortsSuiteBase test("SPARK-33472: shuffled join with different left and right side partition numbers") { withTempView("t1", "t2") { - spark.range(0, 100, 1, 2).select('id as "key").createOrReplaceTempView("t1") + spark.range(0, 100, 1, 2).select(Symbol("id") as "key").createOrReplaceTempView("t1") (0 to 100).toDF("key").createOrReplaceTempView("t2") val queryTemplate = """ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 9e6974a07a4a0..68eb15b4ae097 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -593,7 +593,8 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { spark.range(10).write.saveAsTable("add_col") withView("v") { sql("CREATE VIEW v AS SELECT * FROM add_col") - spark.range(10).select('id, 'id as 'a).write.mode("overwrite").saveAsTable("add_col") + spark.range(10).select(Symbol("id"), 'id as Symbol("a")) + .write.mode("overwrite").saveAsTable("add_col") checkAnswer(sql("SELECT * FROM v"), spark.range(10).toDF()) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala index 812fdba8dda23..5fa7a4d0c71cc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala @@ -44,13 +44,15 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { checkAnswer( input.toDF("a", "b", "c"), - (child: SparkPlan) => SortExec('a.asc :: 'b.asc :: Nil, global = true, child = child), + (child: SparkPlan) => SortExec(Symbol("a").asc :: Symbol("b").asc :: Nil, + global = true, child = child), input.sortBy(t => (t._1, t._2)).map(Row.fromTuple), sortAnswers = false) checkAnswer( input.toDF("a", "b", "c"), - (child: SparkPlan) => SortExec('b.asc :: 'a.asc :: Nil, global = true, child = child), + (child: SparkPlan) => SortExec(Symbol("b").asc :: Symbol("a").asc :: Nil, + global = true, child = child), input.sortBy(t => (t._2, t._1)).map(Row.fromTuple), sortAnswers = false) } @@ -59,9 +61,9 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { checkThatPlansAgree( (1 to 100).map(v => Tuple1(v)).toDF().selectExpr("NULL as a"), (child: SparkPlan) => - GlobalLimitExec(10, SortExec('a.asc :: Nil, global = true, child = child)), + GlobalLimitExec(10, SortExec(Symbol("a").asc :: Nil, global = true, child = child)), (child: SparkPlan) => - GlobalLimitExec(10, ReferenceSort('a.asc :: Nil, global = true, child)), + GlobalLimitExec(10, ReferenceSort(Symbol("a").asc :: Nil, global = true, child)), sortAnswers = false ) } @@ -70,15 +72,15 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { checkThatPlansAgree( (1 to 100).map(v => 
Tuple1(v)).toDF("a"), (child: SparkPlan) => - GlobalLimitExec(10, SortExec('a.asc :: Nil, global = true, child = child)), + GlobalLimitExec(10, SortExec(Symbol("a").asc :: Nil, global = true, child = child)), (child: SparkPlan) => - GlobalLimitExec(10, ReferenceSort('a.asc :: Nil, global = true, child)), + GlobalLimitExec(10, ReferenceSort(Symbol("a").asc :: Nil, global = true, child)), sortAnswers = false ) } test("sorting does not crash for large inputs") { - val sortOrder = 'a.asc :: Nil + val sortOrder = Symbol("a").asc :: Nil val stringLength = 1024 * 1024 * 2 checkThatPlansAgree( Seq(Tuple1("a" * stringLength), Tuple1("b" * stringLength)).toDF("a").repartition(1), @@ -92,8 +94,8 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { AccumulatorSuite.verifyPeakExecutionMemorySet(sparkContext, "unsafe external sort") { checkThatPlansAgree( (1 to 100).map(v => Tuple1(v)).toDF("a"), - (child: SparkPlan) => SortExec('a.asc :: Nil, global = true, child = child), - (child: SparkPlan) => ReferenceSort('a.asc :: Nil, global = true, child), + (child: SparkPlan) => SortExec(Symbol("a").asc :: Nil, global = true, child = child), + (child: SparkPlan) => ReferenceSort(Symbol("a").asc :: Nil, global = true, child), sortAnswers = false) } } @@ -106,7 +108,8 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { ) checkAnswer( input.toDF("a", "b", "c"), - (child: SparkPlan) => SortExec(Stream('a.asc, 'b.asc, 'c.asc), global = true, child = child), + (child: SparkPlan) => SortExec(Stream(Symbol("a").asc, 'b.asc, 'c.asc), + global = true, child = child), input.sortBy(t => (t._1, t._2, t._3)).map(Row.fromTuple), sortAnswers = false) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index ba6dd170d89a9..e26be63b10955 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -312,7 +312,7 @@ class SparkSqlParserSuite extends AnalysisTest { Seq(AttributeReference("a", StringType)(), AttributeReference("b", StringType)(), AttributeReference("c", StringType)()), - Project(Seq('a, 'b, 'c), + Project(Seq(Symbol("a"), Symbol("b"), Symbol("c")), UnresolvedRelation(TableIdentifier("testData"))), ioSchema)) @@ -336,9 +336,9 @@ class SparkSqlParserSuite extends AnalysisTest { UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), Literal(10)), Aggregate( - Seq('a), + Seq(Symbol("a")), Seq( - 'a, + Symbol("a"), UnresolvedAlias( UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), None), UnresolvedAlias( @@ -363,12 +363,12 @@ class SparkSqlParserSuite extends AnalysisTest { AttributeReference("c", StringType)()), WithWindowDefinition( Map("w" -> WindowSpecDefinition( - Seq('a), - Seq(SortOrder('b, Ascending, NullsFirst, Seq.empty)), + Seq(Symbol("a")), + Seq(SortOrder(Symbol("b"), Ascending, NullsFirst, Seq.empty)), UnspecifiedFrame)), Project( Seq( - 'a, + Symbol("a"), UnresolvedAlias( UnresolvedWindowExpression( UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), @@ -403,9 +403,9 @@ class SparkSqlParserSuite extends AnalysisTest { UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), Literal(10)), Aggregate( - Seq('a, 'myCol, 'myCol2), + Seq(Symbol("a"), Symbol("myCol"), Symbol("myCol2")), Seq( - 'a, + Symbol("a"), UnresolvedAlias( 
UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), None), UnresolvedAlias( @@ -415,7 +415,7 @@ class SparkSqlParserSuite extends AnalysisTest { UnresolvedGenerator( FunctionIdentifier("explode"), Seq(UnresolvedAttribute("myTable.myCol"))), - Nil, false, Option("mytable2"), Seq('myCol2), + Nil, false, Option("mytable2"), Seq(Symbol("myCol2")), Generate( UnresolvedGenerator( FunctionIdentifier("explode"), @@ -423,7 +423,7 @@ class SparkSqlParserSuite extends AnalysisTest { Seq( UnresolvedFunction("array", Seq(Literal(1), Literal(2), Literal(3)), false)), false))), - Nil, false, Option("mytable"), Seq('myCol), + Nil, false, Option("mytable"), Seq(Symbol("myCol")), UnresolvedRelation(TableIdentifier("testData")))))), ioSchema)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala index c025670fb895e..3718b3a3c3378 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala @@ -49,7 +49,7 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark { val schema = writeWideRow(path.getAbsolutePath, rowsNum, numCols) val cols = (0 until numCols).map { idx => - from_json('value, schema).getField(s"col$idx") + from_json(Symbol("value"), schema).getField(s"col$idx") } Seq( @@ -88,7 +88,7 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark { val schema = writeWideRow(path.getAbsolutePath, rowsNum, numCols) val predicate = (0 until numCols).map { idx => - (from_json('value, schema).getField(s"col$idx") >= Literal(100000)).expr + (from_json(Symbol("value"), schema).getField(s"col$idx") >= Literal(100000)).expr }.asInstanceOf[Seq[Expression]].reduce(Or) Seq( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala index 6ec5c6287eed1..ce48945e52c5d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala @@ -58,7 +58,7 @@ class TakeOrderedAndProjectSuite extends SparkPlanTest with SharedSparkSession { private def noOpFilter(plan: SparkPlan): SparkPlan = FilterExec(Literal(true), plan) val limit = 250 - val sortOrder = 'a.desc :: 'b.desc :: Nil + val sortOrder = Symbol("a").desc :: Symbol("b").desc :: Nil test("TakeOrderedAndProject.doExecute without project") { withClue(s"seed = $seed") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index 7332d49b942f8..b5b67287447c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -573,7 +573,7 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession import testImplicits._ withTempPath { dir => val path = dir.getCanonicalPath - val df = spark.range(10).select(Seq.tabulate(201) {i => ('id + i).as(s"c$i")} : _*) + val df = spark.range(10).select(Seq.tabulate(201) {i => (Symbol("id") + i).as(s"c$i")} : _*) df.write.mode(SaveMode.Overwrite).parquet(path) 
withSQLConf(SQLConf.WHOLESTAGE_MAX_NUM_FIELDS.key -> "202", @@ -590,7 +590,7 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession test("Control splitting consume function by operators with config") { import testImplicits._ - val df = spark.range(10).select(Seq.tabulate(2) {i => ('id + i).as(s"c$i")} : _*) + val df = spark.range(10).select(Seq.tabulate(2) {i => (Symbol("id") + i).as(s"c$i")} : _*) Seq(true, false).foreach { config => withSQLConf(SQLConf.WHOLESTAGE_SPLIT_CONSUME_FUNC_BY_OPERATOR.key -> s"$config") { @@ -653,9 +653,9 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_USE_ID_IN_CLASS_NAME.key -> "true") { // the same query run twice should produce identical code, which would imply a hit in // the generated code cache. - val ds1 = spark.range(3).select('id + 2) + val ds1 = spark.range(3).select(Symbol("id") + 2) val code1 = genCode(ds1) - val ds2 = spark.range(3).select('id + 2) + val ds2 = spark.range(3).select(Symbol("id") + 2) val code2 = genCode(ds2) // same query shape as above, deliberately assert(code1 == code2, "Should produce same code") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index ef4c2d0e08031..c24cc2bab9fd1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -280,11 +280,12 @@ class AdaptiveQueryExecSuite SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", SQLConf.COALESCE_PARTITIONS_ENABLED.key -> "true", SQLConf.ADAPTIVE_OPTIMIZER_EXCLUDED_RULES.key -> AQEPropagateEmptyRelation.ruleName) { - val df1 = spark.range(10).withColumn("a", 'id) - val df2 = spark.range(10).withColumn("b", 'id) + val df1 = spark.range(10).withColumn("a", Symbol("id")) + val df2 = spark.range(10).withColumn("b", Symbol("id")) withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { - val testDf = df1.where('a > 10).join(df2.where('b > 10), Seq("id"), "left_outer") - .groupBy('a).count() + val testDf = df1.where(Symbol("a") > 10) + .join(df2.where(Symbol("b") > 10), Seq("id"), "left_outer") + .groupBy(Symbol("a")).count() checkAnswer(testDf, Seq()) val plan = testDf.queryExecution.executedPlan assert(find(plan)(_.isInstanceOf[SortMergeJoinExec]).isDefined) @@ -296,8 +297,9 @@ class AdaptiveQueryExecSuite } withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "1") { - val testDf = df1.where('a > 10).join(df2.where('b > 10), Seq("id"), "left_outer") - .groupBy('a).count() + val testDf = df1.where(Symbol("a") > 10) + .join(df2.where(Symbol("b") > 10), Seq("id"), "left_outer") + .groupBy(Symbol("a")).count() checkAnswer(testDf, Seq()) val plan = testDf.queryExecution.executedPlan assert(find(plan)(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) @@ -751,17 +753,17 @@ class AdaptiveQueryExecSuite spark .range(0, 1000, 1, 10) .select( - when('id < 250, 249) - .when('id >= 750, 1000) - .otherwise('id).as("key1"), - 'id as "value1") + when(Symbol("id") < 250, 249) + .when(Symbol("id") >= 750, 1000) + .otherwise(Symbol("id")).as("key1"), + Symbol("id") as "value1") .createOrReplaceTempView("skewData1") spark .range(0, 1000, 1, 10) .select( - when('id < 250, 249) - .otherwise('id).as("key2"), - 'id as "value2") + when(Symbol("id") < 250, 249) + .otherwise(Symbol("id")).as("key2"), + Symbol("id") 
as "value2") .createOrReplaceTempView("skewData2") def checkSkewJoin( @@ -996,17 +998,17 @@ class AdaptiveQueryExecSuite spark .range(0, 1000, 1, 10) .select( - when('id < 250, 249) - .when('id >= 750, 1000) - .otherwise('id).as("key1"), - 'id as "value1") + when(Symbol("id") < 250, 249) + .when(Symbol("id") >= 750, 1000) + .otherwise(Symbol("id")).as("key1"), + Symbol("id") as "value1") .createOrReplaceTempView("skewData1") spark .range(0, 1000, 1, 10) .select( - when('id < 250, 249) - .otherwise('id).as("key2"), - 'id as "value2") + when(Symbol("id") < 250, 249) + .otherwise(Symbol("id")).as("key2"), + Symbol("id") as "value2") .createOrReplaceTempView("skewData2") val (_, adaptivePlan) = runAdaptiveAndVerifyResult( "SELECT * FROM skewData1 join skewData2 ON key1 = key2") @@ -1084,7 +1086,7 @@ class AdaptiveQueryExecSuite test("AQE should set active session during execution") { withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { - val df = spark.range(10).select(sum('id)) + val df = spark.range(10).select(sum(Symbol("id"))) assert(df.queryExecution.executedPlan.isInstanceOf[AdaptiveSparkPlanExec]) SparkSession.setActiveSession(null) checkAnswer(df, Seq(Row(45))) @@ -1111,7 +1113,7 @@ class AdaptiveQueryExecSuite SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true") { try { spark.experimental.extraStrategies = TestStrategy :: Nil - val df = spark.range(10).groupBy('id).count() + val df = spark.range(10).groupBy(Symbol("id")).count() df.collect() } finally { spark.experimental.extraStrategies = Nil @@ -1567,7 +1569,7 @@ class AdaptiveQueryExecSuite test("SPARK-33494: Do not use local shuffle read for repartition") { withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { - val df = spark.table("testData").repartition('key) + val df = spark.table("testData").repartition(Symbol("key")) df.collect() // local shuffle read breaks partitioning and shouldn't be used for repartition operation // which is specified by users. @@ -1651,23 +1653,23 @@ class AdaptiveQueryExecSuite withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80") { // Repartition with no partition num specified. - checkBHJ(df.repartition('b), + checkBHJ(df.repartition(Symbol("b")), // The top shuffle from repartition is optimized out. optimizeOutRepartition = true, probeSideLocalRead = false, probeSideCoalescedRead = true) // Repartition with default partition num (5 in test env) specified. - checkBHJ(df.repartition(5, 'b), + checkBHJ(df.repartition(5, Symbol("b")), // The top shuffle from repartition is optimized out // The final plan must have 5 partitions, no optimization can be made to the probe side. optimizeOutRepartition = true, probeSideLocalRead = false, probeSideCoalescedRead = false) // Repartition with non-default partition num specified. - checkBHJ(df.repartition(4, 'b), + checkBHJ(df.repartition(4, Symbol("b")), // The top shuffle from repartition is not optimized out optimizeOutRepartition = false, probeSideLocalRead = true, probeSideCoalescedRead = true) // Repartition by col and project away the partition cols - checkBHJ(df.repartition('b).select('key), + checkBHJ(df.repartition(Symbol("b")).select(Symbol("key")), // The top shuffle from repartition is not optimized out optimizeOutRepartition = false, probeSideLocalRead = true, probeSideCoalescedRead = true) } @@ -1679,23 +1681,23 @@ class AdaptiveQueryExecSuite SQLConf.SKEW_JOIN_SKEWED_PARTITION_FACTOR.key -> "0", SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "10") { // Repartition with no partition num specified. 
- checkSMJ(df.repartition('b), + checkSMJ(df.repartition(Symbol("b")), // The top shuffle from repartition is optimized out. optimizeOutRepartition = true, optimizeSkewJoin = false, coalescedRead = true) // Repartition with default partition num (5 in test env) specified. - checkSMJ(df.repartition(5, 'b), + checkSMJ(df.repartition(5, Symbol("b")), // The top shuffle from repartition is optimized out. // The final plan must have 5 partitions, can't do coalesced read. optimizeOutRepartition = true, optimizeSkewJoin = false, coalescedRead = false) // Repartition with non-default partition num specified. - checkSMJ(df.repartition(4, 'b), + checkSMJ(df.repartition(4, Symbol("b")), // The top shuffle from repartition is not optimized out. optimizeOutRepartition = false, optimizeSkewJoin = true, coalescedRead = false) // Repartition by col and project away the partition cols - checkSMJ(df.repartition('b).select('key), + checkSMJ(df.repartition(Symbol("b")).select(Symbol("key")), // The top shuffle from repartition is not optimized out. optimizeOutRepartition = false, optimizeSkewJoin = true, coalescedRead = false) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala index e9bdff5853a51..31d5fd9ffdffe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala @@ -49,7 +49,7 @@ object RangeBenchmark extends SqlBasedBenchmark { } benchmark.addCase("filter after range", numIters = 4) { _ => - spark.range(N).filter('id % 100 === 0).noop() + spark.range(N).filter(Symbol("id") % 100 === 0).noop() } benchmark.addCase("count after range", numIters = 4) { _ => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala index 2cf12dd92f64c..120ddf469f4a0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala @@ -152,7 +152,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSparkSession { } test("projection") { - val logicalPlan = testData.select('value, 'key).logicalPlan + val logicalPlan = testData.select(Symbol("value"), Symbol("key")).logicalPlan val plan = spark.sessionState.executePlan(logicalPlan).sparkPlan val scan = InMemoryRelation(new TestCachedBatchSerializer(useCompression = true, 5), MEMORY_ONLY, plan, None, logicalPlan) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index 4d24b262fa03a..53d643d3ea901 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -288,12 +288,12 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { val s = ScriptTransformation("func", Seq.empty, p, null) compareTransformQuery("select transform(a, b) using 'func' from e where f < 10", - s.copy(child = p.copy(child = p.child.where('f < 10)), - output = Seq('key.string, 'value.string))) + s.copy(child = p.copy(child = p.child.where(Symbol("f") < 10)), + output 
= Seq(Symbol("key").string, Symbol("value").string))) compareTransformQuery("map a, b using 'func' as c, d from e", - s.copy(output = Seq('c.string, 'd.string))) + s.copy(output = Seq(Symbol("c").string, Symbol("d").string))) compareTransformQuery("reduce a, b using 'func' as (c int, d decimal(10, 0)) from e", - s.copy(output = Seq('c.int, 'd.decimal(10, 0)))) + s.copy(output = Seq(Symbol("c").int, Symbol("d").decimal(10, 0)))) } test("use backticks in output of Script Transform") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 00d1ed2cbc680..9da40df7dbd2d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -115,7 +115,7 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSparkSession { }.getMessage assert(e.contains("Hive support is required to CREATE Hive TABLE (AS SELECT)")) - spark.range(1).select('id as 'a, 'id as 'b).write.saveAsTable("t1") + spark.range(1).select('id as Symbol("a"), 'id as Symbol("b")).write.saveAsTable("t1") e = intercept[AnalysisException] { sql("CREATE TABLE t STORED AS parquet SELECT a, b from t1") }.getMessage @@ -1374,7 +1374,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { sql("CREATE TABLE t USING parquet SELECT 1 as a, 1 as b") checkAnswer(spark.table("t"), Row(1, 1) :: Nil) - spark.range(1).select('id as 'a, 'id as 'b).write.saveAsTable("t1") + spark.range(1).select('id as Symbol("a"), 'id as Symbol("b")).write.saveAsTable("t1") sql("CREATE TABLE t2 USING parquet SELECT a, b from t1") checkAnswer(spark.table("t2"), spark.table("t1")) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala index 37fe3c205e5d8..ef6d6f4a2968a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala @@ -26,12 +26,12 @@ import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructT class DataSourceStrategySuite extends PlanTest with SharedSparkSession { val attrInts = Seq( - 'cint.int, + Symbol("cint").int, Symbol("c.int").int, - GetStructField('a.struct(StructType( + GetStructField(Symbol("a").struct(StructType( StructField("cstr", StringType, nullable = true) :: StructField("cint", IntegerType, nullable = true) :: Nil)), 1, None), - GetStructField('a.struct(StructType( + GetStructField(Symbol("a").struct(StructType( StructField("c.int", IntegerType, nullable = true) :: StructField("cstr", StringType, nullable = true) :: Nil)), 0, None), GetStructField(Symbol("a.b").struct(StructType( @@ -40,7 +40,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { StructField("cint", IntegerType, nullable = true) :: Nil)), 2, None), GetStructField(Symbol("a.b").struct(StructType( StructField("c.int", IntegerType, nullable = true) :: Nil)), 0, None), - GetStructField(GetStructField('a.struct(StructType( + GetStructField(GetStructField(Symbol("a").struct(StructType( StructField("cstr1", StringType, nullable = true) :: StructField("b", StructType(StructField("cint", IntegerType, nullable = true) :: StructField("cstr2", StringType, nullable = true) :: Nil)) :: Nil)), 
1, None), 0, None) @@ -55,12 +55,12 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { )) val attrStrs = Seq( - 'cstr.string, + Symbol("cstr").string, Symbol("c.str").string, - GetStructField('a.struct(StructType( + GetStructField(Symbol("a").struct(StructType( StructField("cint", IntegerType, nullable = true) :: StructField("cstr", StringType, nullable = true) :: Nil)), 1, None), - GetStructField('a.struct(StructType( + GetStructField(Symbol("a").struct(StructType( StructField("c.str", StringType, nullable = true) :: StructField("cint", IntegerType, nullable = true) :: Nil)), 0, None), GetStructField(Symbol("a.b").struct(StructType( @@ -69,7 +69,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { StructField("cstr", StringType, nullable = true) :: Nil)), 2, None), GetStructField(Symbol("a.b").struct(StructType( StructField("c.str", StringType, nullable = true) :: Nil)), 0, None), - GetStructField(GetStructField('a.struct(StructType( + GetStructField(GetStructField(Symbol("a").struct(StructType( StructField("cint1", IntegerType, nullable = true) :: StructField("b", StructType(StructField("cstr", StringType, nullable = true) :: StructField("cint2", IntegerType, nullable = true) :: Nil)) :: Nil)), 1, None), 0, None) @@ -280,7 +280,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { }} test("SPARK-26865 DataSourceV2Strategy should push normalized filters") { - val attrInt = 'cint.int + val attrInt = Symbol("cint").int assertResult(Seq(IsNotNull(attrInt))) { DataSourceStrategy.normalizeExprs(Seq(IsNotNull(attrInt.withName("CiNt"))), Seq(attrInt)) } @@ -308,11 +308,11 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { } // `Abs(col)` can not be pushed down, so it returns `None` - assert(PushableColumnAndNestedColumn.unapply(Abs('col.int)) === None) + assert(PushableColumnAndNestedColumn.unapply(Abs(Symbol("col").int)) === None) } test("SPARK-36644: Push down boolean column filter") { - testTranslateFilter('col.boolean, Some(sources.EqualTo("col", true))) + testTranslateFilter(Symbol("col").boolean, Some(sources.EqualTo("col", true))) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala index 6ba3d2723412b..3034d4fe67c1b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala @@ -143,7 +143,8 @@ class DataSourceSuite extends SharedSparkSession with PrivateMethodTester { test("Data source options should be propagated in method checkAndGlobPathIfNecessary") { val dataSourceOptions = Map("fs.defaultFS" -> "nonexistentFs://nonexistentFs") val dataSource = DataSource(spark, "parquet", Seq("/path3"), options = dataSourceOptions) - val checkAndGlobPathIfNecessary = PrivateMethod[Seq[Path]]('checkAndGlobPathIfNecessary) + val checkAndGlobPathIfNecessary = + PrivateMethod[Seq[Path]](Symbol("checkAndGlobPathIfNecessary")) val message = intercept[java.io.IOException] { dataSource invokePrivate checkAndGlobPathIfNecessary(false, false) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala index f492fc653653e..c9e15f71524d4 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala @@ -39,12 +39,15 @@ class FileFormatWriterSuite test("SPARK-22252: FileFormatWriter should respect the input query schema") { withTable("t1", "t2", "t3", "t4") { - spark.range(1).select('id as 'col1, 'id as 'col2).write.saveAsTable("t1") + spark.range(1).select(Symbol("id") as Symbol("col1"), Symbol("id") as Symbol("col2")) + .write.saveAsTable("t1") spark.sql("select COL1, COL2 from t1").write.saveAsTable("t2") checkAnswer(spark.table("t2"), Row(0, 0)) // Test picking part of the columns when writing. - spark.range(1).select('id, 'id as 'col1, 'id as 'col2).write.saveAsTable("t3") + spark.range(1) + .select(Symbol("id"), Symbol("id") as Symbol("col1"), Symbol("id") as Symbol("col2")) + .write.saveAsTable("t3") spark.sql("select COL1, COL2 from t3").write.saveAsTable("t4") checkAnswer(spark.table("t4"), Row(0, 0)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala index 634016664dfb6..b14ccb089f449 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala @@ -60,7 +60,7 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre "file9" -> 1, "file10" -> 1)) - checkScan(table.select('c1)) { partitions => + checkScan(table.select(Symbol("c1"))) { partitions => // 10 one byte files should fit in a single partition with 10 files. 
assert(partitions.size == 1, "when checking partitions") assert(partitions.head.files.size == 10, "when checking partition 1") @@ -83,7 +83,7 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre withSQLConf(SQLConf.FILES_MAX_PARTITION_BYTES.key -> "11", SQLConf.FILES_OPEN_COST_IN_BYTES.key -> "1") { - checkScan(table.select('c1)) { partitions => + checkScan(table.select(Symbol("c1"))) { partitions => // 5 byte files should be laid out [(5, 5), (5)] assert(partitions.size == 2, "when checking partitions") assert(partitions(0).files.size == 2, "when checking partition 1") @@ -108,7 +108,7 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre withSQLConf(SQLConf.FILES_MAX_PARTITION_BYTES.key -> "10", SQLConf.FILES_OPEN_COST_IN_BYTES.key -> "1") { - checkScan(table.select('c1)) { partitions => + checkScan(table.select(Symbol("c1"))) { partitions => // Files should be laid out [(0-10), (10-15, 4)] assert(partitions.size == 2, "when checking partitions") assert(partitions(0).files.size == 1, "when checking partition 1") @@ -141,7 +141,7 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre withSQLConf(SQLConf.FILES_MAX_PARTITION_BYTES.key -> "4", SQLConf.FILES_OPEN_COST_IN_BYTES.key -> "1") { - checkScan(table.select('c1)) { partitions => + checkScan(table.select(Symbol("c1"))) { partitions => // Files should be laid out [(file1), (file2, file3), (file4, file5), (file6)] assert(partitions.size == 4, "when checking partitions") assert(partitions(0).files.size == 1, "when checking partition 1") @@ -359,7 +359,7 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre withSQLConf( SQLConf.FILES_MAX_PARTITION_BYTES.key -> "2", SQLConf.FILES_OPEN_COST_IN_BYTES.key -> "0") { - checkScan(table.select('c1)) { partitions => + checkScan(table.select(Symbol("c1"))) { partitions => assert(partitions.size == 2) assert(partitions(0).files.size == 1) assert(partitions(1).files.size == 2) @@ -375,7 +375,7 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre withSQLConf( SQLConf.FILES_MAX_PARTITION_BYTES.key -> "2", SQLConf.FILES_OPEN_COST_IN_BYTES.key -> "0") { - checkScan(table.select('c1)) { partitions => + checkScan(table.select(Symbol("c1"))) { partitions => assert(partitions.size == 3) assert(partitions(0).files.size == 1) assert(partitions(1).files.size == 2) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala index fe50e4e7f9d1a..2c227baa04fc2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala @@ -573,7 +573,7 @@ abstract class SchemaPruningSuite Seq(Concat(Seq($"name.first", $"name.last")), Concat(Seq($"name.last", $"name.first"))) ), - Seq('a.string, 'b.string), + Seq(Symbol("a").string, Symbol("b").string), sql("select * from contacts").logicalPlan ).toDF() checkScan(query1, "struct>") @@ -590,7 +590,7 @@ abstract class SchemaPruningSuite val name = StructType.fromDDL("first string, middle string, last string") val query2 = Expand( Seq(Seq($"name", $"name.last")), - Seq('a.struct(name), 'b.string), + Seq(Symbol("a").struct(name), Symbol("b").string), sql("select * from contacts").logicalPlan ).toDF() checkScan(query2, "struct>") @@ -909,7 +909,7 @@ abstract 
class SchemaPruningSuite .createOrReplaceTempView("table") val read = spark.table("table") - val query = read.select(explode($"items").as('item)).select(count($"*")) + val query = read.select(explode($"items").as(Symbol("item"))).select(count($"*")) checkScan(query, "struct>>") checkAnswer(query, Row(2) :: Nil) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 7bbe371879d40..9f9b7b72ab329 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -1836,7 +1836,7 @@ abstract class CSVSuite val idf = spark.read .schema(schema) .csv(path.getCanonicalPath) - .select('f15, 'f10, 'f5) + .select(Symbol("f15"), Symbol("f10"), Symbol("f5")) assert(idf.count() == 2) checkAnswer(idf, List(Row(15, 10, 5), Row(-15, -10, -5))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala index e4f6ccaa9a621..c741320d4220b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala @@ -263,7 +263,7 @@ object JsonBenchmark extends SqlBasedBenchmark { benchmark.addCase("from_json", iters) { _ => val schema = new StructType().add("a", IntegerType) - val from_json_ds = in.select(from_json('value, schema)) + val from_json_ds = in.select(from_json(Symbol("value"), schema)) from_json_ds.noop() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala index 3cb8287f09b26..b892a9e155815 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala @@ -90,7 +90,7 @@ class NoopStreamSuite extends StreamTest { .option("numPartitions", "1") .option("rowsPerSecond", "5") .load() - .select('value) + .select(Symbol("value")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala index b4073bedf5597..811953754953a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala @@ -42,7 +42,7 @@ class NoopSuite extends SharedSparkSession { withTempPath { dir => val path = dir.getCanonicalPath spark.range(numElems) - .select('id mod 10 as "key", 'id as "value") + .select(Symbol("id") mod 10 as "key", Symbol("id") as "value") .write .partitionBy("key") .parquet(path) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index 038606b854d9e..551a3f5a7cc1b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -371,7 +371,7 @@ abstract 
class OrcQueryTest extends OrcTest { withTempPath { dir => val path = dir.getCanonicalPath - spark.range(0, 10).select('id as "Acol").write.orc(path) + spark.range(0, 10).select(Symbol("id") as "Acol").write.orc(path) spark.read.orc(path).schema("Acol") intercept[IllegalArgumentException] { spark.read.orc(path).schema("acol") @@ -416,19 +416,19 @@ abstract class OrcQueryTest extends OrcTest { s"No data was filtered for predicate: $pred") } - checkPredicate('a === 5, List(5).map(Row(_, null))) - checkPredicate('a <=> 5, List(5).map(Row(_, null))) - checkPredicate('a < 5, List(1, 3).map(Row(_, null))) - checkPredicate('a <= 5, List(1, 3, 5).map(Row(_, null))) - checkPredicate('a > 5, List(7, 9).map(Row(_, null))) - checkPredicate('a >= 5, List(5, 7, 9).map(Row(_, null))) - checkPredicate('a.isNull, List(null).map(Row(_, null))) - checkPredicate('b.isNotNull, List()) - checkPredicate('a.isin(3, 5, 7), List(3, 5, 7).map(Row(_, null))) - checkPredicate('a > 0 && 'a < 3, List(1).map(Row(_, null))) - checkPredicate('a < 1 || 'a > 8, List(9).map(Row(_, null))) - checkPredicate(!('a > 3), List(1, 3).map(Row(_, null))) - checkPredicate(!('a > 0 && 'a < 3), List(3, 5, 7, 9).map(Row(_, null))) + checkPredicate(Symbol("a") === 5, List(5).map(Row(_, null))) + checkPredicate(Symbol("a") <=> 5, List(5).map(Row(_, null))) + checkPredicate(Symbol("a") < 5, List(1, 3).map(Row(_, null))) + checkPredicate(Symbol("a") <= 5, List(1, 3, 5).map(Row(_, null))) + checkPredicate(Symbol("a") > 5, List(7, 9).map(Row(_, null))) + checkPredicate(Symbol("a") >= 5, List(5, 7, 9).map(Row(_, null))) + checkPredicate(Symbol("a").isNull, List(null).map(Row(_, null))) + checkPredicate(Symbol("b").isNotNull, List()) + checkPredicate(Symbol("a").isin(3, 5, 7), List(3, 5, 7).map(Row(_, null))) + checkPredicate(Symbol("a") > 0 && Symbol("a") < 3, List(1).map(Row(_, null))) + checkPredicate(Symbol("a") < 1 || Symbol("a") > 8, List(9).map(Row(_, null))) + checkPredicate(!(Symbol("a") > 3), List(1, 3).map(Row(_, null))) + checkPredicate(!(Symbol("a") > 0 && Symbol("a") < 3), List(3, 5, 7, 9).map(Row(_, null))) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 9b554b626df85..d5180a393f61a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -1426,39 +1426,39 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared test("filter pushdown - StringStartsWith") { withParquetDataFrame((1 to 4).map(i => Tuple1(i + "str" + i))) { implicit df => checkFilterPredicate( - '_1.startsWith("").asInstanceOf[Predicate], + Symbol("_1").startsWith("").asInstanceOf[Predicate], classOf[UserDefinedByInstance[_, _]], Seq("1str1", "2str2", "3str3", "4str4").map(Row(_))) Seq("2", "2s", "2st", "2str", "2str2").foreach { prefix => checkFilterPredicate( - '_1.startsWith(prefix).asInstanceOf[Predicate], + Symbol("_1").startsWith(prefix).asInstanceOf[Predicate], classOf[UserDefinedByInstance[_, _]], "2str2") } Seq("2S", "null", "2str22").foreach { prefix => checkFilterPredicate( - '_1.startsWith(prefix).asInstanceOf[Predicate], + Symbol("_1").startsWith(prefix).asInstanceOf[Predicate], classOf[UserDefinedByInstance[_, _]], Seq.empty[Row]) } checkFilterPredicate( - 
!'_1.startsWith("").asInstanceOf[Predicate], + !Symbol("_1").startsWith("").asInstanceOf[Predicate], classOf[Operators.Not], Seq().map(Row(_))) Seq("2", "2s", "2st", "2str", "2str2").foreach { prefix => checkFilterPredicate( - !'_1.startsWith(prefix).asInstanceOf[Predicate], + !Symbol("_1").startsWith(prefix).asInstanceOf[Predicate], classOf[Operators.Not], Seq("1str1", "3str3", "4str4").map(Row(_))) } Seq("2S", "null", "2str22").foreach { prefix => checkFilterPredicate( - !'_1.startsWith(prefix).asInstanceOf[Predicate], + !Symbol("_1").startsWith(prefix).asInstanceOf[Predicate], classOf[Operators.Not], Seq("1str1", "2str2", "3str3", "4str4").map(Row(_))) } @@ -1472,7 +1472,7 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared // SPARK-28371: make sure filter is null-safe. withParquetDataFrame(Seq(Tuple1[String](null))) { implicit df => checkFilterPredicate( - '_1.startsWith("blah").asInstanceOf[Predicate], + Symbol("_1").startsWith("blah").asInstanceOf[Predicate], classOf[UserDefinedByInstance[_, _]], Seq.empty[Row]) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index c70ac8084a841..99b2d9844ed1b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -187,7 +187,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession .range(1000) // Parquet doesn't allow column names with spaces, have to add an alias here. // Minus 500 here so that negative decimals are also tested. - .select((('id - 500) / 100.0) cast decimal as 'dec) + .select(((Symbol("id") - 500) / 100.0) cast decimal as Symbol("dec")) .coalesce(1) } @@ -802,7 +802,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession withTempPath { dir => val m2 = intercept[SparkException] { - val df = spark.range(1).select('id as 'a, 'id as 'b).coalesce(1) + val df = spark.range(1).select(Symbol("id") as Symbol("a"), Symbol("id") as Symbol("b")) + .coalesce(1) df.write.partitionBy("a").options(extraOptions).parquet(dir.getCanonicalPath) }.getCause.getMessage assert(m2.contains("Intentional exception for testing purposes")) @@ -868,7 +869,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession checkAnswer( // Decimal column in this file is encoded using plain dictionary readResourceParquetFile("test-data/dec-in-i32.parquet"), - spark.range(1 << 4).select('id % 10 cast DecimalType(5, 2) as 'i32_dec)) + spark.range(1 << 4).select(Symbol("id") % 10 cast DecimalType(5, 2) as Symbol("i32_dec"))) } } @@ -877,7 +878,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession checkAnswer( // Decimal column in this file is encoded using plain dictionary readResourceParquetFile("test-data/dec-in-i64.parquet"), - spark.range(1 << 4).select('id % 10 cast DecimalType(10, 2) as 'i64_dec)) + spark.range(1 << 4).select(Symbol("id") % 10 cast DecimalType(10, 2) as Symbol("i64_dec"))) } } @@ -886,7 +887,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession checkAnswer( // Decimal column in this file is encoded using plain dictionary readResourceParquetFile("test-data/dec-in-fixed-len.parquet"), - spark.range(1 << 4).select('id % 10 cast DecimalType(10, 2) as 'fixed_len_dec)) + 
spark.range(1 << 4) + .select(Symbol("id") % 10 cast DecimalType(10, 2) as Symbol("fixed_len_dec"))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index bf37421331db6..f3751562c332e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -979,7 +979,8 @@ abstract class ParquetPartitionDiscoverySuite withTempPath { dir => withSQLConf(SQLConf.PARALLEL_PARTITION_DISCOVERY_THRESHOLD.key -> "1") { val path = dir.getCanonicalPath - val df = spark.range(5).select('id as 'a, 'id as 'b, 'id as 'c).coalesce(1) + val df = spark.range(5).select(Symbol("id") as Symbol("a"), Symbol("id") as Symbol("b"), + Symbol("id") as Symbol("c")).coalesce(1) df.write.partitionBy("b", "c").parquet(path) checkAnswer(spark.read.parquet(path), df) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 057de2abdb9e0..654ab7fe36200 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -153,7 +153,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS (1, "2016-01-01 10:11:12.123456"), (2, null), (3, "1965-01-01 10:11:12.123456")) - .toDS().select('_1, $"_2".cast("timestamp")) + .toDS().select(Symbol("_1"), $"_2".cast("timestamp")) checkAnswer(sql("select * from ts"), expected) } } @@ -805,7 +805,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS test("SPARK-15804: write out the metadata to parquet file") { val df = Seq((1, "abc"), (2, "hello")).toDF("a", "b") val md = new MetadataBuilder().putString("key", "value").build() - val dfWithmeta = df.select('a, 'b.as("b", md)) + val dfWithmeta = df.select(Symbol("a"), Symbol("b").as("b", md)) withTempPath { dir => val path = dir.getCanonicalPath @@ -1027,7 +1027,7 @@ class ParquetV1QuerySuite extends ParquetQuerySuite { withSQLConf(SQLConf.WHOLESTAGE_MAX_NUM_FIELDS.key -> "10") { withTempPath { dir => val path = dir.getCanonicalPath - val df = spark.range(10).select(Seq.tabulate(11) {i => ('id + i).as(s"c$i")} : _*) + val df = spark.range(10).select(Seq.tabulate(11) {i => (Symbol("id") + i).as(s"c$i")} : _*) df.write.mode(SaveMode.Overwrite).parquet(path) // do not return batch - whole stage codegen is disabled for wide table (>200 columns) @@ -1060,7 +1060,7 @@ class ParquetV2QuerySuite extends ParquetQuerySuite { withSQLConf(SQLConf.WHOLESTAGE_MAX_NUM_FIELDS.key -> "10") { withTempPath { dir => val path = dir.getCanonicalPath - val df = spark.range(10).select(Seq.tabulate(11) {i => ('id + i).as(s"c$i")} : _*) + val df = spark.range(10).select(Seq.tabulate(11) {i => (Symbol("id") + i).as(s"c$i")} : _*) df.write.mode(SaveMode.Overwrite).parquet(path) // do not return batch - whole stage codegen is disabled for wide table (>200 columns) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala index 2feea41d15656..d0228d7bdf9f2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala @@ -944,7 +944,8 @@ class ParquetSchemaSuite extends ParquetSchemaTest { withTempPath { dir => val path = dir.getCanonicalPath spark.range(3).write.parquet(s"$path/p=1") - spark.range(3).select('id cast IntegerType as 'id).write.parquet(s"$path/p=2") + spark.range(3).select(Symbol("id") cast IntegerType as Symbol("id")) + .write.parquet(s"$path/p=2") val message = intercept[SparkException] { spark.read.option("mergeSchema", "true").parquet(path).schema diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala index 143feebdd4994..0fb6fc58c400d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.types.BooleanType class DataSourceV2StrategySuite extends PlanTest with SharedSparkSession { test("SPARK-36644: Push down boolean column filter") { - testTranslateFilter('col.boolean, + testTranslateFilter(Symbol("col").boolean, Some(new V2EqualTo(FieldReference("col"), LiteralValue(true, BooleanType)))) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/ValidateRequirementsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/ValidateRequirementsSuite.scala index 767a26876f902..6e2eba68d9262 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/ValidateRequirementsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/ValidateRequirementsSuite.scala @@ -36,11 +36,11 @@ class ValidateRequirementsSuite extends PlanTest with SharedSparkSession { rightPartitionNum: Int, success: Boolean): Unit = { val table1 = - spark.range(10).select('id + 1 as 'a1, 'id + 2 as 'b1, 'id + 3 as 'c1) - .queryExecution.executedPlan + spark.range(10).select(Symbol("id") + 1 as Symbol("a1"), Symbol("id") + 2 as Symbol("b1"), + Symbol("id") + 3 as Symbol("c1")).queryExecution.executedPlan val table2 = - spark.range(10).select('id + 1 as 'a2, 'id + 2 as 'b2, 'id + 3 as 'c2) - .queryExecution.executedPlan + spark.range(10).select(Symbol("id") + 1 as Symbol("a2"), Symbol("id") + 2 as Symbol("b2"), + Symbol("id") + 3 as Symbol("c2")).queryExecution.executedPlan val leftKeys = joinKeyIndices.map(table1.output) val rightKeys = joinKeyIndices.map(table2.output) @@ -105,14 +105,14 @@ class ValidateRequirementsSuite extends PlanTest with SharedSparkSession { partNums: Seq[Int], success: Boolean): Unit = { val table1 = - spark.range(10).select('id + 1 as 'a1, 'id + 2 as 'b1, 'id + 3 as 'c1) - .queryExecution.executedPlan + spark.range(10).select(Symbol("id") + 1 as Symbol("a1"), Symbol("id") + 2 as Symbol("b1"), + Symbol("id") + 3 as Symbol("c1")).queryExecution.executedPlan val table2 = - spark.range(10).select('id + 1 as 'a2, 'id + 2 as 'b2, 'id + 3 as 'c2) - .queryExecution.executedPlan + spark.range(10).select(Symbol("id") + 1 as Symbol("a2"), Symbol("id") + 2 as Symbol("b2"), + Symbol("id") + 3 as 
Symbol("c2")).queryExecution.executedPlan val table3 = - spark.range(10).select('id + 1 as 'a3, 'id + 2 as 'b3, 'id + 3 as 'c3) - .queryExecution.executedPlan + spark.range(10).select(Symbol("id") + 1 as Symbol("a3"), Symbol("id") + 2 as Symbol("b3"), + Symbol("id") + 3 as Symbol("c3")).queryExecution.executedPlan val key1 = joinKeyIndices1.map(_._1).map(table1.output) val key2 = joinKeyIndices1.map(_._2).map(table2.output) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala index f27a249c8f753..256e942620272 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala @@ -415,8 +415,8 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils test("Broadcast timeout") { val timeout = 5 val slowUDF = udf({ x: Int => Thread.sleep(timeout * 1000); x }) - val df1 = spark.range(10).select($"id" as 'a) - val df2 = spark.range(5).select(slowUDF($"id") as 'a) + val df1 = spark.range(10).select($"id" as Symbol("a")) + val df2 = spark.range(5).select(slowUDF($"id") as Symbol("a")) val testDf = df1.join(broadcast(df2), "a") withSQLConf(SQLConf.BROADCAST_TIMEOUT.key -> timeout.toString) { if (!conf.adaptiveExecutionEnabled) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index 0fd5c892e2c42..aa746370b8fd3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -79,7 +79,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils // Assume the execution plan is // PhysicalRDD(nodeId = 1) -> Filter(nodeId = 0) Seq((0L, false), (1L, true)).foreach { case (nodeId, enableWholeStage) => - val df = person.filter('age < 25) + val df = person.filter(Symbol("age") < 25) testSparkPlanMetrics(df, 1, Map( nodeId -> (("Filter", Map( "number of output rows" -> 1L)))), @@ -94,7 +94,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils // Filter(nodeId = 1) // Range(nodeId = 2) // TODO: update metrics in generated operators - val ds = spark.range(10).filter('id < 5) + val ds = spark.range(10).filter(Symbol("id") < 5) testSparkPlanMetricsWithPredicates(ds.toDF(), 1, Map( 0L -> (("WholeStageCodegen (1)", Map( "duration" -> { @@ -128,7 +128,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils ) // 2 partitions and each partition contains 2 keys - val df2 = testData2.groupBy('a).count() + val df2 = testData2.groupBy(Symbol("a")).count() val expected2 = Seq( Map("number of output rows" -> 4L, "avg hash probe bucket list iters" -> @@ -176,7 +176,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils // Exchange(nodeId = 5) // LocalTableScan(nodeId = 6) Seq(true, false).foreach { enableWholeStage => - val df = generateRandomBytesDF().repartition(2).groupBy('a).count() + val df = generateRandomBytesDF().repartition(2).groupBy(Symbol("a")).count() val nodeIds = if (enableWholeStage) { Set(4L, 1L) } else { @@ -204,7 +204,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils // Assume the execution plan is // ... 
-> ObjectHashAggregate(nodeId = 2) -> Exchange(nodeId = 1) // -> ObjectHashAggregate(nodeId = 0) - val df = testData2.groupBy().agg(collect_set('a)) // 2 partitions + val df = testData2.groupBy().agg(collect_set(Symbol("a"))) // 2 partitions testSparkPlanMetrics(df, 1, Map( 2L -> (("ObjectHashAggregate", Map("number of output rows" -> 2L))), 1L -> (("Exchange", Map( @@ -216,7 +216,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils ) // 2 partitions and each partition contains 2 keys - val df2 = testData2.groupBy('a).agg(collect_set('a)) + val df2 = testData2.groupBy(Symbol("a")).agg(collect_set(Symbol("a"))) testSparkPlanMetrics(df2, 1, Map( 2L -> (("ObjectHashAggregate", Map( "number of output rows" -> 4L, @@ -233,7 +233,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils // 2 partitions and each partition contains 2 keys, with fallback to sort-based aggregation withSQLConf(SQLConf.OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD.key -> "1") { - val df3 = testData2.groupBy('a).agg(collect_set('a)) + val df3 = testData2.groupBy(Symbol("a")).agg(collect_set(Symbol("a"))) testSparkPlanMetrics(df3, 1, Map( 2L -> (("ObjectHashAggregate", Map( "number of output rows" -> 4L, @@ -263,7 +263,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils // LocalTableScan(nodeId = 3) // Because of SPARK-25267, ConvertToLocalRelation is disabled in the test cases of sql/core, // so Project here is not collapsed into LocalTableScan. - val df = Seq(1, 3, 2).toDF("id").sort('id) + val df = Seq(1, 3, 2).toDF("id").sort(Symbol("id")) testSparkPlanMetricsWithPredicates(df, 2, Map( 0L -> (("Sort", Map( "sort time" -> { @@ -281,7 +281,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils test("SortMergeJoin metrics") { // Because SortMergeJoin may skip different rows if the number of partitions is different, this // test should use the deterministic number of partitions. - val testDataForJoin = testData2.filter('a < 2) // TestData2(1, 1) :: TestData2(1, 2) + val testDataForJoin = testData2.filter(Symbol("a") < 2) // TestData2(1, 1) :: TestData2(1, 2) testDataForJoin.createOrReplaceTempView("testDataForJoin") withTempView("testDataForJoin") { // Assume the execution plan is @@ -314,7 +314,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils test("SortMergeJoin(outer) metrics") { // Because SortMergeJoin may skip different rows if the number of partitions is different, // this test should use the deterministic number of partitions. 
- val testDataForJoin = testData2.filter('a < 2) // TestData2(1, 1) :: TestData2(1, 2) + val testDataForJoin = testData2.filter(Symbol("a") < 2) // TestData2(1, 1) :: TestData2(1, 2) testDataForJoin.createOrReplaceTempView("testDataForJoin") withTempView("testDataForJoin") { // Assume the execution plan is @@ -459,7 +459,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils } test("BroadcastNestedLoopJoin metrics") { - val testDataForJoin = testData2.filter('a < 2) // TestData2(1, 1) :: TestData2(1, 2) + val testDataForJoin = testData2.filter(Symbol("a") < 2) // TestData2(1, 1) :: TestData2(1, 2) testDataForJoin.createOrReplaceTempView("testDataForJoin") withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") { withTempView("testDataForJoin") { @@ -512,7 +512,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils test("CartesianProduct metrics") { withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") { - val testDataForJoin = testData2.filter('a < 2) // TestData2(1, 1) :: TestData2(1, 2) + val testDataForJoin = testData2.filter(Symbol("a") < 2) // TestData2(1, 1) :: TestData2(1, 2) testDataForJoin.createOrReplaceTempView("testDataForJoin") withTempView("testDataForJoin") { // Assume the execution plan is @@ -547,7 +547,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils test("save metrics") { withTempPath { file => // person creates a temporary view. get the DF before listing previous execution IDs - val data = person.select('name) + val data = person.select(Symbol("name")) val previousExecutionIds = currentExecutionIds() // Assume the execution plan is // PhysicalRDD(nodeId = 0) @@ -704,7 +704,8 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { // A special query that only has one partition, so there is no shuffle and the entire query // can be whole-stage-codegened. 
- val df = spark.range(0, 1500, 1, 1).limit(10).groupBy('id).count().limit(1).filter('id >= 0) + val df = spark.range(0, 1500, 1, 1).limit(10).groupBy(Symbol("id")) + .count().limit(1).filter(Symbol("id") >= 0) df.collect() val plan = df.queryExecution.executedPlan diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala index 53ef9dfbe39fa..f06e62b33b1a0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala @@ -44,8 +44,8 @@ class MicroBatchExecutionSuite extends StreamTest with BeforeAndAfter { val df = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(df)( @@ -104,8 +104,8 @@ class MicroBatchExecutionSuite extends StreamTest with BeforeAndAfter { val df = testSource.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long]) /** Reset this test source so that it appears to be a new source requiring initialization */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala index 5884380271f0e..11dbf9c2beaa1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala @@ -141,7 +141,7 @@ class ConsoleWriteSupportSuite extends StreamTest { .option("numPartitions", "1") .option("rowsPerSecond", "5") .load() - .select('value) + .select(Symbol("value")) val query = input.writeStream.format("console").trigger(Trigger.Continuous(200)).start() assert(query.isActive) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala index 0fe339b93047a..46440c98226aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala @@ -165,8 +165,8 @@ class ForeachWriterSuite extends StreamTest with SharedSparkSession with BeforeA val windowedAggregation = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"count".as[Long]) .map(_.toInt) .repartition(1) @@ -199,8 +199,8 @@ class ForeachWriterSuite extends 
StreamTest with SharedSparkSession with BeforeA val windowedAggregation = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"count".as[Long]) .map(_.toInt) .repartition(1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RatePerMicroBatchProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RatePerMicroBatchProviderSuite.scala index 449aea8256673..fe846acab28ca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RatePerMicroBatchProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RatePerMicroBatchProviderSuite.scala @@ -60,7 +60,7 @@ class RatePerMicroBatchProviderSuite extends StreamTest { .format("rate-micro-batch") .option("rowsPerBatch", "10") .load() - .select('value) + .select(Symbol("value")) val clock = new StreamManualClock testStream(input)( @@ -97,7 +97,7 @@ class RatePerMicroBatchProviderSuite extends StreamTest { .format("rate-micro-batch") .option("rowsPerBatch", "10") .load() - .select('value) + .select(Symbol("value")) val clock = new StreamManualClock testStream(input)( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala index 6440e69e2ec23..2c1bb41302c11 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala @@ -83,7 +83,7 @@ class RateStreamProviderSuite extends StreamTest { .format("rate") .option("rowsPerSecond", "10") .load() - .select('value) + .select(Symbol("value")) var streamDuration = 0 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala index d4792301a1ce5..0678cfc38660e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala @@ -67,7 +67,7 @@ class RocksDBStateStoreIntegrationSuite extends StreamTest { val inputData = MemoryStream[Int] val query = inputData.toDS().toDF("value") - .select('value) + .select(Symbol("value")) .groupBy($"value") .agg(count("*")) .writeStream @@ -119,7 +119,7 @@ class RocksDBStateStoreIntegrationSuite extends StreamTest { def startQuery(): StreamingQuery = { inputData.toDS().toDF("value") - .select('value) + .select(Symbol("value")) .groupBy($"value") .agg(count("*")) .writeStream @@ -156,7 +156,7 @@ class RocksDBStateStoreIntegrationSuite extends StreamTest { SQLConf.STATE_STORE_ROCKSDB_FORMAT_VERSION.key -> "100") { val inputData = MemoryStream[Int] val query = inputData.toDS().toDF("value") - .select('value) + .select(Symbol("value")) .groupBy($"value") .agg(count("*")) .writeStream @@ -179,7 +179,7 @@ class RocksDBStateStoreIntegrationSuite extends StreamTest { val inputData = MemoryStream[Int] val query = 
inputData.toDS().toDF("value") - .select('value) + .select(Symbol("value")) .groupBy($"value") .agg(count("*")) .writeStream diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala index ad744696f5472..9b5b532d3ecdc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala @@ -878,7 +878,8 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils val oldCount = statusStore.executionsList().size val cls = classOf[CustomMetricsDataSource].getName - spark.range(10).select('id as 'i, -'id as 'j).write.format(cls) + spark.range(10).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls) .option("path", dir.getCanonicalPath).mode("append").save() // Wait until the new execution is started and being tracked. @@ -919,7 +920,8 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils try { val cls = classOf[CustomMetricsDataSource].getName - spark.range(0, 10, 1, 2).select('id as 'i, -'id as 'j).write.format(cls) + spark.range(0, 10, 1, 2).select(Symbol("id") as Symbol("i"), -Symbol("id") as Symbol("j")) + .write.format(cls) .option("path", dir.getCanonicalPath).mode("append").save() // Wait until the new execution is started and being tracked. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala index dde463dd395f7..057bb34175a29 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala @@ -81,7 +81,7 @@ class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { withTempPath { path => val pathString = path.getCanonicalPath - spark.range(10).select('id.as("ID")).write.json(pathString) + spark.range(10).select(Symbol("id").as("ID")).write.json(pathString) spark.range(10).write.mode("append").json(pathString) assert(spark.read.json(pathString).columns.toSet == Set("id", "ID")) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala index 81ce979ef0b62..1b1f3714dc701 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala @@ -36,7 +36,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with override def beforeAll(): Unit = { super.beforeAll() - targetAttributes = Seq('a.int, 'd.int, 'b.int, 'c.int) + targetAttributes = Seq(Symbol("a").int, Symbol("d").int, Symbol("b").int, Symbol("c").int) targetPartitionSchema = new StructType() .add("b", IntegerType) .add("c", IntegerType) @@ -74,7 +74,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with caseSensitive) { intercept[AssertionError] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int, 'f.int), + sourceAttributes = Seq(Symbol("e").int, Symbol("f").int), providedPartitions = Map("b" -> None, "c" -> None), targetAttributes = targetAttributes, targetPartitionSchema = 
targetPartitionSchema) @@ -85,7 +85,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Missing columns. intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int), + sourceAttributes = Seq(Symbol("e").int), providedPartitions = Map("b" -> Some("1"), "c" -> None), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) @@ -96,7 +96,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Missing partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int, 'f.int), + sourceAttributes = Seq(Symbol("e").int, Symbol("f").int), providedPartitions = Map("b" -> Some("1")), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) @@ -105,7 +105,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Missing partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int, 'f.int, 'g.int), + sourceAttributes = Seq(Symbol("e").int, Symbol("f").int, Symbol("g").int), providedPartitions = Map("b" -> Some("1")), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) @@ -114,7 +114,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Wrong partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int, 'f.int), + sourceAttributes = Seq(Symbol("e").int, Symbol("f").int), providedPartitions = Map("b" -> Some("1"), "d" -> None), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) @@ -125,7 +125,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Wrong partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int, 'f.int), + sourceAttributes = Seq(Symbol("e").int, Symbol("f").int), providedPartitions = Map("b" -> Some("1"), "d" -> Some("2")), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) @@ -134,7 +134,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Wrong partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int), + sourceAttributes = Seq(Symbol("e").int), providedPartitions = Map("b" -> Some("1"), "c" -> Some("3"), "d" -> Some("2")), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) @@ -144,7 +144,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with // Wrong partitioning columns. 
intercept[AnalysisException] { rule.convertStaticPartitions( - sourceAttributes = Seq('e.int, 'f.int), + sourceAttributes = Seq(Symbol("e").int, Symbol("f").int), providedPartitions = Map("b" -> Some("1"), "C" -> Some("3")), targetAttributes = targetAttributes, targetPartitionSchema = targetPartitionSchema) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala index a81bd3bd060d3..3d315be636741 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala @@ -133,8 +133,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val inputData1 = MemoryStream[Int] val aggWithoutWatermark = inputData1.toDF() .withColumn("eventTime", timestamp_seconds($"value")) - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(aggWithoutWatermark, outputMode = Complete)( @@ -151,8 +151,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val aggWithWatermark = inputData2.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(aggWithWatermark)( @@ -174,8 +174,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val aggWithWatermark = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) // Unlike the ProcessingTime trigger, Trigger.Once only runs one trigger every time @@ -229,8 +229,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val aggWithWatermark = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) @@ -291,8 +291,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val windowedAggregation = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(windowedAggregation)( @@ -316,8 +316,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val 
windowedAggregation = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(windowedAggregation, OutputMode.Update)( @@ -346,8 +346,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val aggWithWatermark = input.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "2 years 5 months") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) def monthsSinceEpoch(date: Date): Int = { @@ -378,8 +378,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val df = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(df)( @@ -413,17 +413,17 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val firstDf = first.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .select('value) + .select(Symbol("value")) val second = MemoryStream[Int] val secondDf = second.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "5 seconds") - .select('value) + .select(Symbol("value")) withTempDir { checkpointDir => - val unionWriter = firstDf.union(secondDf).agg(sum('value)) + val unionWriter = firstDf.union(secondDf).agg(sum(Symbol("value"))) .writeStream .option("checkpointLocation", checkpointDir.getCanonicalPath) .format("memory") @@ -490,8 +490,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val windowedAggregation = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) // No eviction when asked to compute complete results. 
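Aside (not part of the patch): the hunks above and below all apply the same mechanical rewrite, replacing the symbol-literal column syntax that Scala 2.13 deprecates (`'col`) with explicit `Symbol("col")` calls, which Spark's DSL resolves to columns through the same implicit conversion. The minimal sketch below is only an illustration of why the spellings are interchangeable; the object name, local session, and sample data are assumptions for the example, not code from the patch.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.count

// Illustrative sketch only, not part of this patch.
object SymbolLiteralMigrationSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("sketch").getOrCreate()
    import spark.implicits._

    val df = Seq((1, "a"), (2, "b"), (1, "c")).toDF("id", "name")

    // Deprecated in Scala 2.13: the symbol literal 'id. It relies on the implicit
    // Symbol-to-Column conversion pulled in by spark.implicits._:
    // val old = df.groupBy('id).agg(count("*"))

    // Equivalent spellings that avoid the deprecated literal syntax:
    val viaSymbol = df.groupBy(Symbol("id")).agg(count("*")) // the form used in these hunks
    val viaDollar = df.groupBy($"id").agg(count("*"))        // string interpolator
    val viaCol    = df.groupBy(df.col("id")).agg(count("*")) // explicit Column lookup

    // All three build the same column reference, so the results are identical.
    viaSymbol.union(viaDollar).union(viaCol).show()
    spark.stop()
  }
}
```

`Symbol("col")` keeps the diffs closest to the original code; `$"col"` or `col("col")` would be equally valid targets for the migration.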
@@ -516,7 +516,7 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") .groupBy($"eventTime") - .agg(count("*") as 'count) + .agg(count("*") as Symbol("count")) .select($"eventTime".cast("long").as[Long], $"count".as[Long]) testStream(windowedAggregation)( @@ -587,7 +587,7 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val groupEvents = input .withWatermark("eventTime", "2 seconds") .groupBy("symbol", "eventTime") - .agg(count("price") as 'count) + .agg(count("price") as Symbol("count")) .select("symbol", "eventTime", "count") val q = groupEvents.writeStream .outputMode("append") @@ -606,14 +606,14 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val aliasWindow = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .select(window($"eventTime", "5 seconds") as 'aliasWindow) + .select(window($"eventTime", "5 seconds") as Symbol("aliasWindow")) // Check the eventTime metadata is kept in the top level alias. assert(aliasWindow.logicalPlan.output.exists( _.metadata.contains(EventTimeWatermark.delayKey))) val windowedAggregation = aliasWindow - .groupBy('aliasWindow) - .agg(count("*") as 'count) + .groupBy(Symbol("aliasWindow")) + .agg(count("*") as Symbol("count")) .select($"aliasWindow".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(windowedAggregation)( @@ -636,8 +636,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche val windowedAggregation = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(windowedAggregation)( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index e89197b5ff26c..71e8ae74fe207 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -216,7 +216,7 @@ class StreamSuite extends StreamTest { query.processAllAvailable() // Parquet write page-level CRC checksums will change the file size and // affect the data order when reading these files. Please see PARQUET-1746 for details. 
- val outputDf = spark.read.parquet(outputDir.getAbsolutePath).sort('a).as[Long] + val outputDf = spark.read.parquet(outputDir.getAbsolutePath).sort(Symbol("a")).as[Long] checkDataset[Long](outputDf, (0L to 10L).toArray: _*) } finally { query.stop() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index 8a7bb8b60c878..a183e6b4e3950 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -109,7 +109,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { val aggregated = inputData.toDF() - .select($"*", explode($"_2") as 'value) + .select($"*", explode($"_2") as Symbol("value")) .groupBy($"_1") .agg(size(collect_set($"value"))) .as[(Int, Int)] @@ -190,8 +190,8 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { val aggWithWatermark = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) implicit class RichStreamExecution(query: StreamExecution) { @@ -413,13 +413,13 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { inputDataOne.toDF() .groupBy($"value") .agg(count("*")) - .where('value >= current_timestamp().cast("long") - 10L) + .where(Symbol("value") >= current_timestamp().cast("long") - 10L) val inputDataTwo = MemoryStream[Long] val aggregatedTwo = inputDataTwo.toDF() .groupBy($"value") .agg(count("*")) - .where('value >= localtimestamp().cast(TimestampType).cast("long") - 10L) + .where(Symbol("value") >= localtimestamp().cast(TimestampType).cast("long") - 10L) Seq((inputDataOne, aggregatedOne), (inputDataTwo, aggregatedTwo)).foreach { x => val inputData = x._1 @@ -475,7 +475,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { val inputData = MemoryStream[Long] val aggregated = inputData.toDF() - .select(to_utc_timestamp(from_unixtime('value * SECONDS_PER_DAY), tz)) + .select(to_utc_timestamp(from_unixtime(Symbol("value") * SECONDS_PER_DAY), tz)) .toDF("value") .groupBy($"value") .agg(count("*")) @@ -522,12 +522,12 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { val streamInput = MemoryStream[Int] val batchDF = Seq(1, 2, 3, 4, 5) .toDF("value") - .withColumn("parity", 'value % 2) - .groupBy('parity) - .agg(count("*") as 'joinValue) + .withColumn("parity", Symbol("value") % 2) + .groupBy(Symbol("parity")) + .agg(count("*") as Symbol("joinValue")) val joinDF = streamInput .toDF() - .join(batchDF, 'value === 'parity) + .join(batchDF, Symbol("value") === Symbol("parity")) // make sure we're planning an aggregate in the first place assert(batchDF.queryExecution.optimizedPlan match { case _: Aggregate => true }) @@ -639,7 +639,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { def createDf(partitions: Int): Dataset[(Long, Long)] = { spark.readStream .format((new MockSourceProvider).getClass.getCanonicalName) - .load().coalesce(partitions).groupBy('a % 1).count().as[(Long, Long)] + 
.load().coalesce(partitions).groupBy(Symbol("a") % 1).count().as[(Long, Long)] } testStream(createDf(1), Complete())( @@ -677,7 +677,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { testWithAllStateVersions("SPARK-22230: last should change with new batches") { val input = MemoryStream[Int] - val aggregated = input.toDF().agg(last('value)) + val aggregated = input.toDF().agg(last(Symbol("value"))) testStream(aggregated, OutputMode.Complete())( AddData(input, 1, 2, 3), CheckLastBatch(3), @@ -853,8 +853,8 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { val aggWithWatermark = inputData.toDF() .withColumn("eventTime", timestamp_seconds($"value")) .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) inputData.reset() // reset the input to clear any data from prev test diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala index aa03da6c5843f..c1908d95f39e3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala @@ -146,8 +146,8 @@ class StreamingDeduplicationSuite extends StateStoreMetricsTest { .withWatermark("eventTime", "10 seconds") .dropDuplicates() .withWatermark("eventTime", "10 seconds") - .groupBy(window($"eventTime", "5 seconds") as 'window) - .agg(count("*") as 'count) + .groupBy(window($"eventTime", "5 seconds") as Symbol("window")) + .agg(count("*") as Symbol("count")) .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) testStream(windowedaggregate)( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index 2fbe6c4fed392..29caaf7289d6f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -56,9 +56,9 @@ abstract class StreamingJoinSuite val input = MemoryStream[Int] val df = input.toDF .select( - 'value as "key", + Symbol("value") as "key", timestamp_seconds($"value") as s"${prefix}Time", - ('value * multiplier) as s"${prefix}Value") + (Symbol("value") * multiplier) as s"${prefix}Value") .withWatermark(s"${prefix}Time", "10 seconds") (input, df) @@ -69,13 +69,16 @@ abstract class StreamingJoinSuite val (input1, df1) = setupStream("left", 2) val (input2, df2) = setupStream("right", 3) - val windowed1 = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val windowed2 = df2.select('key, window('rightTime, "10 second"), 'rightValue) + val windowed1 = df1 + .select(Symbol("key"), window(Symbol("leftTime"), "10 second"), Symbol("leftValue")) + val windowed2 = df2 + .select(Symbol("key"), window(Symbol("rightTime"), "10 second"), Symbol("rightValue")) val joined = windowed1.join(windowed2, Seq("key", "window"), joinType) val select = if (joinType == "left_semi") { - joined.select('key, $"window.end".cast("long"), 'leftValue) + joined.select(Symbol("key"), $"window.end".cast("long"), Symbol("leftValue")) } else 
{ - joined.select('key, $"window.end".cast("long"), 'leftValue, 'rightValue) + joined.select(Symbol("key"), $"window.end".cast("long"), Symbol("leftValue"), + Symbol("rightValue")) } (input1, input2, select) @@ -87,25 +90,29 @@ abstract class StreamingJoinSuite val (leftInput, df1) = setupStream("left", 2) val (rightInput, df2) = setupStream("right", 3) // Use different schemas to ensure the null row is being generated from the correct side. - val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) + val left = df1.select(Symbol("key"), window(Symbol("leftTime"), "10 second"), + Symbol("leftValue")) + val right = df2.select(Symbol("key"), window(Symbol("rightTime"), "10 second"), + Symbol("rightValue").cast("string")) val joined = left.join( right, left("key") === right("key") && left("window") === right("window") - && 'leftValue > 4, + && Symbol("leftValue") > 4, joinType) val select = if (joinType == "left_semi") { - joined.select(left("key"), left("window.end").cast("long"), 'leftValue) + joined.select(left("key"), left("window.end").cast("long"), Symbol("leftValue")) } else if (joinType == "left_outer") { - joined.select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + joined.select(left("key"), left("window.end").cast("long"), Symbol("leftValue"), + Symbol("rightValue")) } else if (joinType == "right_outer") { - joined.select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) + joined.select(right("key"), right("window.end").cast("long"), Symbol("leftValue"), + Symbol("rightValue")) } else { - joined.select(left("key"), left("window.end").cast("long"), 'leftValue, - right("key"), right("window.end").cast("long"), 'rightValue) + joined.select(left("key"), left("window.end").cast("long"), Symbol("leftValue"), + right("key"), right("window.end").cast("long"), Symbol("rightValue")) } (leftInput, rightInput, select) @@ -117,25 +124,29 @@ abstract class StreamingJoinSuite val (leftInput, df1) = setupStream("left", 2) val (rightInput, df2) = setupStream("right", 3) // Use different schemas to ensure the null row is being generated from the correct side. 
- val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) + val left = df1.select(Symbol("key"), window(Symbol("leftTime"), "10 second"), + Symbol("leftValue")) + val right = df2.select(Symbol("key"), window(Symbol("rightTime"), "10 second"), + Symbol("rightValue").cast("string")) val joined = left.join( right, left("key") === right("key") && left("window") === right("window") - && 'rightValue.cast("int") > 7, + && Symbol("rightValue").cast("int") > 7, joinType) val select = if (joinType == "left_semi") { - joined.select(left("key"), left("window.end").cast("long"), 'leftValue) + joined.select(left("key"), left("window.end").cast("long"), Symbol("leftValue")) } else if (joinType == "left_outer") { - joined.select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + joined.select(left("key"), left("window.end").cast("long"), Symbol("leftValue"), + Symbol("rightValue")) } else if (joinType == "right_outer") { - joined.select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) + joined.select(right("key"), right("window.end").cast("long"), Symbol("leftValue"), + Symbol("rightValue")) } else { - joined.select(left("key"), left("window.end").cast("long"), 'leftValue, - right("key"), right("window.end").cast("long"), 'rightValue) + joined.select(left("key"), left("window.end").cast("long"), Symbol("leftValue"), + right("key"), right("window.end").cast("long"), Symbol("rightValue")) } (leftInput, rightInput, select) @@ -152,12 +163,13 @@ abstract class StreamingJoinSuite val rightInput = MemoryStream[(Int, Int)] val df1 = leftInput.toDF.toDF("leftKey", "time") - .select('leftKey, timestamp_seconds($"time") as "leftTime", ('leftKey * 2) as "leftValue") + .select(Symbol("leftKey"), timestamp_seconds($"time") as "leftTime", + (Symbol("leftKey") * 2) as "leftValue") .withWatermark("leftTime", watermark) val df2 = rightInput.toDF.toDF("rightKey", "time") - .select('rightKey, timestamp_seconds($"time") as "rightTime", - ('rightKey * 3) as "rightValue") + .select(Symbol("rightKey"), timestamp_seconds($"time") as "rightTime", + (Symbol("rightKey") * 3) as "rightValue") .withWatermark("rightTime", watermark) val joined = @@ -168,9 +180,10 @@ abstract class StreamingJoinSuite joinType) val select = if (joinType == "left_semi") { - joined.select('leftKey, 'leftTime.cast("int")) + joined.select(Symbol("leftKey"), Symbol("leftTime").cast("int")) } else { - joined.select('leftKey, 'rightKey, 'leftTime.cast("int"), 'rightTime.cast("int")) + joined.select(Symbol("leftKey"), Symbol("rightKey"), Symbol("leftTime").cast("int"), + Symbol("rightTime").cast("int")) } (leftInput, rightInput, select) @@ -217,8 +230,8 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input1 = MemoryStream[Int] val input2 = MemoryStream[Int] - val df1 = input1.toDF.select('value as "key", ('value * 2) as "leftValue") - val df2 = input2.toDF.select('value as "key", ('value * 3) as "rightValue") + val df1 = input1.toDF.select(Symbol("value") as "key", (Symbol("value") * 2) as "leftValue") + val df2 = input2.toDF.select(Symbol("value") as "key", (Symbol("value") * 3) as "rightValue") val joined = df1.join(df2, "key") testStream(joined)( @@ -247,17 +260,17 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input2 = MemoryStream[Int] val df1 = input1.toDF - .select('value as "key", timestamp_seconds($"value") as "timestamp", - ('value * 2) as "leftValue") - 
.select('key, window('timestamp, "10 second"), 'leftValue) + .select(Symbol("value") as "key", timestamp_seconds($"value") as "timestamp", + (Symbol("value") * 2) as "leftValue") + .select(Symbol("key"), window(Symbol("timestamp"), "10 second"), Symbol("leftValue")) val df2 = input2.toDF - .select('value as "key", timestamp_seconds($"value") as "timestamp", - ('value * 3) as "rightValue") - .select('key, window('timestamp, "10 second"), 'rightValue) + .select(Symbol("value") as "key", timestamp_seconds($"value") as "timestamp", + (Symbol("value") * 3) as "rightValue") + .select(Symbol("key"), window(Symbol("timestamp"), "10 second"), Symbol("rightValue")) val joined = df1.join(df2, Seq("key", "window")) - .select('key, $"window.end".cast("long"), 'leftValue, 'rightValue) + .select(Symbol("key"), $"window.end".cast("long"), Symbol("leftValue"), Symbol("rightValue")) testStream(joined)( AddData(input1, 1), @@ -288,18 +301,18 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input2 = MemoryStream[Int] val df1 = input1.toDF - .select('value as "key", timestamp_seconds($"value") as "timestamp", - ('value * 2) as "leftValue") + .select(Symbol("value") as "key", timestamp_seconds($"value") as "timestamp", + (Symbol("value") * 2) as "leftValue") .withWatermark("timestamp", "10 seconds") - .select('key, window('timestamp, "10 second"), 'leftValue) + .select(Symbol("key"), window(Symbol("timestamp"), "10 second"), Symbol("leftValue")) val df2 = input2.toDF - .select('value as "key", timestamp_seconds($"value") as "timestamp", - ('value * 3) as "rightValue") - .select('key, window('timestamp, "10 second"), 'rightValue) + .select(Symbol("value") as "key", timestamp_seconds($"value") as "timestamp", + (Symbol("value") * 3) as "rightValue") + .select(Symbol("key"), window(Symbol("timestamp"), "10 second"), Symbol("rightValue")) val joined = df1.join(df2, Seq("key", "window")) - .select('key, $"window.end".cast("long"), 'leftValue, 'rightValue) + .select(Symbol("key"), $"window.end".cast("long"), Symbol("leftValue"), Symbol("rightValue")) testStream(joined)( AddData(input1, 1), @@ -339,17 +352,18 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val rightInput = MemoryStream[(Int, Int)] val df1 = leftInput.toDF.toDF("leftKey", "time") - .select('leftKey, timestamp_seconds($"time") as "leftTime", ('leftKey * 2) as "leftValue") + .select(Symbol("leftKey"), timestamp_seconds($"time") as "leftTime", + (Symbol("leftKey") * 2) as "leftValue") .withWatermark("leftTime", "10 seconds") val df2 = rightInput.toDF.toDF("rightKey", "time") - .select('rightKey, timestamp_seconds($"time") as "rightTime", - ('rightKey * 3) as "rightValue") + .select(Symbol("rightKey"), timestamp_seconds($"time") as "rightTime", + (Symbol("rightKey") * 3) as "rightValue") .withWatermark("rightTime", "10 seconds") val joined = df1.join(df2, expr("leftKey = rightKey AND leftTime < rightTime - interval 5 seconds")) - .select('leftKey, 'leftTime.cast("int"), 'rightTime.cast("int")) + .select(Symbol("leftKey"), Symbol("leftTime").cast("int"), Symbol("rightTime").cast("int")) testStream(joined)( AddData(leftInput, (1, 5)), @@ -398,12 +412,13 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val rightInput = MemoryStream[(Int, Int)] val df1 = leftInput.toDF.toDF("leftKey", "time") - .select('leftKey, timestamp_seconds($"time") as "leftTime", ('leftKey * 2) as "leftValue") + .select(Symbol("leftKey"), timestamp_seconds($"time") as "leftTime", + (Symbol("leftKey") * 2) as "leftValue") 
.withWatermark("leftTime", "20 seconds") val df2 = rightInput.toDF.toDF("rightKey", "time") - .select('rightKey, timestamp_seconds($"time") as "rightTime", - ('rightKey * 3) as "rightValue") + .select(Symbol("rightKey"), timestamp_seconds($"time") as "rightTime", + (Symbol("rightKey") * 3) as "rightValue") .withWatermark("rightTime", "30 seconds") val condition = expr( @@ -432,7 +447,8 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { // drop state where rightTime < eventTime - 5 val joined = - df1.join(df2, condition).select('leftKey, 'leftTime.cast("int"), 'rightTime.cast("int")) + df1.join(df2, condition).select(Symbol("leftKey"), Symbol("leftTime").cast("int"), + Symbol("rightTime").cast("int")) testStream(joined)( // If leftTime = 20, then it match only with rightTime = [15, 30] @@ -479,8 +495,10 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input1 = MemoryStream[Int] val input2 = MemoryStream[Int] - val df1 = input1.toDF.select('value as "leftKey", ('value * 2) as "leftValue") - val df2 = input2.toDF.select('value as "rightKey", ('value * 3) as "rightValue") + val df1 = input1.toDF + .select(Symbol("value") as "leftKey", (Symbol("value") * 2) as "leftValue") + val df2 = input2.toDF + .select(Symbol("value") as "rightKey", (Symbol("value") * 3) as "rightValue") val joined = df1.join(df2, expr("leftKey < rightKey")) val e = intercept[Exception] { val q = joined.writeStream.format("memory").queryName("test").start() @@ -494,8 +512,8 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input = MemoryStream[Int] val df = input.toDF val join = - df.select('value % 5 as "key", 'value).join( - df.select('value % 5 as "key", 'value), "key") + df.select(Symbol("value") % 5 as "key", Symbol("value")).join( + df.select(Symbol("value") % 5 as "key", Symbol("value")), "key") testStream(join)( AddData(input, 1, 2), @@ -559,9 +577,11 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input2 = MemoryStream[Int] val input3 = MemoryStream[Int] - val df1 = input1.toDF.select('value as "leftKey", ('value * 2) as "leftValue") - val df2 = input2.toDF.select('value as "middleKey", ('value * 3) as "middleValue") - val df3 = input3.toDF.select('value as "rightKey", ('value * 5) as "rightValue") + val df1 = input1.toDF.select(Symbol("value") as "leftKey", (Symbol("value") * 2) as "leftValue") + val df2 = input2.toDF + .select(Symbol("value") as "middleKey", (Symbol("value") * 3) as "middleValue") + val df3 = input3.toDF + .select(Symbol("value") as "rightKey", (Symbol("value") * 5) as "rightValue") val joined = df1.join(df2, expr("leftKey = middleKey")).join(df3, expr("rightKey = middleKey")) @@ -576,9 +596,12 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input1 = MemoryStream[Int] val input2 = MemoryStream[Int] - val df1 = input1.toDF.select('value as 'a, 'value * 2 as 'b) - val df2 = input2.toDF.select('value as 'a, 'value * 2 as 'b).repartition('b) - val joined = df1.join(df2, Seq("a", "b")).select('a) + val df1 = input1.toDF + .select(Symbol("value") as Symbol("a"), Symbol("value") * 2 as Symbol("b")) + val df2 = input2.toDF + .select(Symbol("value") as Symbol("a"), Symbol("value") * 2 as Symbol("b")) + .repartition(Symbol("b")) + val joined = df1.join(df2, Seq("a", "b")).select(Symbol("a")) testStream(joined)( AddData(input1, 1.to(1000): _*), @@ -667,18 +690,18 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { val input2 = MemoryStream[Int] val df1 = input1.toDF - .select('value as "key", 
timestamp_seconds($"value") as "timestamp", - ('value * 2) as "leftValue") + .select(Symbol("value") as "key", timestamp_seconds($"value") as "timestamp", + (Symbol("value") * 2) as "leftValue") .withWatermark("timestamp", "10 seconds") - .select('key, window('timestamp, "10 second"), 'leftValue) + .select(Symbol("key"), window(Symbol("timestamp"), "10 second"), Symbol("leftValue")) val df2 = input2.toDF - .select('value as "key", timestamp_seconds($"value") as "timestamp", - ('value * 3) as "rightValue") - .select('key, window('timestamp, "10 second"), 'rightValue) + .select(Symbol("value") as "key", timestamp_seconds($"value") as "timestamp", + (Symbol("value") * 3) as "rightValue") + .select(Symbol("key"), window(Symbol("timestamp"), "10 second"), Symbol("rightValue")) val joined = df1.join(df2, Seq("key", "window")) - .select('key, $"window.end".cast("long"), 'leftValue, 'rightValue) + .select(Symbol("key"), $"window.end".cast("long"), Symbol("leftValue"), Symbol("rightValue")) testStream(joined)( StartStream(additionalConfs = Map(SQLConf.SHUFFLE_PARTITIONS.key -> "3")), @@ -924,15 +947,19 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { val (leftInput, simpleLeftDf) = setupStream("left", 2) val (rightInput, simpleRightDf) = setupStream("right", 3) - val left = simpleLeftDf.select('key, window('leftTime, "10 second"), 'leftValue) - val right = simpleRightDf.select('key, window('rightTime, "10 second"), 'rightValue) + val left = simpleLeftDf + .select(Symbol("key"), window(Symbol("leftTime"), "10 second"), Symbol("leftValue")) + val right = simpleRightDf + .select(Symbol("key"), window(Symbol("rightTime"), "10 second"), Symbol("rightValue")) val joined = left.join( right, left("key") === right("key") && left("window") === right("window") && - 'leftValue > 10 && ('rightValue < 300 || 'rightValue > 1000), + Symbol("leftValue") > 10 && + (Symbol("rightValue") < 300 || Symbol("rightValue") > 1000), "left_outer") - .select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + .select(left("key"), left("window.end").cast("long"), Symbol("leftValue"), + Symbol("rightValue")) testStream(joined)( // leftValue <= 10 should generate outer join rows even though it matches right keys @@ -1123,9 +1150,9 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { val input1 = MemoryStream[Int](desiredPartitionsForInput1) val df1 = input1.toDF .select( - 'value as "key", - 'value as "leftValue", - 'value as "rightValue") + Symbol("value") as "key", + Symbol("value") as "leftValue", + Symbol("value") as "rightValue") val (input2, df2) = setupStream("left", 2) val (input3, df3) = setupStream("right", 3) @@ -1133,7 +1160,7 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { .join(df3, df2("key") === df3("key") && df2("leftTime") === df3("rightTime"), "inner") - .select(df2("key"), 'leftValue, 'rightValue) + .select(df2("key"), Symbol("leftValue"), Symbol("rightValue")) (input1, input2, input3, df1.union(joined)) } @@ -1316,15 +1343,15 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { "_2 * 3 as rightValue") .withWatermark("rightTime", "10 seconds") - val windowed1 = df1.select('leftKey1, 'leftKey2, - window('leftTime, "10 second").as('leftWindow), 'leftValue) - val windowed2 = df2.select('rightKey1, 'rightKey2, - window('rightTime, "10 second").as('rightWindow), 'rightValue) + val windowed1 = df1.select(Symbol("leftKey1"), Symbol("leftKey2"), + window(Symbol("leftTime"), "10 second").as(Symbol("leftWindow")), Symbol("leftValue")) + val windowed2 = 
df2.select(Symbol("rightKey1"), Symbol("rightKey2"), + window(Symbol("rightTime"), "10 second").as(Symbol("rightWindow")), Symbol("rightValue")) windowed1.join(windowed2, expr("leftKey1 <=> rightKey1 AND leftKey2 = rightKey2 AND leftWindow = rightWindow"), "left_outer" - ).select('leftKey1, 'rightKey1, 'leftKey2, 'rightKey2, $"leftWindow.end".cast("long"), - 'leftValue, 'rightValue) + ).select(Symbol("leftKey1"), Symbol("rightKey1"), Symbol("leftKey2"), Symbol("rightKey2"), + $"leftWindow.end".cast("long"), Symbol("leftValue"), Symbol("rightValue")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala index 99fcef109a07c..7bc4288b2c1c4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala @@ -237,7 +237,7 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { val inputData = MemoryStream[Int] val query = inputData.toDS().toDF("value") - .select('value) + .select(Symbol("value")) .groupBy($"value") .agg(count("*")) .writeStream diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala index 54bed5c966d1f..84060733e865c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala @@ -860,8 +860,8 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi val baseDf = Seq((1, "A"), (2, "b")).toDF("num", "char").where("char = 'A'") val otherDf = stream.toDF().toDF("num", "numSq") .join(broadcast(baseDf), "num") - .groupBy('char) - .agg(sum('numSq)) + .groupBy(Symbol("char")) + .agg(sum(Symbol("numSq"))) testStream(otherDf, OutputMode.Complete())( AddData(stream, (1, 1), (2, 4)), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowSuite.scala index e82b9df93dd7d..d0f3a87acbc29 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowSuite.scala @@ -417,7 +417,7 @@ class StreamingSessionWindowSuite extends StreamTest .selectExpr("explode(split(value, ' ')) AS sessionId", "eventTime") events - .groupBy(sessionWindow as 'session, 'sessionId) + .groupBy(sessionWindow as Symbol("session"), Symbol("sessionId")) .agg(count("*").as("numEvents")) .selectExpr("sessionId", "CAST(session.start AS LONG)", "CAST(session.end AS LONG)", "CAST(session.end AS LONG) - CAST(session.start AS LONG) AS durationMs", @@ -429,8 +429,8 @@ class StreamingSessionWindowSuite extends StreamTest .selectExpr("*") .withColumn("eventTime", $"value".cast("timestamp")) .withWatermark("eventTime", "10 seconds") - .groupBy(session_window($"eventTime", "5 seconds") as 'session) - .agg(count("*") as 'count, sum("value") as 'sum) + .groupBy(session_window($"eventTime", "5 seconds") as Symbol("session")) + .agg(count("*") as Symbol("count"), sum("value") as Symbol("sum")) .select($"session".getField("start").cast("long").as[Long], $"session".getField("end").cast("long").as[Long], 
$"count".as[Long], $"sum".as[Long]) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 0e2fcfbd46356..5893c3da09812 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -257,7 +257,7 @@ class ContinuousSuite extends ContinuousSuiteBase { .option("numPartitions", "2") .option("rowsPerSecond", "2") .load() - .select('value) + .select(Symbol("value")) val query = df.writeStream .format("memory") @@ -306,7 +306,7 @@ class ContinuousStressSuite extends ContinuousSuiteBase { .option("numPartitions", "5") .option("rowsPerSecond", "500") .load() - .select('value) + .select(Symbol("value")) testStream(df)( StartStream(longContinuousTrigger), @@ -326,7 +326,7 @@ class ContinuousStressSuite extends ContinuousSuiteBase { .option("numPartitions", "5") .option("rowsPerSecond", "500") .load() - .select('value) + .select(Symbol("value")) testStream(df)( StartStream(Trigger.Continuous(2012)), @@ -345,7 +345,7 @@ class ContinuousStressSuite extends ContinuousSuiteBase { .option("numPartitions", "5") .option("rowsPerSecond", "500") .load() - .select('value) + .select(Symbol("value")) testStream(df)( StartStream(Trigger.Continuous(1012)), @@ -436,7 +436,7 @@ class ContinuousEpochBacklogSuite extends ContinuousSuiteBase { .option("numPartitions", "2") .option("rowsPerSecond", "500") .load() - .select('value) + .select(Symbol("value")) testStream(df)( StartStream(Trigger.Continuous(1)), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index fc78527af381e..c40ba02fd0dd8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -553,7 +553,10 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { val createArray = udf { (length: Long) => for (i <- 1 to length.toInt) yield i.toString } - spark.range(4).select(createArray('id + 1) as 'ex, 'id, 'id % 4 as 'part).coalesce(1).write + spark.range(4) + .select(createArray(Symbol("id") + 1) as Symbol("ex"), Symbol("id"), + Symbol("id") % 4 as Symbol("part")) + .coalesce(1).write .partitionBy("part", "id") .mode("overwrite") .parquet(src.toString) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index cb3bd29c27991..dabd9c001eb3d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -880,7 +880,8 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with val createArray = udf { (length: Long) => for (i <- 1 to length.toInt) yield i.toString } - spark.range(4).select(createArray('id + 1) as 'ex, 'id, 'id % 4 as 'part).coalesce(1).write + spark.range(4).select(createArray(Symbol("id") + 1) as Symbol("ex"), + Symbol("id"), Symbol("id") % 4 as Symbol("part")).coalesce(1).write .partitionBy("part", "id") .mode("overwrite") .parquet(src.toString) diff --git 
a/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceSuite.scala index baa04ada8b5d1..11201aadf67f8 100644 --- a/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceSuite.scala @@ -152,7 +152,7 @@ class SqlResourceSuite extends SparkFunSuite with PrivateMethodTester { import SqlResourceSuite._ val sqlResource = new SqlResource() - val prepareExecutionData = PrivateMethod[ExecutionData]('prepareExecutionData) + val prepareExecutionData = PrivateMethod[ExecutionData](Symbol("prepareExecutionData")) test("Prepare ExecutionData when details = false and planDescription = false") { val executionData = @@ -196,7 +196,7 @@ class SqlResourceSuite extends SparkFunSuite with PrivateMethodTester { } test("Parse wholeStageCodegenId from nodeName") { - val getWholeStageCodegenId = PrivateMethod[Option[Long]]('getWholeStageCodegenId) + val getWholeStageCodegenId = PrivateMethod[Option[Long]](Symbol("getWholeStageCodegenId")) val wholeStageCodegenId = sqlResource invokePrivate getWholeStageCodegenId(WHOLE_STAGE_CODEGEN_1) assert(wholeStageCodegenId == Some(1)) From 5039c0f34f98c2c9937f9ad3576fb18d8e9cba34 Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Fri, 4 Mar 2022 21:01:49 +0800 Subject: [PATCH 390/513] [SPARK-38345][SQL] Introduce SQL function ARRAY_SIZE ### What changes were proposed in this pull request? Introduce SQL function ARRAY_SIZE. ARRAY_SIZE works the same as SIZE when the input is an array except for: - ARRAY_SIZE raises an exception for non-array input. - ARRAY_SIZE always returns null for null input. ### Why are the changes needed? Counting elements within an array is a common use case. ARRAY_SIZE ensures the input to be an array and then returns the size. Other DBRMS like Snowflake supports that as well: [Snowflake ARRAY_SIZE](https://docs.snowflake.com/en/sql-reference/functions/array_size.html). Implementing that improves compatibility with DBMS and makes migration easier. ### Does this PR introduce _any_ user-facing change? Yea. `array_size` is available now. ``` scala> spark.sql("select array_size(array(2, 1))").show() +-----------------------+ |array_size(array(2, 1))| +-----------------------+ | 2| +-----------------------+ scala> spark.sql("select array_size(map('a', 1, 'b', 2))").show() org.apache.spark.sql.AnalysisException: cannot resolve 'array_size(map('a', 1, 'b', 2))' due to data type mismatch: argument 1 requires array type, however, 'map('a', 1, 'b', 2)' is of map type.; line 1 pos 7; 'Project [unresolvedalias(array_size(map(a, 1, b, 2), None), None)] ``` ### How was this patch tested? Unit tests. Closes #35671 from xinrong-databricks/array_size. 
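
As a quick illustration of the semantics described above, here is a minimal sketch (not part of the patch) run against a SparkSession named `spark`; the expected values in the comments are taken from the SQL tests added below.

```scala
// Counting elements of a literal array: a single row containing 2.
spark.sql("SELECT array_size(array(2, 1))").show()

// NULL input always yields NULL (unlike SIZE, whose null handling is configurable).
spark.sql("SELECT array_size(NULL)").show()

// Non-array input fails analysis instead of being coerced.
try {
  spark.sql("SELECT array_size(map('a', 1, 'b', 2))").collect()
} catch {
  case e: org.apache.spark.sql.AnalysisException =>
    println(e.getMessage)  // "... argument 1 requires array type ..."
}
```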
Authored-by: Xinrong Meng Signed-off-by: Wenchen Fan --- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/collectionOperations.scala | 27 +++++++++++- .../sql-functions/sql-expression-schema.md | 3 +- .../test/resources/sql-tests/inputs/array.sql | 7 +++ .../sql-tests/results/ansi/array.sql.out | 43 ++++++++++++++++++- .../resources/sql-tests/results/array.sql.out | 43 ++++++++++++++++++- 6 files changed, 120 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index bc7eb09ca352e..e01457cbca78a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -640,6 +640,7 @@ object FunctionRegistry { expression[ArrayIntersect]("array_intersect"), expression[ArrayJoin]("array_join"), expression[ArrayPosition]("array_position"), + expression[ArraySize]("array_size"), expression[ArraySort]("array_sort"), expression[ArrayExcept]("array_except"), expression[ArrayUnion]("array_union"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index e53fc5eef06d3..363c531b04272 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, Un import org.apache.spark.sql.catalyst.expressions.ArraySortLike.NullOrder import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.trees.BinaryLike +import org.apache.spark.sql.catalyst.trees.{BinaryLike, UnaryLike} import org.apache.spark.sql.catalyst.trees.TreePattern.{ARRAYS_ZIP, CONCAT, TreePattern} import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.catalyst.util.DateTimeConstants._ @@ -133,6 +133,31 @@ object Size { def apply(child: Expression): Size = new Size(child) } + +/** + * Given an array, returns total number of elements in it. + */ +@ExpressionDescription( + usage = "_FUNC_(expr) - Returns the size of an array. The function returns null for null input.", + examples = """ + Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a')); + 4 + """, + since = "3.3.0", + group = "collection_funcs") +case class ArraySize(child: Expression) + extends RuntimeReplaceable with ImplicitCastInputTypes with UnaryLike[Expression] { + + override lazy val replacement: Expression = Size(child, legacySizeOfNull = false) + + override def prettyName: String = "array_size" + + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType) + + protected def withNewChildInternal(newChild: Expression): ArraySize = copy(child = newChild) +} + /** * Returns an unordered array containing the keys of the map. 
*/ diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 88ad1aea77ddd..052e88e798440 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 383 + - Number of queries: 384 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -28,6 +28,7 @@ | org.apache.spark.sql.catalyst.expressions.ArrayPosition | array_position | SELECT array_position(array(3, 2, 1), 1) | struct | | org.apache.spark.sql.catalyst.expressions.ArrayRemove | array_remove | SELECT array_remove(array(1, 2, 3, null, 3), 3) | struct> | | org.apache.spark.sql.catalyst.expressions.ArrayRepeat | array_repeat | SELECT array_repeat('123', 2) | struct> | +| org.apache.spark.sql.catalyst.expressions.ArraySize | array_size | SELECT array_size(array('b', 'd', 'c', 'a')) | struct | | org.apache.spark.sql.catalyst.expressions.ArraySort | array_sort | SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end) | struct namedlambdavariable()) THEN 1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array> | | org.apache.spark.sql.catalyst.expressions.ArrayTransform | transform | SELECT transform(array(1, 2, 3), x -> x + 1) | struct> | | org.apache.spark.sql.catalyst.expressions.ArrayUnion | array_union | SELECT array_union(array(1, 2, 3), array(1, 3, 5)) | struct> | diff --git a/sql/core/src/test/resources/sql-tests/inputs/array.sql b/sql/core/src/test/resources/sql-tests/inputs/array.sql index 0223ce5475832..dfcf1742feb6f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/array.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/array.sql @@ -106,3 +106,10 @@ select elt(2, '123', null); select array(1, 2, 3)[5]; select array(1, 2, 3)[-1]; + +-- array_size +select array_size(array()); +select array_size(array(true)); +select array_size(array(2, 1)); +select array_size(NULL); +select array_size(map('a', 1, 'b', 2)); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out index f2b355279f5f2..00ac2eeba7ffd 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 33 +-- Number of queries: 38 -- !query @@ -266,6 +266,47 @@ org.apache.spark.SparkArrayIndexOutOfBoundsException Invalid index: -1, numElements: 3. If necessary set spark.sql.ansi.strictIndexOperator to false to bypass this error. 
+-- !query +select array_size(array()) +-- !query schema +struct +-- !query output +0 + + +-- !query +select array_size(array(true)) +-- !query schema +struct +-- !query output +1 + + +-- !query +select array_size(array(2, 1)) +-- !query schema +struct +-- !query output +2 + + +-- !query +select array_size(NULL) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select array_size(map('a', 1, 'b', 2)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'array_size(map('a', 1, 'b', 2))' due to data type mismatch: argument 1 requires array type, however, 'map('a', 1, 'b', 2)' is of map type.; line 1 pos 7 + + -- !query set spark.sql.ansi.strictIndexOperator=false -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out index 9d42b8a46a5a1..1ff2a1790ceee 100644 --- a/sql/core/src/test/resources/sql-tests/results/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 24 +-- Number of queries: 29 -- !query @@ -257,3 +257,44 @@ select array(1, 2, 3)[-1] struct -- !query output NULL + + +-- !query +select array_size(array()) +-- !query schema +struct +-- !query output +0 + + +-- !query +select array_size(array(true)) +-- !query schema +struct +-- !query output +1 + + +-- !query +select array_size(array(2, 1)) +-- !query schema +struct +-- !query output +2 + + +-- !query +select array_size(NULL) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select array_size(map('a', 1, 'b', 2)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'array_size(map('a', 1, 'b', 2))' due to data type mismatch: argument 1 requires array type, however, 'map('a', 1, 'b', 2)' is of map type.; line 1 pos 7 From 83d8000184e18e8de2047119e62241e89f94d955 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Fri, 4 Mar 2022 21:23:45 +0800 Subject: [PATCH 391/513] [SPARK-38196][SQL] Refactor framework so as JDBC dialect could compile expression by self way ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/35248 provides a new framework to represent catalyst expressions in DS V2 APIs. Because the framework translate all catalyst expressions to a unified SQL string and cannot keep compatibility between different JDBC database, the framework works not good. This PR reactor the framework so as JDBC dialect could compile expression by self way. First, The framework translate catalyst expressions to DS V2 expression. Second, The JDBC dialect could compile DS V2 expression to different SQL syntax. The java doc looks show below: ![image](https://user-images.githubusercontent.com/8486025/156579584-f56cafb5-641f-4c5b-a06e-38f4369051c3.png) ### Why are the changes needed? Make the framework be more common use. ### Does this PR introduce _any_ user-facing change? 'No'. The feature is not released. ### How was this patch tested? Exists tests. Closes #35494 from beliefer/SPARK-37960_followup. 
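
As a rough sketch of what this extension point enables (not part of the patch): a third-party dialect can subclass the new SQL builder and decide how each operator is printed. `MyDialect` and its alternative rendering of NOT are hypothetical; `JDBCSQLBuilder`, `compileExpression`, `build` and `visitNot` are the APIs added in the diff below.

```scala
import org.apache.spark.sql.connector.expressions.Expression
import org.apache.spark.sql.jdbc.JdbcDialect

// Hypothetical dialect: it receives structured V2 expressions and chooses its own SQL syntax,
// instead of getting a pre-rendered SQL string it cannot adjust.
case object MyDialect extends JdbcDialect {
  override def canHandle(url: String): Boolean = url.startsWith("jdbc:mydb")

  // Override only the pieces whose syntax differs in this (made-up) database.
  private class MyDialectSQLBuilder extends JDBCSQLBuilder {
    override def visitNot(v: String): String = s"! ($v)"
  }

  override def compileExpression(expr: Expression): Option[String] = {
    try {
      Some(new MyDialectSQLBuilder().build(expr))
    } catch {
      // Anything the builder cannot print stays in Spark instead of being pushed down.
      case _: IllegalArgumentException => None
    }
  }
}

// Registration would follow the usual pattern:
// org.apache.spark.sql.jdbc.JdbcDialects.registerDialect(MyDialect)
```

The try/None fallback mirrors what the default `compileExpression` in the diff does, so an operator the dialect cannot print degrades gracefully to evaluation inside Spark.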
Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../expressions/GeneralSQLExpression.java | 41 ---- .../expressions/GeneralScalarExpression.java | 203 ++++++++++++++++++ .../util/V2ExpressionSQLBuilder.java | 151 +++++++++++++ .../catalyst/util/ExpressionSQLBuilder.scala | 69 ------ .../catalyst/util/V2ExpressionBuilder.scala | 94 ++++++++ .../datasources/DataSourceStrategy.scala | 12 +- .../apache/spark/sql/jdbc/JdbcDialects.scala | 73 +++---- ...SourceV2DataFrameSessionCatalogSuite.scala | 4 +- .../connector/DataSourceV2FunctionSuite.scala | 3 +- .../sql/connector/DataSourceV2Suite.scala | 7 +- .../spark/sql/connector/LocalScanSuite.scala | 8 +- .../connector/SimpleWritableDataSource.scala | 5 +- .../connector/TableCapabilityCheckSuite.scala | 6 +- .../connector/TestV2SessionCatalogBase.scala | 9 +- .../sql/connector/V1ReadFallbackSuite.scala | 8 +- .../sql/connector/V1WriteFallbackSuite.scala | 8 +- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 31 ++- 17 files changed, 538 insertions(+), 194 deletions(-) delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralSQLExpression.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java delete mode 100644 sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/ExpressionSQLBuilder.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralSQLExpression.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralSQLExpression.java deleted file mode 100644 index ebeee22a853cf..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralSQLExpression.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions; - -import java.io.Serializable; - -import org.apache.spark.annotation.Evolving; - -/** - * The general SQL string corresponding to expression. 
- * - * @since 3.3.0 - */ -@Evolving -public class GeneralSQLExpression implements Expression, Serializable { - private String sql; - - public GeneralSQLExpression(String sql) { - this.sql = sql; - } - - public String sql() { return sql; } - - @Override - public String toString() { return sql; } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java new file mode 100644 index 0000000000000..b3dd2cbfe3d7d --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.expressions; + +import java.io.Serializable; +import java.util.Arrays; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder; + +// scalastyle:off line.size.limit +/** + * The general representation of SQL scalar expressions, which contains the upper-cased + * expression name and all the children expressions. + *

+ * The currently supported SQL scalar expressions (each supported since version 3.3.0),
+ * listed as name : SQL semantic:
+ *   IS_NULL     : expr IS NULL
+ *   IS_NOT_NULL : expr IS NOT NULL
+ *   =           : expr1 = expr2
+ *   !=          : expr1 != expr2
+ *   <>          : expr1 <> expr2
+ *   <=>         : expr1 <=> expr2
+ *   <           : expr1 < expr2
+ *   <=          : expr1 <= expr2
+ *   >           : expr1 > expr2
+ *   >=          : expr1 >= expr2
+ *   +           : expr1 + expr2
+ *   -           : expr1 - expr2 or - expr
+ *   *           : expr1 * expr2
+ *   /           : expr1 / expr2
+ *   %           : expr1 % expr2
+ *   &           : expr1 & expr2
+ *   |           : expr1 | expr2
+ *   ^           : expr1 ^ expr2
+ *   AND         : expr1 AND expr2
+ *   OR          : expr1 OR expr2
+ *   NOT         : NOT expr
+ *   ~           : ~ expr
+ *   CASE_WHEN   : CASE WHEN expr1 THEN expr2 [WHEN expr3 THEN expr4]* [ELSE expr5] END
+ *
    + * Note: SQL semantic conforms ANSI standard, so some expressions are not supported when ANSI off, + * including: add, subtract, multiply, divide, remainder, pmod. + * + * @since 3.3.0 + */ +// scalastyle:on line.size.limit +@Evolving +public class GeneralScalarExpression implements Expression, Serializable { + private String name; + private Expression[] children; + + public GeneralScalarExpression(String name, Expression[] children) { + this.name = name; + this.children = children; + } + + public String name() { return name; } + public Expression[] children() { return children; } + + @Override + public String toString() { + V2ExpressionSQLBuilder builder = new V2ExpressionSQLBuilder(); + try { + return builder.build(this); + } catch (Throwable e) { + return name + "(" + + Arrays.stream(children).map(child -> child.toString()).reduce((a,b) -> a + "," + b) + ")"; + } + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java new file mode 100644 index 0000000000000..0af0d88b0f622 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.util; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.spark.sql.connector.expressions.Expression; +import org.apache.spark.sql.connector.expressions.FieldReference; +import org.apache.spark.sql.connector.expressions.GeneralScalarExpression; +import org.apache.spark.sql.connector.expressions.LiteralValue; + +/** + * The builder to generate SQL from V2 expressions. 
+ */ +public class V2ExpressionSQLBuilder { + public String build(Expression expr) { + if (expr instanceof LiteralValue) { + return visitLiteral((LiteralValue) expr); + } else if (expr instanceof FieldReference) { + return visitFieldReference((FieldReference) expr); + } else if (expr instanceof GeneralScalarExpression) { + GeneralScalarExpression e = (GeneralScalarExpression) expr; + String name = e.name(); + switch (name) { + case "IS_NULL": + return visitIsNull(build(e.children()[0])); + case "IS_NOT_NULL": + return visitIsNotNull(build(e.children()[0])); + case "=": + case "!=": + case "<=>": + case "<": + case "<=": + case ">": + case ">=": + return visitBinaryComparison(name, build(e.children()[0]), build(e.children()[1])); + case "+": + case "*": + case "/": + case "%": + case "&": + case "|": + case "^": + return visitBinaryArithmetic(name, build(e.children()[0]), build(e.children()[1])); + case "-": + if (e.children().length == 1) { + return visitUnaryArithmetic(name, build(e.children()[0])); + } else { + return visitBinaryArithmetic(name, build(e.children()[0]), build(e.children()[1])); + } + case "AND": + return visitAnd(name, build(e.children()[0]), build(e.children()[1])); + case "OR": + return visitOr(name, build(e.children()[0]), build(e.children()[1])); + case "NOT": + return visitNot(build(e.children()[0])); + case "~": + return visitUnaryArithmetic(name, build(e.children()[0])); + case "CASE_WHEN": + List children = new ArrayList<>(); + for (Expression child : e.children()) { + children.add(build(child)); + } + return visitCaseWhen(children.toArray(new String[e.children().length])); + // TODO supports other expressions + default: + return visitUnexpectedExpr(expr); + } + } else { + return visitUnexpectedExpr(expr); + } + } + + protected String visitLiteral(LiteralValue literalValue) { + return literalValue.toString(); + } + + protected String visitFieldReference(FieldReference fieldRef) { + return fieldRef.toString(); + } + + protected String visitIsNull(String v) { + return v + " IS NULL"; + } + + protected String visitIsNotNull(String v) { + return v + " IS NOT NULL"; + } + + protected String visitBinaryComparison(String name, String l, String r) { + return "(" + l + ") " + name + " (" + r + ")"; + } + + protected String visitBinaryArithmetic(String name, String l, String r) { + return "(" + l + ") " + name + " (" + r + ")"; + } + + protected String visitAnd(String name, String l, String r) { + return "(" + l + ") " + name + " (" + r + ")"; + } + + protected String visitOr(String name, String l, String r) { + return "(" + l + ") " + name + " (" + r + ")"; + } + + protected String visitNot(String v) { + return "NOT (" + v + ")"; + } + + protected String visitUnaryArithmetic(String name, String v) { return name +" (" + v + ")"; } + + protected String visitCaseWhen(String[] children) { + StringBuilder sb = new StringBuilder("CASE"); + for (int i = 0; i < children.length; i += 2) { + String c = children[i]; + int j = i + 1; + if (j < children.length) { + String v = children[j]; + sb.append(" WHEN "); + sb.append(c); + sb.append(" THEN "); + sb.append(v); + } else { + sb.append(" ELSE "); + sb.append(c); + } + } + sb.append(" END"); + return sb.toString(); + } + + protected String visitUnexpectedExpr(Expression expr) throws IllegalArgumentException { + throw new IllegalArgumentException("Unexpected V2 expression: " + expr); + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/ExpressionSQLBuilder.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/ExpressionSQLBuilder.scala deleted file mode 100644 index 6239d0e2e7ae8..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/ExpressionSQLBuilder.scala +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.util - -import org.apache.spark.sql.catalyst.expressions.{Attribute, BinaryOperator, CaseWhen, EqualTo, Expression, IsNotNull, IsNull, Literal, Not} -import org.apache.spark.sql.connector.expressions.LiteralValue - -/** - * The builder to generate SQL string from catalyst expressions. - */ -class ExpressionSQLBuilder(e: Expression) { - - def build(): Option[String] = generateSQL(e) - - private def generateSQL(expr: Expression): Option[String] = expr match { - case Literal(value, dataType) => Some(LiteralValue(value, dataType).toString) - case a: Attribute => Some(quoteIfNeeded(a.name)) - case IsNull(col) => generateSQL(col).map(c => s"$c IS NULL") - case IsNotNull(col) => generateSQL(col).map(c => s"$c IS NOT NULL") - case b: BinaryOperator => - val l = generateSQL(b.left) - val r = generateSQL(b.right) - if (l.isDefined && r.isDefined) { - Some(s"(${l.get}) ${b.sqlOperator} (${r.get})") - } else { - None - } - case Not(EqualTo(left, right)) => - val l = generateSQL(left) - val r = generateSQL(right) - if (l.isDefined && r.isDefined) { - Some(s"${l.get} != ${r.get}") - } else { - None - } - case Not(child) => generateSQL(child).map(v => s"NOT ($v)") - case CaseWhen(branches, elseValue) => - val conditionsSQL = branches.map(_._1).flatMap(generateSQL) - val valuesSQL = branches.map(_._2).flatMap(generateSQL) - if (conditionsSQL.length == branches.length && valuesSQL.length == branches.length) { - val branchSQL = - conditionsSQL.zip(valuesSQL).map { case (c, v) => s" WHEN $c THEN $v" }.mkString - if (elseValue.isDefined) { - elseValue.flatMap(generateSQL).map(v => s"CASE$branchSQL ELSE $v END") - } else { - Some(s"CASE$branchSQL END") - } - } else { - None - } - // TODO supports other expressions - case _ => None - } -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala new file mode 100644 index 0000000000000..1e361695056a7 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util + +import org.apache.spark.sql.catalyst.expressions.{Add, And, Attribute, BinaryComparison, BinaryOperator, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, CaseWhen, Divide, EqualTo, Expression, IsNotNull, IsNull, Literal, Multiply, Not, Or, Remainder, Subtract, UnaryMinus} +import org.apache.spark.sql.connector.expressions.{Expression => V2Expression, FieldReference, GeneralScalarExpression, LiteralValue} + +/** + * The builder to generate V2 expressions from catalyst expressions. + */ +class V2ExpressionBuilder(e: Expression) { + + def build(): Option[V2Expression] = generateExpression(e) + + private def canTranslate(b: BinaryOperator) = b match { + case _: And | _: Or => true + case _: BinaryComparison => true + case _: BitwiseAnd | _: BitwiseOr | _: BitwiseXor => true + case add: Add => add.failOnError + case sub: Subtract => sub.failOnError + case mul: Multiply => mul.failOnError + case div: Divide => div.failOnError + case r: Remainder => r.failOnError + case _ => false + } + + private def generateExpression(expr: Expression): Option[V2Expression] = expr match { + case Literal(value, dataType) => Some(LiteralValue(value, dataType)) + case attr: Attribute => Some(FieldReference.column(attr.name)) + case IsNull(col) => generateExpression(col) + .map(c => new GeneralScalarExpression("IS_NULL", Array[V2Expression](c))) + case IsNotNull(col) => generateExpression(col) + .map(c => new GeneralScalarExpression("IS_NOT_NULL", Array[V2Expression](c))) + case b: BinaryOperator if canTranslate(b) => + val left = generateExpression(b.left) + val right = generateExpression(b.right) + if (left.isDefined && right.isDefined) { + Some(new GeneralScalarExpression(b.sqlOperator, Array[V2Expression](left.get, right.get))) + } else { + None + } + case Not(eq: EqualTo) => + val left = generateExpression(eq.left) + val right = generateExpression(eq.right) + if (left.isDefined && right.isDefined) { + Some(new GeneralScalarExpression("!=", Array[V2Expression](left.get, right.get))) + } else { + None + } + case Not(child) => generateExpression(child) + .map(v => new GeneralScalarExpression("NOT", Array[V2Expression](v))) + case UnaryMinus(child, true) => generateExpression(child) + .map(v => new GeneralScalarExpression("-", Array[V2Expression](v))) + case BitwiseNot(child) => generateExpression(child) + .map(v => new GeneralScalarExpression("~", Array[V2Expression](v))) + case CaseWhen(branches, elseValue) => + val conditions = branches.map(_._1).flatMap(generateExpression) + val values = branches.map(_._2).flatMap(generateExpression) + if (conditions.length == branches.length && values.length == branches.length) { + val branchExpressions = conditions.zip(values).flatMap { case (c, v) => + Seq[V2Expression](c, v) + } + if (elseValue.isDefined) { + elseValue.flatMap(generateExpression).map { v => + val children = (branchExpressions :+ v).toArray[V2Expression] + // The children looks 
like [condition1, value1, ..., conditionN, valueN, elseValue] + new GeneralScalarExpression("CASE_WHEN", children) + } + } else { + // The children looks like [condition1, value1, ..., conditionN, valueN] + Some(new GeneralScalarExpression("CASE_WHEN", branchExpressions.toArray[V2Expression])) + } + } else { + None + } + // TODO supports other expressions + case _ => None + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index a1602a3aa4880..c386655c947f6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -38,10 +38,10 @@ import org.apache.spark.sql.catalyst.planning.ScanOperation import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoStatement, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 -import org.apache.spark.sql.catalyst.util.ExpressionSQLBuilder +import org.apache.spark.sql.catalyst.util.V2ExpressionBuilder import org.apache.spark.sql.connector.catalog.SupportsRead import org.apache.spark.sql.connector.catalog.TableCapability._ -import org.apache.spark.sql.connector.expressions.{Expression => ExpressionV2, FieldReference, GeneralSQLExpression, NullOrdering, SortDirection, SortOrder => SortOrderV2, SortValue} +import org.apache.spark.sql.connector.expressions.{Expression => V2Expression, FieldReference, NullOrdering, SortDirection, SortOrder => V2SortOrder, SortValue} import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Aggregation, Avg, Count, CountStar, GeneralAggregateFunc, Max, Min, Sum} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.{InSubqueryExec, RowDataSourceScanExec, SparkPlan} @@ -776,8 +776,8 @@ object DataSourceStrategy Some(new Aggregation(translatedAggregates.toArray, translatedGroupBys.toArray)) } - protected[sql] def translateSortOrders(sortOrders: Seq[SortOrder]): Seq[SortOrderV2] = { - def translateOortOrder(sortOrder: SortOrder): Option[SortOrderV2] = sortOrder match { + protected[sql] def translateSortOrders(sortOrders: Seq[SortOrder]): Seq[V2SortOrder] = { + def translateOortOrder(sortOrder: SortOrder): Option[V2SortOrder] = sortOrder match { case SortOrder(PushableColumnWithoutNestedColumn(name), directionV1, nullOrderingV1, _) => val directionV2 = directionV1 match { case Ascending => SortDirection.ASCENDING @@ -864,8 +864,8 @@ object PushableColumnWithoutNestedColumn extends PushableColumnBase { * Get the expression of DS V2 to represent catalyst expression that can be pushed down. 
*/ object PushableExpression { - def unapply(e: Expression): Option[ExpressionV2] = e match { + def unapply(e: Expression): Option[V2Expression] = e match { case PushableColumnWithoutNestedColumn(name) => Some(FieldReference.column(name)) - case _ => new ExpressionSQLBuilder(e).build().map(new GeneralSQLExpression(_)) + case _ => new V2ExpressionBuilder(e).build() } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 2d10bbf5de537..a7e0ec8b72a7c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -32,8 +32,9 @@ import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, Timesta import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.TableChange._ import org.apache.spark.sql.connector.catalog.index.TableIndex -import org.apache.spark.sql.connector.expressions.{FieldReference, GeneralSQLExpression, NamedReference} +import org.apache.spark.sql.connector.expressions.{Expression, FieldReference, NamedReference} import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Avg, Count, CountStar, Max, Min, Sum} +import org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo @@ -194,6 +195,31 @@ abstract class JdbcDialect extends Serializable with Logging{ case _ => value } + class JDBCSQLBuilder extends V2ExpressionSQLBuilder { + override def visitFieldReference(fieldRef: FieldReference): String = { + if (fieldRef.fieldNames().length != 1) { + throw new IllegalArgumentException( + "FieldReference with field name has multiple or zero parts unsupported: " + fieldRef); + } + quoteIdentifier(fieldRef.fieldNames.head) + } + } + + /** + * Converts V2 expression to String representing a SQL expression. + * @param expr The V2 expression to be converted. + * @return Converted value. + */ + @Since("3.3.0") + def compileExpression(expr: Expression): Option[String] = { + val jdbcSQLBuilder = new JDBCSQLBuilder() + try { + Some(jdbcSQLBuilder.build(expr)) + } catch { + case _: IllegalArgumentException => None + } + } + /** * Converts aggregate function to String representing a SQL expression. * @param aggFunction The aggregate function to be converted. 
@@ -203,55 +229,20 @@ abstract class JdbcDialect extends Serializable with Logging{ def compileAggregate(aggFunction: AggregateFunc): Option[String] = { aggFunction match { case min: Min => - val sql = min.column match { - case field: FieldReference => - if (field.fieldNames.length != 1) return None - quoteIdentifier(field.fieldNames.head) - case expr: GeneralSQLExpression => - expr.sql() - } - Some(s"MIN($sql)") + compileExpression(min.column).map(v => s"MIN($v)") case max: Max => - val sql = max.column match { - case field: FieldReference => - if (field.fieldNames.length != 1) return None - quoteIdentifier(field.fieldNames.head) - case expr: GeneralSQLExpression => - expr.sql() - } - Some(s"MAX($sql)") + compileExpression(max.column).map(v => s"MAX($v)") case count: Count => - val sql = count.column match { - case field: FieldReference => - if (field.fieldNames.length != 1) return None - quoteIdentifier(field.fieldNames.head) - case expr: GeneralSQLExpression => - expr.sql() - } val distinct = if (count.isDistinct) "DISTINCT " else "" - Some(s"COUNT($distinct$sql)") + compileExpression(count.column).map(v => s"COUNT($distinct$v)") case sum: Sum => - val sql = sum.column match { - case field: FieldReference => - if (field.fieldNames.length != 1) return None - quoteIdentifier(field.fieldNames.head) - case expr: GeneralSQLExpression => - expr.sql() - } val distinct = if (sum.isDistinct) "DISTINCT " else "" - Some(s"SUM($distinct$sql)") + compileExpression(sum.column).map(v => s"SUM($distinct$v)") case _: CountStar => Some("COUNT(*)") case avg: Avg => - val sql = avg.column match { - case field: FieldReference => - if (field.fieldNames.length != 1) return None - quoteIdentifier(field.fieldNames.head) - case expr: GeneralSQLExpression => - expr.sql() - } val distinct = if (avg.isDistinct) "DISTINCT " else "" - Some(s"AVG($distinct$sql)") + compileExpression(avg.column).map(v => s"AVG($distinct$v)") case _ => None } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala index 05aafceb36ec7..98d95e48f5447 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.connector -import java.util - import org.scalatest.BeforeAndAfter import org.apache.spark.sql.{DataFrame, QueryTest, SaveMode} @@ -97,7 +95,7 @@ class InMemoryTableSessionCatalog extends TestV2SessionCatalogBase[InMemoryTable name: String, schema: StructType, partitions: Array[Transform], - properties: util.Map[String, String]): InMemoryTable = { + properties: java.util.Map[String, String]): InMemoryTable = { new InMemoryTable(name, schema, partitions, properties) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2FunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2FunctionSuite.scala index a1463523d38ff..92a5c552108b7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2FunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2FunctionSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.connector -import java.util import java.util.Collections import test.org.apache.spark.sql.connector.catalog.functions._ @@ -37,7 +36,7 @@ import 
org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String class DataSourceV2FunctionSuite extends DatasourceV2SQLBase { - private val emptyProps: util.Map[String, String] = Collections.emptyMap[String, String] + private val emptyProps: java.util.Map[String, String] = Collections.emptyMap[String, String] private def addFunction(ident: Identifier, fn: UnboundFunction): Unit = { catalog("testcat").asInstanceOf[InMemoryCatalog].createFunction(ident, fn) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala index 23164edddaeed..8f37e42b167be 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.connector import java.io.File -import java.util import java.util.OptionalLong import test.org.apache.spark.sql.connector._ @@ -561,7 +560,7 @@ abstract class SimpleBatchTable extends Table with SupportsRead { override def name(): String = this.getClass.toString - override def capabilities(): util.Set[TableCapability] = util.EnumSet.of(BATCH_READ) + override def capabilities(): java.util.Set[TableCapability] = java.util.EnumSet.of(BATCH_READ) } abstract class SimpleScanBuilder extends ScanBuilder @@ -584,7 +583,7 @@ trait TestingV2Source extends TableProvider { override def getTable( schema: StructType, partitioning: Array[Transform], - properties: util.Map[String, String]): Table = { + properties: java.util.Map[String, String]): Table = { getTable(new CaseInsensitiveStringMap(properties)) } @@ -801,7 +800,7 @@ class SchemaRequiredDataSource extends TableProvider { override def getTable( schema: StructType, partitioning: Array[Transform], - properties: util.Map[String, String]): Table = { + properties: java.util.Map[String, String]): Table = { val userGivenSchema = schema new SimpleBatchTable { override def schema(): StructType = userGivenSchema diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/LocalScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/LocalScanSuite.scala index 094667001b6c3..e3d61a846fdb4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/LocalScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/LocalScanSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.connector -import java.util - import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.catalog.{BasicInMemoryTableCatalog, Identifier, SupportsRead, Table, TableCapability} @@ -61,7 +59,7 @@ class TestLocalScanCatalog extends BasicInMemoryTableCatalog { ident: Identifier, schema: StructType, partitions: Array[Transform], - properties: util.Map[String, String]): Table = { + properties: java.util.Map[String, String]): Table = { val table = new TestLocalScanTable(ident.toString) tables.put(ident, table) table @@ -76,8 +74,8 @@ object TestLocalScanTable { class TestLocalScanTable(override val name: String) extends Table with SupportsRead { override def schema(): StructType = TestLocalScanTable.schema - override def capabilities(): util.Set[TableCapability] = - util.EnumSet.of(TableCapability.BATCH_READ) + override def capabilities(): java.util.Set[TableCapability] = + java.util.EnumSet.of(TableCapability.BATCH_READ) override def newScanBuilder(options: CaseInsensitiveStringMap): 
ScanBuilder = new TestLocalScanBuilder diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SimpleWritableDataSource.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SimpleWritableDataSource.scala index 99c322a7155f2..64c893ed74fdb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/SimpleWritableDataSource.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SimpleWritableDataSource.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.connector import java.io.{BufferedReader, InputStreamReader, IOException} -import java.util import scala.collection.JavaConverters._ @@ -138,8 +137,8 @@ class SimpleWritableDataSource extends TestingV2Source { new MyWriteBuilder(path, info) } - override def capabilities(): util.Set[TableCapability] = - util.EnumSet.of(BATCH_READ, BATCH_WRITE, TRUNCATE) + override def capabilities(): java.util.Set[TableCapability] = + java.util.EnumSet.of(BATCH_READ, BATCH_WRITE, TRUNCATE) } override def getTable(options: CaseInsensitiveStringMap): Table = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala index a12065ec0ab2a..5f2e0b28aeccc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.connector -import java.util - import org.apache.spark.sql.{AnalysisException, DataFrame, SQLContext} import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, NamedRelation} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} @@ -215,8 +213,8 @@ private case object TestRelation extends LeafNode with NamedRelation { private case class CapabilityTable(_capabilities: TableCapability*) extends Table { override def name(): String = "capability_test_table" override def schema(): StructType = TableCapabilityCheckSuite.schema - override def capabilities(): util.Set[TableCapability] = { - val set = util.EnumSet.noneOf(classOf[TableCapability]) + override def capabilities(): java.util.Set[TableCapability] = { + val set = java.util.EnumSet.noneOf(classOf[TableCapability]) _capabilities.foreach(set.add) set } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala index bf2749d1afc53..0a0aaa8021996 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.connector -import java.util import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicBoolean @@ -35,7 +34,7 @@ import org.apache.spark.sql.types.StructType */ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends DelegatingCatalogExtension { - protected val tables: util.Map[Identifier, T] = new ConcurrentHashMap[Identifier, T]() + protected val tables: java.util.Map[Identifier, T] = new ConcurrentHashMap[Identifier, T]() private val tableCreated: AtomicBoolean = new AtomicBoolean(false) @@ -48,7 +47,7 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating name: String, schema: StructType, partitions: Array[Transform], - properties: util.Map[String, 
String]): T + properties: java.util.Map[String, String]): T override def loadTable(ident: Identifier): Table = { if (tables.containsKey(ident)) { @@ -69,12 +68,12 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating ident: Identifier, schema: StructType, partitions: Array[Transform], - properties: util.Map[String, String]): Table = { + properties: java.util.Map[String, String]): Table = { val key = TestV2SessionCatalogBase.SIMULATE_ALLOW_EXTERNAL_PROPERTY val propsWithLocation = if (properties.containsKey(key)) { // Always set a location so that CREATE EXTERNAL TABLE won't fail with LOCATION not specified. if (!properties.containsKey(TableCatalog.PROP_LOCATION)) { - val newProps = new util.HashMap[String, String]() + val newProps = new java.util.HashMap[String, String]() newProps.putAll(properties) newProps.put(TableCatalog.PROP_LOCATION, "file:/abc") newProps diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala index ff1bd29808637..c5be222645b19 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.connector -import java.util - import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, QueryTest, Row, SparkSession, SQLContext} import org.apache.spark.sql.connector.catalog.{BasicInMemoryTableCatalog, Identifier, SupportsRead, Table, TableCapability} @@ -104,7 +102,7 @@ class V1ReadFallbackCatalog extends BasicInMemoryTableCatalog { ident: Identifier, schema: StructType, partitions: Array[Transform], - properties: util.Map[String, String]): Table = { + properties: java.util.Map[String, String]): Table = { // To simplify the test implementation, only support fixed schema. 
if (schema != V1ReadFallbackCatalog.schema || partitions.nonEmpty) { throw new UnsupportedOperationException @@ -129,8 +127,8 @@ class TableWithV1ReadFallback(override val name: String) extends Table with Supp override def schema(): StructType = V1ReadFallbackCatalog.schema - override def capabilities(): util.Set[TableCapability] = { - util.EnumSet.of(TableCapability.BATCH_READ) + override def capabilities(): java.util.Set[TableCapability] = { + java.util.EnumSet.of(TableCapability.BATCH_READ) } override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala index 9fbaf7890f8f8..992c46cc6cdb1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.connector -import java.util - import scala.collection.JavaConverters._ import scala.collection.mutable @@ -223,7 +221,7 @@ class V1FallbackTableCatalog extends TestV2SessionCatalogBase[InMemoryTableWithV name: String, schema: StructType, partitions: Array[Transform], - properties: util.Map[String, String]): InMemoryTableWithV1Fallback = { + properties: java.util.Map[String, String]): InMemoryTableWithV1Fallback = { val t = new InMemoryTableWithV1Fallback(name, schema, partitions, properties) InMemoryV1Provider.tables.put(name, t) tables.put(Identifier.of(Array("default"), name), t) @@ -321,7 +319,7 @@ class InMemoryTableWithV1Fallback( override val name: String, override val schema: StructType, override val partitioning: Array[Transform], - override val properties: util.Map[String, String]) + override val properties: java.util.Map[String, String]) extends Table with SupportsWrite with SupportsRead { @@ -331,7 +329,7 @@ class InMemoryTableWithV1Fallback( } } - override def capabilities: util.Set[TableCapability] = util.EnumSet.of( + override def capabilities: java.util.Set[TableCapability] = java.util.EnumSet.of( TableCapability.BATCH_READ, TableCapability.V1_BATCH_WRITE, TableCapability.OVERWRITE_BY_FILTER, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index aa0289ae75bdb..3f90fb47efb28 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.jdbc import java.sql.{Connection, DriverManager} import java.util.Properties -import org.apache.spark.SparkConf +import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.sql.{DataFrame, ExplainSuiteHelper, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.CannotReplaceMissingTableException import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Sort} @@ -28,6 +28,7 @@ import org.apache.spark.sql.connector.expressions.{FieldReference, NullOrdering, import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanRelation, V1ScanWrapper} import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.functions.{avg, count, lit, sum, udf} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.util.Utils @@ -841,6 +842,34 @@ class JDBCV2Suite extends 
QueryTest with SharedSparkSession with ExplainSuiteHel Row(2, 2, 2, 2, 2, 0d, 12000d, 0d, 12000d, 12000d, 0d, 0d, 3, 0d))) } + test("scan with aggregate push-down: aggregate function with binary arithmetic") { + Seq(false, true).foreach { ansiMode => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiMode.toString) { + val df = sql("SELECT SUM(2147483647 + DEPT) FROM h2.test.employee") + checkAggregateRemoved(df, ansiMode) + val expected_plan_fragment = if (ansiMode) { + "PushedAggregates: [SUM((2147483647) + (DEPT))], " + + "PushedFilters: [], PushedGroupByColumns: []" + } else { + "PushedFilters: []" + } + df.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + checkKeywordsExistsInExplain(df, expected_plan_fragment) + } + if (ansiMode) { + val e = intercept[SparkException] { + checkAnswer(df, Seq(Row(-10737418233L))) + } + assert(e.getMessage.contains( + "org.h2.jdbc.JdbcSQLDataException: Numeric value out of range: \"2147483648\"")) + } else { + checkAnswer(df, Seq(Row(-10737418233L))) + } + } + } + } + test("scan with aggregate push-down: aggregate function with UDF") { val df = spark.table("h2.test.employee") val decrease = udf { (x: Double, y: Double) => x - y } From ae9b80410a12c67bad19aa22b5e0de956fef9d17 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 4 Mar 2022 16:50:02 -0800 Subject: [PATCH 392/513] [SPARK-38417][CORE] Remove `Experimental` from `RDD.cleanShuffleDependencies` API ### What changes were proposed in this pull request? This PR aims to remove `Experimental` from `RDD.cleanShuffleDependencies` API at Apache Spark 3.3. ### Why are the changes needed? This API has been used since Apache Spark 3.1.0. ### Does this PR introduce _any_ user-facing change? No. This has been used for a long time in 3.1.1 ~ 3.2.1 since April 7, 2020. - https://spark.apache.org/docs/3.1.1/api/scala/org/apache/spark/rdd/RDD.html#cleanShuffleDependencies(blocking:Boolean):Unit ### How was this patch tested? Manual review because this is a human-oriented doc change. Closes #35736 from dongjoon-hyun/SPARK-38417. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 71885664513ac..c76b0d95d103d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1746,7 +1746,6 @@ abstract class RDD[T: ClassTag]( } /** - * :: Experimental :: * Removes an RDD's shuffles and it's non-persisted ancestors. * When running without a shuffle service, cleaning up shuffle files enables downscaling. * If you use the RDD after this call, you should checkpoint and materialize it first. @@ -1755,7 +1754,6 @@ abstract class RDD[T: ClassTag]( * * Tuning the driver GC to be more aggressive, so the regular context cleaner is triggered * * Setting an appropriate TTL for shuffle files to be auto cleaned */ - @Experimental @DeveloperApi @Since("3.1.0") def cleanShuffleDependencies(blocking: Boolean = false): Unit = { From 980d88d27976d0d2ed75d108b1e12c6cc1ac7ba1 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 4 Mar 2022 19:24:56 -0800 Subject: [PATCH 393/513] [SPARK-38418][PYSPARK] Add PySpark `cleanShuffleDependencies` developer API ### What changes were proposed in this pull request? This PR aims to add `cleanShuffleDependencies` developer API to PySpark RDD like Scala. ### Why are the changes needed? 
This API has been documented and used since Apache Spark 3.1.0 and we removed `Experimental` tag at Apache Spark 3.3.0 via SPARK-38417. - https://spark.apache.org/docs/latest/api/scala/org/apache/spark/rdd/RDD.html#cleanShuffleDependencies(blocking:Boolean):Unit This is required for a feature parity in PySpark 3.3.0. ### Does this PR introduce _any_ user-facing change? Yes, but this is a new API addition. ### How was this patch tested? Pass the CIs. Closes #35737 from dongjoon-hyun/SPARK-38418. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- python/docs/source/reference/pyspark.rst | 1 + python/pyspark/rdd.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/python/docs/source/reference/pyspark.rst b/python/docs/source/reference/pyspark.rst index bf4e66ee3353e..f0997255bb911 100644 --- a/python/docs/source/reference/pyspark.rst +++ b/python/docs/source/reference/pyspark.rst @@ -112,6 +112,7 @@ RDD APIs RDD.cache RDD.cartesian RDD.checkpoint + RDD.cleanShuffleDependencies RDD.coalesce RDD.cogroup RDD.collect diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 7cb887fe35606..fd8fc77fc547d 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -465,6 +465,26 @@ def getCheckpointFile(self) -> Optional[str]: return checkpointFile.get() if checkpointFile.isDefined() else None + def cleanShuffleDependencies(self, blocking: bool = False) -> None: + """ + Removes an RDD's shuffles and it's non-persisted ancestors. + + When running without a shuffle service, cleaning up shuffle files enables downscaling. + If you use the RDD after this call, you should checkpoint and materialize it first. + + .. versionadded:: 3.3.0 + + Parameters + ---------- + blocking : bool, optional + block on shuffle cleanup tasks. Disabled by default. + + Notes + ----- + This API is a developer API. + """ + self._jrdd.rdd().cleanShuffleDependencies(blocking) + def map(self: "RDD[T]", f: Callable[[T], U], preservesPartitioning: bool = False) -> "RDD[U]": """ Return a new RDD by applying a function to each element of this RDD. From 727f044612c0a71097aa0d29cb3f24a53b93fc1f Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Fri, 4 Mar 2022 19:28:14 -0800 Subject: [PATCH 394/513] [SPARK-38189][K8S][DOC] Add `Priority scheduling` doc for Spark on K8S ### What changes were proposed in this pull request? Document how to set the priority class with the pod template. ### Why are the changes needed? Currently, we didn't have a certain doc to help user enable priority scheduling Related: https://github.com/apache/spark/pull/35716 https://github.com/apache/spark/pull/35639#issuecomment-1055847723 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? ![image](https://user-images.githubusercontent.com/1736354/156696247-cf2cf566-57a0-4b8a-a18f-aa300a6f6a3d.png) Closes #35728 from Yikun/SPARK-38189-doc. Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 8553d7886acf0..971c0a6078db4 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1707,6 +1707,30 @@ Spark automatically handles translating the Spark configs spark.{driver/ex Kubernetes does not tell Spark the addresses of the resources allocated to each container. 
For that reason, the user must specify a discovery script that gets run by the executor on startup to discover what resources are available to that executor. You can find an example scripts in `examples/src/main/scripts/getGpusResources.sh`. The script must have execute permissions set and the user should setup permissions to not allow malicious users to modify it. The script should write to STDOUT a JSON string in the format of the ResourceInformation class. This has the resource name and an array of resource addresses available to just that executor. +### Resource Level Scheduling Overview + +There are several resource level scheduling features supported by Spark on Kubernetes. + +#### Priority Scheduling + +Kubernetes supports [Pod priority](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption) by default. + +Spark on Kubernetes allows defining the priority of jobs by [Pod template](#pod-template). The user can specify the priorityClassName in driver or executor Pod template spec section. Below is an example to show how to specify it: + +``` +apiVersion: v1 +Kind: Pod +metadata: + labels: + template-label-key: driver-template-label-value +spec: + # Specify the priority in here + priorityClassName: system-node-critical + containers: + - name: test-driver-container + image: will-be-overwritten +``` + ### Stage Level Scheduling Overview Stage level scheduling is supported on Kubernetes when dynamic allocation is enabled. This also requires spark.dynamicAllocation.shuffleTracking.enabled to be enabled since Kubernetes doesn't support an external shuffle service at this time. The order in which containers for different profiles is requested from Kubernetes is not guaranteed. Note that since dynamic allocation on Kubernetes requires the shuffle tracking feature, this means that executors from previous stages that used a different ResourceProfile may not idle timeout due to having shuffle data on them. This could result in using more cluster resources and in the worst case if there are no remaining resources on the Kubernetes cluster then Spark could potentially hang. You may consider looking at config spark.dynamicAllocation.shuffleTracking.timeout to set a timeout, but that could result in data having to be recomputed if the shuffle data is really needed. From 97716f7b2d6dc38d4b0a28049eeae3cea8730a86 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sat, 5 Mar 2022 08:06:32 -0600 Subject: [PATCH 395/513] [SPARK-38393][SQL] Clean up deprecated usage of `GenSeq/GenMap` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? `GenSeq/GenMap` is identified as `deprecated` since Scala 2.13.0 and `Gen* collection types have been removed`. On the other hand, for Scala 2.12: - `trait Map[K, +V] extends Iterable[(K, V)] with GenMap[K, V] with MapLike[K, V, Map[K, V]]` - `trait Seq[+A] extends PartialFunction[Int, A] with Iterable[A] with GenSeq[A] with GenericTraversableTemplate[A, Seq] with SeqLike[A, Seq[A]]` It is not necessary to declare as `GenSeq/GenMap`, so this pr change to use `Map/Seq` to clean up deprecated usage. ### Why are the changes needed? Clean up deprecated usage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35713 from LuciferYang/clean-up-gen. 
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../spark/sql/execution/command/ddl.scala | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 5e2a8c1e9ddbd..14d0e9753f2b0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.execution.command import java.util.Locale import java.util.concurrent.TimeUnit._ -import scala.collection.{GenMap, GenSeq} import scala.collection.parallel.ForkJoinTaskSupport import scala.collection.parallel.immutable.ParVector import scala.util.control.NonFatal @@ -643,7 +642,7 @@ case class RepairTableCommand( val pathFilter = getPathFilter(hadoopConf) val evalPool = ThreadUtils.newForkJoinPool("RepairTableCommand", 8) - val partitionSpecsAndLocs: GenSeq[(TablePartitionSpec, Path)] = + val partitionSpecsAndLocs: Seq[(TablePartitionSpec, Path)] = try { scanPartitions(spark, fs, pathFilter, root, Map(), table.partitionColumnNames, threshold, spark.sessionState.conf.resolver, new ForkJoinTaskSupport(evalPool)).seq @@ -656,7 +655,7 @@ case class RepairTableCommand( val partitionStats = if (spark.sqlContext.conf.gatherFastStats) { gatherPartitionStats(spark, partitionSpecsAndLocs, fs, pathFilter, threshold) } else { - GenMap.empty[String, PartitionStatistics] + Map.empty[String, PartitionStatistics] } logInfo(s"Finished to gather the fast stats for all $total partitions.") @@ -689,13 +688,13 @@ case class RepairTableCommand( partitionNames: Seq[String], threshold: Int, resolver: Resolver, - evalTaskSupport: ForkJoinTaskSupport): GenSeq[(TablePartitionSpec, Path)] = { + evalTaskSupport: ForkJoinTaskSupport): Seq[(TablePartitionSpec, Path)] = { if (partitionNames.isEmpty) { return Seq(spec -> path) } val statuses = fs.listStatus(path, filter) - val statusPar: GenSeq[FileStatus] = + val statusPar: Seq[FileStatus] = if (partitionNames.length > 1 && statuses.length > threshold || partitionNames.length > 2) { // parallelize the list of partitions here, then we can have better parallelism later. 
val parArray = new ParVector(statuses.toVector) @@ -728,10 +727,10 @@ case class RepairTableCommand( private def gatherPartitionStats( spark: SparkSession, - partitionSpecsAndLocs: GenSeq[(TablePartitionSpec, Path)], + partitionSpecsAndLocs: Seq[(TablePartitionSpec, Path)], fs: FileSystem, pathFilter: PathFilter, - threshold: Int): GenMap[String, PartitionStatistics] = { + threshold: Int): Map[String, PartitionStatistics] = { if (partitionSpecsAndLocs.length > threshold) { val hadoopConf = spark.sessionState.newHadoopConf() val serializableConfiguration = new SerializableConfiguration(hadoopConf) @@ -752,7 +751,7 @@ case class RepairTableCommand( val statuses = fs.listStatus(path, pathFilter) (path.toString, PartitionStatistics(statuses.length, statuses.map(_.getLen).sum)) } - }.collectAsMap() + }.collectAsMap().toMap } else { partitionSpecsAndLocs.map { case (_, location) => val statuses = fs.listStatus(location, pathFilter) @@ -764,8 +763,8 @@ case class RepairTableCommand( private def addPartitions( spark: SparkSession, table: CatalogTable, - partitionSpecsAndLocs: GenSeq[(TablePartitionSpec, Path)], - partitionStats: GenMap[String, PartitionStatistics]): Unit = { + partitionSpecsAndLocs: Seq[(TablePartitionSpec, Path)], + partitionStats: Map[String, PartitionStatistics]): Unit = { val total = partitionSpecsAndLocs.length var done = 0L // Hive metastore may not have enough memory to handle millions of partitions in single RPC, From 18219d40248bd707054d66079ab1a6dae0d665fe Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 6 Mar 2022 02:31:07 +0100 Subject: [PATCH 396/513] [SPARK-37400][SPARK-37426][PYTHON][MLLIB] Inline type hints for pyspark.mllib classification and regression ### What changes were proposed in this pull request? This PR migrates type `pyspark.mllib.classification` and `pyspark.mllib.regression` annotations from stub file to inline type hints. ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35585 from zero323/SPARK-37400+SPARK-37426. 
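As a minimal sketch of the stub-to-inline pattern this patch applies (not Spark code; `ToyModel` and its `predict` method are hypothetical), a signature previously kept in a `.pyi` stub with `typing.overload` folds into the `.py` module itself like this:

```python
from typing import List, Union, overload


class ToyModel:
    """Annotations live inline in the .py file, so no separate .pyi stub is needed."""

    # The @overload stubs give type checkers a precise return type for each input shape.
    @overload
    def predict(self, x: float) -> float:
        ...

    @overload
    def predict(self, x: List[float]) -> List[float]:
        ...

    # The single real implementation handles both cases at runtime.
    def predict(self, x: Union[float, List[float]]) -> Union[float, List[float]]:
        if isinstance(x, list):
            return [v * 2.0 for v in x]  # a batch of points -> a batch of predictions
        return x * 2.0                   # a single point -> a single prediction
```

The overloads are erased at runtime and only guide static checkers such as mypy, which is why the separate stub files can be deleted wholesale in the diff below.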
Authored-by: zero323 Signed-off-by: zero323 --- python/pyspark/mllib/classification.py | 257 +++++++++++++++--------- python/pyspark/mllib/classification.pyi | 151 -------------- python/pyspark/mllib/regression.py | 249 +++++++++++++++-------- python/pyspark/mllib/regression.pyi | 149 -------------- 4 files changed, 336 insertions(+), 470 deletions(-) delete mode 100644 python/pyspark/mllib/classification.pyi delete mode 100644 python/pyspark/mllib/regression.pyi diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index f302634882ef5..300d5650aa3f7 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -18,19 +18,26 @@ from math import exp import sys import warnings +from typing import Any, Iterable, Optional, Union, overload, TYPE_CHECKING import numpy -from pyspark import RDD, since +from pyspark import RDD, SparkContext, since +from pyspark.streaming.dstream import DStream from pyspark.mllib.common import callMLlibFunc, _py2java, _java2py from pyspark.mllib.linalg import _convert_to_vector -from pyspark.mllib.regression import ( +from pyspark.mllib.regression import ( # type: ignore[attr-defined] LabeledPoint, LinearModel, _regression_train_wrapper, StreamingLinearAlgorithm, ) -from pyspark.mllib.util import Saveable, Loader, inherit_doc +from pyspark.mllib.util import Saveable, Loader, inherit_doc # type: ignore[attr-defined] +from pyspark.mllib.linalg import Vector +from pyspark.mllib.regression import LabeledPoint + +if TYPE_CHECKING: + from pyspark.mllib._typing import VectorLike __all__ = [ @@ -51,12 +58,12 @@ class LinearClassificationModel(LinearModel): model. The categories are represented by int values: 0, 1, 2, etc. """ - def __init__(self, weights, intercept): + def __init__(self, weights: Vector, intercept: float) -> None: super(LinearClassificationModel, self).__init__(weights, intercept) - self._threshold = None + self._threshold: Optional[float] = None @since("1.4.0") - def setThreshold(self, value): + def setThreshold(self, value: float) -> None: """ Sets the threshold that separates positive predictions from negative predictions. An example with prediction score greater @@ -66,9 +73,9 @@ def setThreshold(self, value): """ self._threshold = value - @property + @property # type: ignore[misc] @since("1.4.0") - def threshold(self): + def threshold(self) -> Optional[float]: """ Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions. It is used for @@ -77,18 +84,29 @@ def threshold(self): return self._threshold @since("1.4.0") - def clearThreshold(self): + def clearThreshold(self) -> None: """ Clears the threshold so that `predict` will output raw prediction scores. It is used for binary classification only. """ self._threshold = None - @since("1.4.0") - def predict(self, test): + @overload + def predict(self, test: "VectorLike") -> Union[int, float]: + ... + + @overload + def predict(self, test: RDD["VectorLike"]) -> RDD[Union[int, float]]: + ... + + def predict( + self, test: Union["VectorLike", RDD["VectorLike"]] + ) -> Union[RDD[Union[int, float]], Union[int, float]]: """ Predict values for a single data point or an RDD of points using the model trained. + + .. 
versionadded:: 1.4.0 """ raise NotImplementedError @@ -178,7 +196,9 @@ class LogisticRegressionModel(LinearClassificationModel): 2 """ - def __init__(self, weights, intercept, numFeatures, numClasses): + def __init__( + self, weights: Vector, intercept: float, numFeatures: int, numClasses: int + ) -> None: super(LogisticRegressionModel, self).__init__(weights, intercept) self._numFeatures = int(numFeatures) self._numClasses = int(numClasses) @@ -187,40 +207,53 @@ def __init__(self, weights, intercept, numFeatures, numClasses): self._dataWithBiasSize = None self._weightsMatrix = None else: - self._dataWithBiasSize = self._coeff.size // (self._numClasses - 1) + self._dataWithBiasSize = self._coeff.size // ( # type: ignore[attr-defined] + self._numClasses - 1 + ) self._weightsMatrix = self._coeff.toArray().reshape( self._numClasses - 1, self._dataWithBiasSize ) - @property + @property # type: ignore[misc] @since("1.4.0") - def numFeatures(self): + def numFeatures(self) -> int: """ Dimension of the features. """ return self._numFeatures - @property + @property # type: ignore[misc] @since("1.4.0") - def numClasses(self): + def numClasses(self) -> int: """ Number of possible outcomes for k classes classification problem in Multinomial Logistic Regression. """ return self._numClasses - @since("0.9.0") - def predict(self, x): + @overload + def predict(self, x: "VectorLike") -> Union[int, float]: + ... + + @overload + def predict(self, x: RDD["VectorLike"]) -> RDD[Union[int, float]]: + ... + + def predict( + self, x: Union["VectorLike", RDD["VectorLike"]] + ) -> Union[RDD[Union[int, float]], Union[int, float]]: """ Predict values for a single data point or an RDD of points using the model trained. + + .. versionadded:: 0.9.0 """ if isinstance(x, RDD): return x.map(lambda v: self.predict(v)) x = _convert_to_vector(x) if self.numClasses == 2: - margin = self.weights.dot(x) + self._intercept + margin = self.weights.dot(x) + self._intercept # type: ignore[attr-defined] if margin > 0: prob = 1 / (1 + exp(-margin)) else: @@ -231,29 +264,34 @@ def predict(self, x): else: return 1 if prob > self._threshold else 0 else: + assert self._weightsMatrix is not None + best_class = 0 max_margin = 0.0 - if x.size + 1 == self._dataWithBiasSize: + if x.size + 1 == self._dataWithBiasSize: # type: ignore[attr-defined] for i in range(0, self._numClasses - 1): margin = ( - x.dot(self._weightsMatrix[i][0 : x.size]) + self._weightsMatrix[i][x.size] + x.dot(self._weightsMatrix[i][0 : x.size]) # type: ignore[attr-defined] + + self._weightsMatrix[i][x.size] # type: ignore[attr-defined] ) if margin > max_margin: max_margin = margin best_class = i + 1 else: for i in range(0, self._numClasses - 1): - margin = x.dot(self._weightsMatrix[i]) + margin = x.dot(self._weightsMatrix[i]) # type: ignore[attr-defined] if margin > max_margin: max_margin = margin best_class = i + 1 return best_class @since("1.4.0") - def save(self, sc, path): + def save(self, sc: SparkContext, path: str) -> None: """ Save this model to the given path. """ + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.classification.LogisticRegressionModel( _py2java(sc, self._coeff), self.intercept, self.numFeatures, self.numClasses ) @@ -261,10 +299,12 @@ def save(self, sc, path): @classmethod @since("1.4.0") - def load(cls, sc, path): + def load(cls, sc: SparkContext, path: str) -> "LogisticRegressionModel": """ Load a model from the given path. 
""" + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.classification.LogisticRegressionModel.load( sc._jsc.sc(), path ) @@ -277,8 +317,8 @@ def load(cls, sc, path): model.setThreshold(threshold) return model - def __repr__(self): - return self._call_java("toString") + def __repr__(self) -> str: + return self._call_java("toString") # type: ignore[attr-defined] # SPARK-38239 class LogisticRegressionWithSGD: @@ -293,17 +333,17 @@ class LogisticRegressionWithSGD: @classmethod def train( cls, - data, - iterations=100, - step=1.0, - miniBatchFraction=1.0, - initialWeights=None, - regParam=0.01, - regType="l2", - intercept=False, - validateData=True, - convergenceTol=0.001, - ): + data: RDD[LabeledPoint], + iterations: int = 100, + step: float = 1.0, + miniBatchFraction: float = 1.0, + initialWeights: Optional["VectorLike"] = None, + regParam: float = 0.01, + regType: str = "l2", + intercept: bool = False, + validateData: bool = True, + convergenceTol: float = 0.001, + ) -> LogisticRegressionModel: """ Train a logistic regression model on the given data. @@ -355,7 +395,7 @@ def train( FutureWarning, ) - def train(rdd, i): + def train(rdd: RDD[LabeledPoint], i: Vector) -> Iterable[Any]: return callMLlibFunc( "trainLogisticRegressionModelWithSGD", rdd, @@ -385,17 +425,17 @@ class LogisticRegressionWithLBFGS: @classmethod def train( cls, - data, - iterations=100, - initialWeights=None, - regParam=0.0, - regType="l2", - intercept=False, - corrections=10, - tolerance=1e-6, - validateData=True, - numClasses=2, - ): + data: RDD[LabeledPoint], + iterations: int = 100, + initialWeights: Optional["VectorLike"] = None, + regParam: float = 0.0, + regType: str = "l2", + intercept: bool = False, + corrections: int = 10, + tolerance: float = 1e-6, + validateData: bool = True, + numClasses: int = 2, + ) -> LogisticRegressionModel: """ Train a logistic regression model on the given data. @@ -457,7 +497,7 @@ def train( 0 """ - def train(rdd, i): + def train(rdd: RDD[LabeledPoint], i: Vector) -> Iterable[Any]: return callMLlibFunc( "trainLogisticRegressionModelWithLBFGS", rdd, @@ -541,31 +581,44 @@ class SVMModel(LinearClassificationModel): ... pass """ - def __init__(self, weights, intercept): + def __init__(self, weights: Vector, intercept: float) -> None: super(SVMModel, self).__init__(weights, intercept) self._threshold = 0.0 - @since("0.9.0") - def predict(self, x): + @overload + def predict(self, x: "VectorLike") -> Union[int, float]: + ... + + @overload + def predict(self, x: RDD["VectorLike"]) -> RDD[Union[int, float]]: + ... + + def predict( + self, x: Union["VectorLike", RDD["VectorLike"]] + ) -> Union[RDD[Union[int, float]], Union[int, float]]: """ Predict values for a single data point or an RDD of points using the model trained. + + .. versionadded:: 0.9.0 """ if isinstance(x, RDD): return x.map(lambda v: self.predict(v)) x = _convert_to_vector(x) - margin = self.weights.dot(x) + self.intercept + margin = self.weights.dot(x) + self.intercept # type: ignore[attr-defined] if self._threshold is None: return margin else: return 1 if margin > self._threshold else 0 @since("1.4.0") - def save(self, sc, path): + def save(self, sc: SparkContext, path: str) -> None: """ Save this model to the given path. 
""" + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.classification.SVMModel( _py2java(sc, self._coeff), self.intercept ) @@ -573,10 +626,12 @@ def save(self, sc, path): @classmethod @since("1.4.0") - def load(cls, sc, path): + def load(cls, sc: SparkContext, path: str) -> "SVMModel": """ Load a model from the given path. """ + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.classification.SVMModel.load(sc._jsc.sc(), path) weights = _java2py(sc, java_model.weights()) intercept = java_model.intercept() @@ -596,17 +651,17 @@ class SVMWithSGD: @classmethod def train( cls, - data, - iterations=100, - step=1.0, - regParam=0.01, - miniBatchFraction=1.0, - initialWeights=None, - regType="l2", - intercept=False, - validateData=True, - convergenceTol=0.001, - ): + data: RDD[LabeledPoint], + iterations: int = 100, + step: float = 1.0, + regParam: float = 0.01, + miniBatchFraction: float = 1.0, + initialWeights: Optional["VectorLike"] = None, + regType: str = "l2", + intercept: bool = False, + validateData: bool = True, + convergenceTol: float = 0.001, + ) -> SVMModel: """ Train a support vector machine on the given data. @@ -653,7 +708,7 @@ def train( (default: 0.001) """ - def train(rdd, i): + def train(rdd: RDD[LabeledPoint], i: Vector) -> Iterable[Any]: return callMLlibFunc( "trainSVMModelWithSGD", rdd, @@ -672,7 +727,7 @@ def train(rdd, i): @inherit_doc -class NaiveBayesModel(Saveable, Loader): +class NaiveBayesModel(Saveable, Loader["NaiveBayesModel"]): """ Model for Naive Bayes classifiers. @@ -727,13 +782,23 @@ class NaiveBayesModel(Saveable, Loader): ... pass """ - def __init__(self, labels, pi, theta): + def __init__(self, labels: numpy.ndarray, pi: numpy.ndarray, theta: numpy.ndarray) -> None: self.labels = labels self.pi = pi self.theta = theta + @overload + def predict(self, x: "VectorLike") -> numpy.float64: + ... + + @overload + def predict(self, x: RDD["VectorLike"]) -> RDD[numpy.float64]: + ... + @since("0.9.0") - def predict(self, x): + def predict( + self, x: Union["VectorLike", RDD["VectorLike"]] + ) -> Union[numpy.float64, RDD[numpy.float64]]: """ Return the most likely class for a data vector or an RDD of vectors @@ -741,12 +806,16 @@ def predict(self, x): if isinstance(x, RDD): return x.map(lambda v: self.predict(v)) x = _convert_to_vector(x) - return self.labels[numpy.argmax(self.pi + x.dot(self.theta.transpose()))] + return self.labels[ + numpy.argmax(self.pi + x.dot(self.theta.transpose())) # type: ignore[attr-defined] + ] - def save(self, sc, path): + def save(self, sc: SparkContext, path: str) -> None: """ Save this model to the given path. """ + assert sc._jvm is not None + java_labels = _py2java(sc, self.labels.tolist()) java_pi = _py2java(sc, self.pi.tolist()) java_theta = _py2java(sc, self.theta.tolist()) @@ -757,10 +826,12 @@ def save(self, sc, path): @classmethod @since("1.4.0") - def load(cls, sc, path): + def load(cls, sc: SparkContext, path: str) -> "NaiveBayesModel": """ Load a model from the given path. """ + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.classification.NaiveBayesModel.load( sc._jsc.sc(), path ) @@ -779,7 +850,7 @@ class NaiveBayes: """ @classmethod - def train(cls, data, lambda_=1.0): + def train(cls, data: RDD[LabeledPoint], lambda_: float = 1.0) -> NaiveBayesModel: """ Train a Naive Bayes model given an RDD of (label, features) vectors. 
@@ -843,22 +914,26 @@ class StreamingLogisticRegressionWithSGD(StreamingLinearAlgorithm): def __init__( self, - stepSize=0.1, - numIterations=50, - miniBatchFraction=1.0, - regParam=0.0, - convergenceTol=0.001, - ): + stepSize: float = 0.1, + numIterations: int = 50, + miniBatchFraction: float = 1.0, + regParam: float = 0.0, + convergenceTol: float = 0.001, + ) -> None: self.stepSize = stepSize self.numIterations = numIterations self.regParam = regParam self.miniBatchFraction = miniBatchFraction self.convergenceTol = convergenceTol - self._model = None - super(StreamingLogisticRegressionWithSGD, self).__init__(model=self._model) + self._model: Optional[LogisticRegressionModel] = None + super(StreamingLogisticRegressionWithSGD, self).__init__( + model=self._model # type: ignore[arg-type] + ) @since("1.5.0") - def setInitialWeights(self, initialWeights): + def setInitialWeights( + self, initialWeights: "VectorLike" + ) -> "StreamingLogisticRegressionWithSGD": """ Set the initial value of weights. @@ -867,15 +942,17 @@ def setInitialWeights(self, initialWeights): initialWeights = _convert_to_vector(initialWeights) # LogisticRegressionWithSGD does only binary classification. - self._model = LogisticRegressionModel(initialWeights, 0, initialWeights.size, 2) + self._model = LogisticRegressionModel( + initialWeights, 0, initialWeights.size, 2 # type: ignore[attr-defined] + ) return self @since("1.5.0") - def trainOn(self, dstream): + def trainOn(self, dstream: "DStream[LabeledPoint]") -> None: """Train the model on the incoming dstream.""" self._validate(dstream) - def update(rdd): + def update(rdd: RDD[LabeledPoint]) -> None: # LogisticRegressionWithSGD.train raises an error for an empty RDD. if not rdd.isEmpty(): self._model = LogisticRegressionWithSGD.train( @@ -883,7 +960,7 @@ def update(rdd): self.numIterations, self.stepSize, self.miniBatchFraction, - self._model.weights, + self._model.weights, # type: ignore[union-attr] regParam=self.regParam, convergenceTol=self.convergenceTol, ) @@ -891,7 +968,7 @@ def update(rdd): dstream.foreachRDD(update) -def _test(): +def _test() -> None: import doctest from pyspark.sql import SparkSession import pyspark.mllib.classification diff --git a/python/pyspark/mllib/classification.pyi b/python/pyspark/mllib/classification.pyi deleted file mode 100644 index ba88f6dcb2dda..0000000000000 --- a/python/pyspark/mllib/classification.pyi +++ /dev/null @@ -1,151 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from typing import overload -from typing import Optional, Union - -from pyspark.context import SparkContext -from pyspark.rdd import RDD -from pyspark.mllib._typing import VectorLike -from pyspark.mllib.linalg import Vector -from pyspark.mllib.regression import LabeledPoint, LinearModel, StreamingLinearAlgorithm -from pyspark.mllib.util import Saveable, Loader -from pyspark.streaming.dstream import DStream - -from numpy import float64, ndarray - -class LinearClassificationModel(LinearModel): - def __init__(self, weights: Vector, intercept: float) -> None: ... - def setThreshold(self, value: float) -> None: ... - @property - def threshold(self) -> Optional[float]: ... - def clearThreshold(self) -> None: ... - @overload - def predict(self, test: VectorLike) -> Union[int, float, float64]: ... - @overload - def predict(self, test: RDD[VectorLike]) -> RDD[Union[int, float]]: ... - -class LogisticRegressionModel(LinearClassificationModel): - def __init__( - self, weights: Vector, intercept: float, numFeatures: int, numClasses: int - ) -> None: ... - @property - def numFeatures(self) -> int: ... - @property - def numClasses(self) -> int: ... - @overload - def predict(self, x: VectorLike) -> Union[int, float]: ... - @overload - def predict(self, x: RDD[VectorLike]) -> RDD[Union[int, float]]: ... - def save(self, sc: SparkContext, path: str) -> None: ... - @classmethod - def load(cls, sc: SparkContext, path: str) -> LogisticRegressionModel: ... - -class LogisticRegressionWithSGD: - @classmethod - def train( - cls, - data: RDD[LabeledPoint], - iterations: int = ..., - step: float = ..., - miniBatchFraction: float = ..., - initialWeights: Optional[VectorLike] = ..., - regParam: float = ..., - regType: str = ..., - intercept: bool = ..., - validateData: bool = ..., - convergenceTol: float = ..., - ) -> LogisticRegressionModel: ... - -class LogisticRegressionWithLBFGS: - @classmethod - def train( - cls, - data: RDD[LabeledPoint], - iterations: int = ..., - initialWeights: Optional[VectorLike] = ..., - regParam: float = ..., - regType: str = ..., - intercept: bool = ..., - corrections: int = ..., - tolerance: float = ..., - validateData: bool = ..., - numClasses: int = ..., - ) -> LogisticRegressionModel: ... - -class SVMModel(LinearClassificationModel): - def __init__(self, weights: Vector, intercept: float) -> None: ... - @overload # type: ignore - def predict(self, x: VectorLike) -> float64: ... - @overload - def predict(self, x: RDD[VectorLike]) -> RDD[float64]: ... - def save(self, sc: SparkContext, path: str) -> None: ... - @classmethod - def load(cls, sc: SparkContext, path: str) -> SVMModel: ... - -class SVMWithSGD: - @classmethod - def train( - cls, - data: RDD[LabeledPoint], - iterations: int = ..., - step: float = ..., - regParam: float = ..., - miniBatchFraction: float = ..., - initialWeights: Optional[VectorLike] = ..., - regType: str = ..., - intercept: bool = ..., - validateData: bool = ..., - convergenceTol: float = ..., - ) -> SVMModel: ... - -class NaiveBayesModel(Saveable, Loader[NaiveBayesModel]): - labels: ndarray - pi: ndarray - theta: ndarray - def __init__(self, labels: ndarray, pi: ndarray, theta: ndarray) -> None: ... - @overload - def predict(self, x: VectorLike) -> float64: ... - @overload - def predict(self, x: RDD[VectorLike]) -> RDD[float64]: ... - def save(self, sc: SparkContext, path: str) -> None: ... - @classmethod - def load(cls, sc: SparkContext, path: str) -> NaiveBayesModel: ... 
- -class NaiveBayes: - @classmethod - def train(cls, data: RDD[VectorLike], lambda_: float = ...) -> NaiveBayesModel: ... - -class StreamingLogisticRegressionWithSGD(StreamingLinearAlgorithm): - stepSize: float - numIterations: int - regParam: float - miniBatchFraction: float - convergenceTol: float - def __init__( - self, - stepSize: float = ..., - numIterations: int = ..., - miniBatchFraction: float = ..., - regParam: float = ..., - convergenceTol: float = ..., - ) -> None: ... - def setInitialWeights( - self, initialWeights: VectorLike - ) -> StreamingLogisticRegressionWithSGD: ... - def trainOn(self, dstream: DStream[LabeledPoint]) -> None: ... diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index c099b4880281e..9ce31c8b538d2 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -17,6 +17,18 @@ import sys import warnings +from typing import ( + Any, + Callable, + Iterable, + Optional, + Tuple, + Type, + TypeVar, + Union, + overload, + TYPE_CHECKING, +) import numpy as np @@ -25,6 +37,16 @@ from pyspark.mllib.common import callMLlibFunc, _py2java, _java2py, inherit_doc from pyspark.mllib.linalg import _convert_to_vector from pyspark.mllib.util import Saveable, Loader +from pyspark.rdd import RDD +from pyspark.context import SparkContext +from pyspark.mllib.linalg import Vector + +if TYPE_CHECKING: + from pyspark.mllib._typing import VectorLike + + +LM = TypeVar("LM") +K = TypeVar("K") __all__ = [ "LabeledPoint", @@ -62,17 +84,17 @@ class LabeledPoint: 'label' and 'features' are accessible as class attributes. """ - def __init__(self, label, features): + def __init__(self, label: float, features: Iterable[float]): self.label = float(label) self.features = _convert_to_vector(features) - def __reduce__(self): + def __reduce__(self) -> Tuple[Type["LabeledPoint"], Tuple[float, Vector]]: return (LabeledPoint, (self.label, self.features)) - def __str__(self): + def __str__(self) -> str: return "(" + ",".join((str(self.label), str(self.features))) + ")" - def __repr__(self): + def __repr__(self) -> str: return "LabeledPoint(%s, %s)" % (self.label, self.features) @@ -91,23 +113,23 @@ class LinearModel: Intercept computed for this model. """ - def __init__(self, weights, intercept): + def __init__(self, weights: Vector, intercept: float): self._coeff = _convert_to_vector(weights) self._intercept = float(intercept) - @property + @property # type: ignore[misc] @since("1.0.0") - def weights(self): + def weights(self) -> Vector: """Weights computed for every feature.""" return self._coeff - @property + @property # type: ignore[misc] @since("1.0.0") - def intercept(self): + def intercept(self) -> float: """Intercept computed for this model.""" return self._intercept - def __repr__(self): + def __repr__(self) -> str: return "(weights=%s, intercept=%r)" % (self._coeff, self._intercept) @@ -128,16 +150,25 @@ class LinearRegressionModelBase(LinearModel): True """ - @since("0.9.0") - def predict(self, x): + @overload + def predict(self, x: "VectorLike") -> float: + ... + + @overload + def predict(self, x: RDD["VectorLike"]) -> RDD[float]: + ... + + def predict(self, x: Union["VectorLike", RDD["VectorLike"]]) -> Union[float, RDD[float]]: """ Predict the value of the dependent variable given a vector or an RDD of vectors containing values for the independent variables. + + .. 
versionadded:: 0.9.0 """ if isinstance(x, RDD): return x.map(self.predict) x = _convert_to_vector(x) - return self.weights.dot(x) + self.intercept + return self.weights.dot(x) + self.intercept # type: ignore[attr-defined] @inherit_doc @@ -204,8 +235,10 @@ class LinearRegressionModel(LinearRegressionModelBase): """ @since("1.4.0") - def save(self, sc, path): + def save(self, sc: SparkContext, path: str) -> None: """Save a LinearRegressionModel.""" + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.regression.LinearRegressionModel( _py2java(sc, self._coeff), self.intercept ) @@ -213,8 +246,10 @@ def save(self, sc, path): @classmethod @since("1.4.0") - def load(cls, sc, path): + def load(cls, sc: SparkContext, path: str) -> "LinearRegressionModel": """Load a LinearRegressionModel.""" + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.regression.LinearRegressionModel.load( sc._jsc.sc(), path ) @@ -227,7 +262,12 @@ def load(cls, sc, path): # train_func should take two parameters, namely data and initial_weights, and # return the result of a call to the appropriate JVM stub. # _regression_train_wrapper is responsible for setup and error checking. -def _regression_train_wrapper(train_func, modelClass, data, initial_weights): +def _regression_train_wrapper( + train_func: Callable[[RDD[LabeledPoint], Vector], Iterable[Any]], + modelClass: Type[LM], + data: RDD[LabeledPoint], + initial_weights: Optional["VectorLike"], +) -> LM: from pyspark.mllib.classification import LogisticRegressionModel first = data.first() @@ -239,10 +279,12 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights): weights, intercept, numFeatures, numClasses = train_func( data, _convert_to_vector(initial_weights) ) - return modelClass(weights, intercept, numFeatures, numClasses) + return modelClass( # type: ignore[call-arg, return-value] + weights, intercept, numFeatures, numClasses + ) else: weights, intercept = train_func(data, _convert_to_vector(initial_weights)) - return modelClass(weights, intercept) + return modelClass(weights, intercept) # type: ignore[call-arg, return-value] class LinearRegressionWithSGD: @@ -257,17 +299,17 @@ class LinearRegressionWithSGD: @classmethod def train( cls, - data, - iterations=100, - step=1.0, - miniBatchFraction=1.0, - initialWeights=None, - regParam=0.0, - regType=None, - intercept=False, - validateData=True, - convergenceTol=0.001, - ): + data: RDD[LabeledPoint], + iterations: int = 100, + step: float = 1.0, + miniBatchFraction: float = 1.0, + initialWeights: Optional["VectorLike"] = None, + regParam: float = 0.0, + regType: Optional[str] = None, + intercept: bool = False, + validateData: bool = True, + convergenceTol: float = 0.001, + ) -> LinearRegressionModel: """ Train a linear regression model using Stochastic Gradient Descent (SGD). This solves the least squares regression @@ -324,7 +366,7 @@ def train( """ warnings.warn("Deprecated in 2.0.0. 
Use ml.regression.LinearRegression.", FutureWarning) - def train(rdd, i): + def train(rdd: RDD[LabeledPoint], i: Vector) -> Iterable[Any]: return callMLlibFunc( "trainLinearRegressionModelWithSGD", rdd, @@ -339,7 +381,9 @@ def train(rdd, i): float(convergenceTol), ) - return _regression_train_wrapper(train, LinearRegressionModel, data, initialWeights) + return _regression_train_wrapper( + train, LinearRegressionModel, data, initialWeights # type: ignore[arg-type] + ) @inherit_doc @@ -407,8 +451,10 @@ class LassoModel(LinearRegressionModelBase): """ @since("1.4.0") - def save(self, sc, path): + def save(self, sc: SparkContext, path: str) -> None: """Save a LassoModel.""" + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.regression.LassoModel( _py2java(sc, self._coeff), self.intercept ) @@ -416,8 +462,10 @@ def save(self, sc, path): @classmethod @since("1.4.0") - def load(cls, sc, path): + def load(cls, sc: SparkContext, path: str) -> "LassoModel": """Load a LassoModel.""" + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.regression.LassoModel.load(sc._jsc.sc(), path) weights = _java2py(sc, java_model.weights()) intercept = java_model.intercept() @@ -438,16 +486,16 @@ class LassoWithSGD: @classmethod def train( cls, - data, - iterations=100, - step=1.0, - regParam=0.01, - miniBatchFraction=1.0, - initialWeights=None, - intercept=False, - validateData=True, - convergenceTol=0.001, - ): + data: RDD[LabeledPoint], + iterations: int = 100, + step: float = 1.0, + regParam: float = 0.01, + miniBatchFraction: float = 1.0, + initialWeights: Optional["VectorLike"] = None, + intercept: bool = False, + validateData: bool = True, + convergenceTol: float = 0.001, + ) -> LassoModel: """ Train a regression model with L1-regularization using Stochastic Gradient Descent. 
This solves the l1-regularized least squares @@ -499,7 +547,7 @@ def train( FutureWarning, ) - def train(rdd, i): + def train(rdd: RDD[LabeledPoint], i: Vector) -> Iterable[Any]: return callMLlibFunc( "trainLassoModelWithSGD", rdd, @@ -513,7 +561,9 @@ def train(rdd, i): float(convergenceTol), ) - return _regression_train_wrapper(train, LassoModel, data, initialWeights) + return _regression_train_wrapper( + train, LassoModel, data, initialWeights # type: ignore[arg-type] + ) @inherit_doc @@ -581,8 +631,10 @@ class RidgeRegressionModel(LinearRegressionModelBase): """ @since("1.4.0") - def save(self, sc, path): + def save(self, sc: SparkContext, path: str) -> None: """Save a RidgeRegressionMode.""" + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.regression.RidgeRegressionModel( _py2java(sc, self._coeff), self.intercept ) @@ -590,8 +642,10 @@ def save(self, sc, path): @classmethod @since("1.4.0") - def load(cls, sc, path): + def load(cls, sc: SparkContext, path: str) -> "RidgeRegressionModel": """Load a RidgeRegressionMode.""" + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.regression.RidgeRegressionModel.load( sc._jsc.sc(), path ) @@ -615,16 +669,16 @@ class RidgeRegressionWithSGD: @classmethod def train( cls, - data, - iterations=100, - step=1.0, - regParam=0.01, - miniBatchFraction=1.0, - initialWeights=None, - intercept=False, - validateData=True, - convergenceTol=0.001, - ): + data: RDD[LabeledPoint], + iterations: int = 100, + step: float = 1.0, + regParam: float = 0.01, + miniBatchFraction: float = 1.0, + initialWeights: Optional["VectorLike"] = None, + intercept: bool = False, + validateData: bool = True, + convergenceTol: float = 0.001, + ) -> RidgeRegressionModel: """ Train a regression model with L2-regularization using Stochastic Gradient Descent. This solves the l2-regularized least squares @@ -677,7 +731,7 @@ def train( FutureWarning, ) - def train(rdd, i): + def train(rdd: RDD[LabeledPoint], i: Vector) -> Iterable[Any]: return callMLlibFunc( "trainRidgeModelWithSGD", rdd, @@ -691,10 +745,12 @@ def train(rdd, i): float(convergenceTol), ) - return _regression_train_wrapper(train, RidgeRegressionModel, data, initialWeights) + return _regression_train_wrapper( + train, RidgeRegressionModel, data, initialWeights # type: ignore[arg-type] + ) -class IsotonicRegressionModel(Saveable, Loader): +class IsotonicRegressionModel(Saveable, Loader["IsotonicRegressionModel"]): """ Regression model for isotonic regression. @@ -737,12 +793,30 @@ class IsotonicRegressionModel(Saveable, Loader): ... pass """ - def __init__(self, boundaries, predictions, isotonic): + def __init__(self, boundaries: np.ndarray, predictions: np.ndarray, isotonic: bool): self.boundaries = boundaries self.predictions = predictions self.isotonic = isotonic - def predict(self, x): + @overload + def predict(self, x: float) -> np.float64: + ... + + @overload + def predict(self, x: "VectorLike") -> np.ndarray: + ... + + @overload + def predict(self, x: RDD[float]) -> RDD[np.float64]: + ... + + @overload + def predict(self, x: RDD["VectorLike"]) -> RDD[np.ndarray]: + ... + + def predict( + self, x: Union[float, "VectorLike", RDD[float], RDD["VectorLike"]] + ) -> Union[np.float64, np.ndarray, RDD[np.float64], RDD[np.ndarray]]: """ Predict labels for provided features. Using a piecewise linear function. 
@@ -770,13 +844,17 @@ def predict(self, x): """ if isinstance(x, RDD): return x.map(lambda v: self.predict(v)) - return np.interp(x, self.boundaries, self.predictions) + return np.interp( + x, self.boundaries, self.predictions # type: ignore[call-overload, arg-type] + ) @since("1.4.0") - def save(self, sc, path): + def save(self, sc: SparkContext, path: str) -> None: """Save an IsotonicRegressionModel.""" java_boundaries = _py2java(sc, self.boundaries.tolist()) java_predictions = _py2java(sc, self.predictions.tolist()) + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.regression.IsotonicRegressionModel( java_boundaries, java_predictions, self.isotonic ) @@ -784,8 +862,10 @@ def save(self, sc, path): @classmethod @since("1.4.0") - def load(cls, sc, path): + def load(cls, sc: SparkContext, path: str) -> "IsotonicRegressionModel": """Load an IsotonicRegressionModel.""" + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.regression.IsotonicRegressionModel.load( sc._jsc.sc(), path ) @@ -823,7 +903,7 @@ class IsotonicRegression: """ @classmethod - def train(cls, data, isotonic=True): + def train(cls, data: RDD["VectorLike"], isotonic: bool = True) -> IsotonicRegressionModel: """ Train an isotonic regression model on the given data. @@ -852,23 +932,23 @@ class StreamingLinearAlgorithm: .. versionadded:: 1.5.0 """ - def __init__(self, model): + def __init__(self, model: Optional[LinearModel]): self._model = model @since("1.5.0") - def latestModel(self): + def latestModel(self) -> Optional[LinearModel]: """ Returns the latest model. """ return self._model - def _validate(self, dstream): + def _validate(self, dstream: Any) -> None: if not isinstance(dstream, DStream): raise TypeError("dstream should be a DStream object, got %s" % type(dstream)) if not self._model: raise ValueError("Model must be initialized using setInitialWeights") - def predictOn(self, dstream): + def predictOn(self, dstream: "DStream[VectorLike]") -> "DStream[float]": """ Use the model to make predictions on batches of data from a DStream. @@ -881,9 +961,11 @@ def predictOn(self, dstream): DStream containing predictions. """ self._validate(dstream) - return dstream.map(lambda x: self._model.predict(x)) + return dstream.map(lambda x: self._model.predict(x)) # type: ignore[union-attr] - def predictOnValues(self, dstream): + def predictOnValues( + self, dstream: "DStream[Tuple[K, VectorLike]]" + ) -> "DStream[Tuple[K, float]]": """ Use the model to make predictions on the values of a DStream and carry over its keys. @@ -896,7 +978,7 @@ def predictOnValues(self, dstream): DStream containing predictions. 
""" self._validate(dstream) - return dstream.mapValues(lambda x: self._model.predict(x)) + return dstream.mapValues(lambda x: self._model.predict(x)) # type: ignore[union-attr] @inherit_doc @@ -930,16 +1012,22 @@ class StreamingLinearRegressionWithSGD(StreamingLinearAlgorithm): (default: 0.001) """ - def __init__(self, stepSize=0.1, numIterations=50, miniBatchFraction=1.0, convergenceTol=0.001): + def __init__( + self, + stepSize: float = 0.1, + numIterations: int = 50, + miniBatchFraction: float = 1.0, + convergenceTol: float = 0.001, + ): self.stepSize = stepSize self.numIterations = numIterations self.miniBatchFraction = miniBatchFraction self.convergenceTol = convergenceTol - self._model = None + self._model: Optional[LinearModel] = None super(StreamingLinearRegressionWithSGD, self).__init__(model=self._model) @since("1.5.0") - def setInitialWeights(self, initialWeights): + def setInitialWeights(self, initialWeights: "VectorLike") -> "StreamingLinearRegressionWithSGD": """ Set the initial value of weights. @@ -950,27 +1038,28 @@ def setInitialWeights(self, initialWeights): return self @since("1.5.0") - def trainOn(self, dstream): + def trainOn(self, dstream: "DStream[LabeledPoint]") -> None: """Train the model on the incoming dstream.""" self._validate(dstream) - def update(rdd): + def update(rdd: RDD[LabeledPoint]) -> None: # LinearRegressionWithSGD.train raises an error for an empty RDD. if not rdd.isEmpty(): + assert self._model is not None self._model = LinearRegressionWithSGD.train( rdd, self.numIterations, self.stepSize, self.miniBatchFraction, self._model.weights, - intercept=self._model.intercept, + intercept=self._model.intercept, # type: ignore[arg-type] convergenceTol=self.convergenceTol, ) dstream.foreachRDD(update) -def _test(): +def _test() -> None: import doctest from pyspark.sql import SparkSession import pyspark.mllib.regression diff --git a/python/pyspark/mllib/regression.pyi b/python/pyspark/mllib/regression.pyi deleted file mode 100644 index 0e5e13a53f811..0000000000000 --- a/python/pyspark/mllib/regression.pyi +++ /dev/null @@ -1,149 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import overload -from typing import Iterable, Optional, Tuple, TypeVar -from pyspark.rdd import RDD -from pyspark.mllib._typing import VectorLike -from pyspark.context import SparkContext -from pyspark.mllib.linalg import Vector -from pyspark.mllib.util import Saveable, Loader -from pyspark.streaming.dstream import DStream -from numpy import ndarray - -K = TypeVar("K") - -class LabeledPoint: - label: int - features: Vector - def __init__(self, label: float, features: Iterable[float]) -> None: ... - def __reduce__(self) -> Tuple[type, Tuple[bytes]]: ... 
- -class LinearModel: - def __init__(self, weights: Vector, intercept: float) -> None: ... - @property - def weights(self) -> Vector: ... - @property - def intercept(self) -> float: ... - -class LinearRegressionModelBase(LinearModel): - @overload - def predict(self, x: Vector) -> float: ... - @overload - def predict(self, x: RDD[Vector]) -> RDD[float]: ... - -class LinearRegressionModel(LinearRegressionModelBase): - def save(self, sc: SparkContext, path: str) -> None: ... - @classmethod - def load(cls, sc: SparkContext, path: str) -> LinearRegressionModel: ... - -class LinearRegressionWithSGD: - @classmethod - def train( - cls, - data: RDD[LabeledPoint], - iterations: int = ..., - step: float = ..., - miniBatchFraction: float = ..., - initialWeights: Optional[VectorLike] = ..., - regParam: float = ..., - regType: Optional[str] = ..., - intercept: bool = ..., - validateData: bool = ..., - convergenceTol: float = ..., - ) -> LinearRegressionModel: ... - -class LassoModel(LinearRegressionModelBase): - def save(self, sc: SparkContext, path: str) -> None: ... - @classmethod - def load(cls, sc: SparkContext, path: str) -> LassoModel: ... - -class LassoWithSGD: - @classmethod - def train( - cls, - data: RDD[LabeledPoint], - iterations: int = ..., - step: float = ..., - regParam: float = ..., - miniBatchFraction: float = ..., - initialWeights: Optional[VectorLike] = ..., - intercept: bool = ..., - validateData: bool = ..., - convergenceTol: float = ..., - ) -> LassoModel: ... - -class RidgeRegressionModel(LinearRegressionModelBase): - def save(self, sc: SparkContext, path: str) -> None: ... - @classmethod - def load(cls, sc: SparkContext, path: str) -> RidgeRegressionModel: ... - -class RidgeRegressionWithSGD: - @classmethod - def train( - cls, - data: RDD[LabeledPoint], - iterations: int = ..., - step: float = ..., - regParam: float = ..., - miniBatchFraction: float = ..., - initialWeights: Optional[VectorLike] = ..., - intercept: bool = ..., - validateData: bool = ..., - convergenceTol: float = ..., - ) -> RidgeRegressionModel: ... - -class IsotonicRegressionModel(Saveable, Loader[IsotonicRegressionModel]): - boundaries: ndarray - predictions: ndarray - isotonic: bool - def __init__(self, boundaries: ndarray, predictions: ndarray, isotonic: bool) -> None: ... - @overload - def predict(self, x: Vector) -> ndarray: ... - @overload - def predict(self, x: RDD[Vector]) -> RDD[ndarray]: ... - def save(self, sc: SparkContext, path: str) -> None: ... - @classmethod - def load(cls, sc: SparkContext, path: str) -> IsotonicRegressionModel: ... - -class IsotonicRegression: - @classmethod - def train(cls, data: RDD[VectorLike], isotonic: bool = ...) -> IsotonicRegressionModel: ... - -class StreamingLinearAlgorithm: - def __init__(self, model: LinearModel) -> None: ... - def latestModel(self) -> LinearModel: ... - def predictOn(self, dstream: DStream[VectorLike]) -> DStream[float]: ... - def predictOnValues( - self, dstream: DStream[Tuple[K, VectorLike]] - ) -> DStream[Tuple[K, float]]: ... - -class StreamingLinearRegressionWithSGD(StreamingLinearAlgorithm): - stepSize: float - numIterations: int - miniBatchFraction: float - convergenceTol: float - def __init__( - self, - stepSize: float = ..., - numIterations: int = ..., - miniBatchFraction: float = ..., - convergenceTol: float = ..., - ) -> None: ... - def setInitialWeights(self, initialWeights: VectorLike) -> StreamingLinearRegressionWithSGD: ... - def trainOn(self, dstream: DStream[LabeledPoint]) -> None: ... 
From 69bc9d1acd018d9abe1b0fe3071043c6ae0faae9 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 6 Mar 2022 10:55:55 -0600 Subject: [PATCH 397/513] [SPARK-38239][PYTHON][MLLIB] Fix pyspark.mllib.LogisticRegressionModel.__repr__ ### What changes were proposed in this pull request? Adds native implementation of `__repr__` to `pyspark.mllib.classification.LogisticRegressionModel`. [Documentation example](https://spark.apache.org/docs/latest/mllib-linear-methods.html#logistic-regression) returns the following after this patch: ```python >>> model pyspark.mllib.LogisticRegressionModel: intercept = 0.0, numFeatures = 16, numClasses = 2, threshold = 0.5 ``` ### Why are the changes needed? Current implementation is copied from `pyspark.ml.classification` counterpart and fails with `AttributeError`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual testing. Closes #35554 from zero323/SPARK-38239. Authored-by: zero323 Signed-off-by: Sean Owen --- python/pyspark/mllib/classification.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 300d5650aa3f7..ee7168163bf25 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -318,7 +318,10 @@ def load(cls, sc: SparkContext, path: str) -> "LogisticRegressionModel": return model def __repr__(self) -> str: - return self._call_java("toString") # type: ignore[attr-defined] # SPARK-38239 + return ( + "pyspark.mllib.LogisticRegressionModel: intercept = {}, " + "numFeatures = {}, numClasses = {}, threshold = {}" + ).format(self._intercept, self._numFeatures, self._numClasses, self._threshold) class LogisticRegressionWithSGD: From 135841f257fbb008aef211a5e38222940849cb26 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Sun, 6 Mar 2022 15:41:20 -0800 Subject: [PATCH 398/513] [SPARK-38411][CORE] Use `UTF-8` when `doMergeApplicationListingInternal` reads event logs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Use UTF-8 instead of system default encoding to read event log ### Why are the changes needed? After SPARK-29160, we should always use UTF-8 to read event log, otherwise, if Spark History Server run with different default charset than "UTF-8", will encounter such error. ``` 2022-03-04 12:16:00,143 [3752440] - INFO [log-replay-executor-19:Logging57] - Parsing hdfs://hz-cluster11/spark2-history/application_1640597251469_2453817_1.lz4 for listing data... 
2022-03-04 12:16:00,145 [3752442] - ERROR [log-replay-executor-18:Logging94] - Exception while merging application listings java.nio.charset.MalformedInputException: Input length = 1 at java.nio.charset.CoderResult.throwException(CoderResult.java:281) at sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:339) at sun.nio.cs.StreamDecoder.read(StreamDecoder.java:178) at java.io.InputStreamReader.read(InputStreamReader.java:184) at java.io.BufferedReader.fill(BufferedReader.java:161) at java.io.BufferedReader.readLine(BufferedReader.java:324) at java.io.BufferedReader.readLine(BufferedReader.java:389) at scala.io.BufferedSource$BufferedLineIterator.hasNext(BufferedSource.scala:74) at scala.collection.Iterator$$anon$20.hasNext(Iterator.scala:884) at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:511) at org.apache.spark.scheduler.ReplayListenerBus.replay(ReplayListenerBus.scala:82) at org.apache.spark.deploy.history.FsHistoryProvider.$anonfun$doMergeApplicationListing$4(FsHistoryProvider.scala:819) at org.apache.spark.deploy.history.FsHistoryProvider.$anonfun$doMergeApplicationListing$4$adapted(FsHistoryProvider.scala:801) at org.apache.spark.util.Utils$.tryWithResource(Utils.scala:2626) at org.apache.spark.deploy.history.FsHistoryProvider.doMergeApplicationListing(FsHistoryProvider.scala:801) at org.apache.spark.deploy.history.FsHistoryProvider.mergeApplicationListing(FsHistoryProvider.scala:715) at org.apache.spark.deploy.history.FsHistoryProvider.$anonfun$checkForLogs$15(FsHistoryProvider.scala:581) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` ### Does this PR introduce _any_ user-facing change? Yes, bug fix. ### How was this patch tested? Verification steps in ubuntu:20.04 1. build `spark-3.3.0-SNAPSHOT-bin-master.tgz` on commit `34618a7ef6` using `dev/make-distribution.sh --tgz --name master` 2. build `spark-3.3.0-SNAPSHOT-bin-SPARK-38411.tgz` on commit `2a8f56038b` using `dev/make-distribution.sh --tgz --name SPARK-38411` 3. switch to UTF-8 using `export LC_ALL=C.UTF-8 && bash` 4. generate event log contains no-ASCII chars. ``` bin/spark-submit \ --master local[*] \ --class org.apache.spark.examples.SparkPi \ --conf spark.eventLog.enabled=true \ --conf spark.user.key='计算圆周率' \ examples/jars/spark-examples_2.12-3.3.0-SNAPSHOT.jar ``` 5. switch to POSIX using `export LC_ALL=POSIX && bash` 6. run `spark-3.3.0-SNAPSHOT-bin-master/sbin/start-history-server.sh` and watch logs
    ``` Spark Command: /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java -cp /spark-3.3.0-SNAPSHOT-bin-master/conf/:/spark-3.3.0-SNAPSHOT-bin-master/jars/* -Xmx1g org.apache.spark.deploy.history.HistoryServer ======================================== Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties 22/03/06 13:37:19 INFO HistoryServer: Started daemon with process name: 48729c3ffc10aa9 22/03/06 13:37:19 INFO SignalUtils: Registering signal handler for TERM 22/03/06 13:37:19 INFO SignalUtils: Registering signal handler for HUP 22/03/06 13:37:19 INFO SignalUtils: Registering signal handler for INT 22/03/06 13:37:21 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 22/03/06 13:37:21 INFO SecurityManager: Changing view acls to: root 22/03/06 13:37:21 INFO SecurityManager: Changing modify acls to: root 22/03/06 13:37:21 INFO SecurityManager: Changing view acls groups to: 22/03/06 13:37:21 INFO SecurityManager: Changing modify acls groups to: 22/03/06 13:37:21 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); groups with view permissions: Set(); users with modify permissions: Set(root); groups with modify permissions: Set() 22/03/06 13:37:21 INFO FsHistoryProvider: History server ui acls disabled; users with admin permissions: ; groups with admin permissions: 22/03/06 13:37:22 INFO Utils: Successfully started service 'HistoryServerUI' on port 18080. 22/03/06 13:37:23 INFO HistoryServer: Bound HistoryServer to 0.0.0.0, and started at http://29c3ffc10aa9:18080 22/03/06 13:37:23 INFO FsHistoryProvider: Parsing file:/tmp/spark-events/local-1646573251839 for listing data... 22/03/06 13:37:25 ERROR FsHistoryProvider: Exception while merging application listings java.nio.charset.MalformedInputException: Input length = 1 at java.nio.charset.CoderResult.throwException(CoderResult.java:281) ~[?:1.8.0_312] at sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:339) ~[?:1.8.0_312] at sun.nio.cs.StreamDecoder.read(StreamDecoder.java:178) ~[?:1.8.0_312] at java.io.InputStreamReader.read(InputStreamReader.java:184) ~[?:1.8.0_312] at java.io.BufferedReader.fill(BufferedReader.java:161) ~[?:1.8.0_312] at java.io.BufferedReader.readLine(BufferedReader.java:324) ~[?:1.8.0_312] at java.io.BufferedReader.readLine(BufferedReader.java:389) ~[?:1.8.0_312] at scala.io.BufferedSource$BufferedLineIterator.hasNext(BufferedSource.scala:74) ~[scala-library-2.12.15.jar:?] at scala.collection.Iterator$$anon$20.hasNext(Iterator.scala:886) ~[scala-library-2.12.15.jar:?] at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:513) ~[scala-library-2.12.15.jar:?] 
at org.apache.spark.scheduler.ReplayListenerBus.replay(ReplayListenerBus.scala:82) ~[spark-core_2.12-3.3.0-SNAPSHOT.jar:3.3.0-SNAPSHOT] at org.apache.spark.deploy.history.FsHistoryProvider.$anonfun$doMergeApplicationListingInternal$4(FsHistoryProvider.scala:830) ~[spark-core_2.12-3.3.0-SNAPSHOT.jar:3.3.0-SNAPSHOT] at org.apache.spark.deploy.history.FsHistoryProvider.$anonfun$doMergeApplicationListingInternal$4$adapted(FsHistoryProvider.scala:812) ~[spark-core_2.12-3.3.0-SNAPSHOT.jar:3.3.0-SNAPSHOT] at org.apache.spark.util.Utils$.tryWithResource(Utils.scala:2738) ~[spark-core_2.12-3.3.0-SNAPSHOT.jar:3.3.0-SNAPSHOT] at org.apache.spark.deploy.history.FsHistoryProvider.doMergeApplicationListingInternal(FsHistoryProvider.scala:812) ~[spark-core_2.12-3.3.0-SNAPSHOT.jar:3.3.0-SNAPSHOT] at org.apache.spark.deploy.history.FsHistoryProvider.doMergeApplicationListing(FsHistoryProvider.scala:758) ~[spark-core_2.12-3.3.0-SNAPSHOT.jar:3.3.0-SNAPSHOT] at org.apache.spark.deploy.history.FsHistoryProvider.mergeApplicationListing(FsHistoryProvider.scala:718) ~[spark-core_2.12-3.3.0-SNAPSHOT.jar:3.3.0-SNAPSHOT] at org.apache.spark.deploy.history.FsHistoryProvider.$anonfun$checkForLogs$15(FsHistoryProvider.scala:584) ~[spark-core_2.12-3.3.0-SNAPSHOT.jar:3.3.0-SNAPSHOT] at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_312] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_312] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_312] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_312] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_312] ```
    7. run `spark-3.3.0-SNAPSHOT-bin-master/sbin/stop-history-server.sh` 8. run `spark-3.3.0-SNAPSHOT-bin-SPARK-38411/sbin/start-history-server.sh` and watch logs
    ``` Spark Command: /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java -cp /spark-3.3.0-SNAPSHOT-bin-SPARK-38411/conf/:/spark-3.3.0-SNAPSHOT-bin-SPARK-38411/jars/* -Xmx1g org.apache.spark.deploy.history.HistoryServer ======================================== Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties 22/03/06 13:30:54 INFO HistoryServer: Started daemon with process name: 34729c3ffc10aa9 22/03/06 13:30:54 INFO SignalUtils: Registering signal handler for TERM 22/03/06 13:30:54 INFO SignalUtils: Registering signal handler for HUP 22/03/06 13:30:54 INFO SignalUtils: Registering signal handler for INT 22/03/06 13:30:55 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 22/03/06 13:30:56 INFO SecurityManager: Changing view acls to: root 22/03/06 13:30:56 INFO SecurityManager: Changing modify acls to: root 22/03/06 13:30:56 INFO SecurityManager: Changing view acls groups to: 22/03/06 13:30:56 INFO SecurityManager: Changing modify acls groups to: 22/03/06 13:30:56 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); groups with view permissions: Set(); users with modify permissions: Set(root); groups with modify permissions: Set() 22/03/06 13:30:56 INFO FsHistoryProvider: History server ui acls disabled; users with admin permissions: ; groups with admin permissions: 22/03/06 13:30:57 INFO Utils: Successfully started service 'HistoryServerUI' on port 18080. 22/03/06 13:30:57 INFO HistoryServer: Bound HistoryServer to 0.0.0.0, and started at http://29c3ffc10aa9:18080 22/03/06 13:30:57 INFO FsHistoryProvider: Parsing file:/tmp/spark-events/local-1646573251839 for listing data... 22/03/06 13:30:59 INFO FsHistoryProvider: Finished parsing file:/tmp/spark-events/local-1646573251839 ```
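A minimal, standalone Scala sketch of the behavior exercised above (illustration only, not part of the patch; the object name and sample string are invented — only `Source.fromInputStream` and `Codec.UTF8` are taken from the change itself). With the codec passed explicitly, UTF-8 event-log bytes decode the same way regardless of the JVM's default charset; relying on the default under a POSIX/US-ASCII locale leads to the `MalformedInputException` shown in the report:

    ```scala
    import java.io.ByteArrayInputStream
    import java.nio.charset.StandardCharsets

    import scala.io.{Codec, Source}

    object EventLogCodecSketch {
      def main(args: Array[String]): Unit = {
        // Event-log lines may carry non-ASCII characters, e.g. the Chinese config value above.
        val utf8Bytes = "spark.user.key=计算圆周率".getBytes(StandardCharsets.UTF_8)

        // Explicit codec: decodes correctly no matter what file.encoding / LC_ALL is in effect.
        val line = Source.fromInputStream(new ByteArrayInputStream(utf8Bytes))(Codec.UTF8)
          .getLines()
          .next()
        println(line)

        // Omitting the codec falls back to the platform default charset; under a
        // POSIX / US-ASCII default that decode fails with
        // java.nio.charset.MalformedInputException, as in the stack trace above.
      }
    }
    ```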
    Closes #35730 from pan3793/SPARK-38411. Authored-by: Cheng Pan Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/deploy/history/FsHistoryProvider.scala | 4 ++-- .../test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala | 4 ++-- .../spark/deploy/history/EventLogFileWritersSuite.scala | 4 ++-- .../apache/spark/scheduler/EventLoggingListenerSuite.scala | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index a2494eb52d0ab..a9adaed374af1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -26,7 +26,7 @@ import java.util.zip.ZipOutputStream import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.io.Source +import scala.io.{Codec, Source} import scala.util.control.NonFatal import scala.xml.Node @@ -819,7 +819,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } } - val source = Source.fromInputStream(in).getLines() + val source = Source.fromInputStream(in)(Codec.UTF8).getLines() // Because skipping may leave the stream in the middle of a line, read the next line // before replaying. diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index aead72ea0fdb7..c5a72efcb786b 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -23,7 +23,7 @@ import java.nio.charset.StandardCharsets import java.nio.file.{Files, Paths} import scala.collection.mutable.ArrayBuffer -import scala.io.Source +import scala.io.{Codec, Source} import com.google.common.io.ByteStreams import org.apache.commons.io.FileUtils @@ -647,7 +647,7 @@ class SparkSubmitSuite runSparkSubmit(args) val listStatus = fileSystem.listStatus(testDirPath) val logData = EventLogFileReader.openEventLog(listStatus.last.getPath, fileSystem) - Source.fromInputStream(logData).getLines().foreach { line => + Source.fromInputStream(logData)(Codec.UTF8).getLines().foreach { line => assert(!line.contains("secret_password")) } } diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala index e6dd9ae4224d9..455e2e18b11e1 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala @@ -21,7 +21,7 @@ import java.io.{File, FileOutputStream, IOException} import java.net.URI import scala.collection.mutable -import scala.io.Source +import scala.io.{Codec, Source} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} @@ -114,7 +114,7 @@ abstract class EventLogFileWritersSuite extends SparkFunSuite with LocalSparkCon protected def readLinesFromEventLogFile(log: Path, fs: FileSystem): List[String] = { val logDataStream = EventLogFileReader.openEventLog(log, fs) try { - Source.fromInputStream(logDataStream).getLines().toList + Source.fromInputStream(logDataStream)(Codec.UTF8).getLines().toList } finally { logDataStream.close() } diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala 
b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala index b06e83e291c0a..edb2095004f71 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala @@ -23,7 +23,7 @@ import java.util.{Arrays, Properties} import scala.collection.immutable.Map import scala.collection.mutable import scala.collection.mutable.Set -import scala.io.Source +import scala.io.{Codec, Source} import org.apache.hadoop.fs.Path import org.json4s.jackson.JsonMethods._ @@ -661,7 +661,7 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit } private def readLines(in: InputStream): Seq[String] = { - Source.fromInputStream(in).getLines().toSeq + Source.fromInputStream(in)(Codec.UTF8).getLines().toSeq } /** From b6516174a84d849bd620417dca9e0a81e0d3b5dc Mon Sep 17 00:00:00 2001 From: bjornjorgensen Date: Mon, 7 Mar 2022 09:00:06 +0900 Subject: [PATCH 399/513] [SPARK-38416][PYTHON][TESTS] Change day to month ### What changes were proposed in this pull request? Right now we have two functions that are testing the same thing. ### Why are the changes needed? To test both day and mount ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Got the green light. Closes #35741 from bjornjorgensen/change-day-to-month. Authored-by: bjornjorgensen Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/tests/indexes/test_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/pandas/tests/indexes/test_datetime.py b/python/pyspark/pandas/tests/indexes/test_datetime.py index e3bf14e654616..85a2b21901774 100644 --- a/python/pyspark/pandas/tests/indexes/test_datetime.py +++ b/python/pyspark/pandas/tests/indexes/test_datetime.py @@ -120,7 +120,7 @@ def test_day_name(self): def test_month_name(self): for psidx, pidx in self.idx_pairs: - self.assert_eq(psidx.day_name(), pidx.day_name()) + self.assert_eq(psidx.month_name(), pidx.month_name()) def test_normalize(self): for psidx, pidx in self.idx_pairs: From 3175d830cb029d41909de8960aa790d4272aa188 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Sun, 6 Mar 2022 19:23:31 -0600 Subject: [PATCH 400/513] [SPARK-38394][BUILD] Upgrade `scala-maven-plugin` to 4.4.0 for Hadoop 3 profile ### What changes were proposed in this pull request? This sets scala-maven-plugin.version to 4.4.0 except when the hadoop-2.7 profile is used, because SPARK-36547 shows that only 4.3.0 works there. ### Why are the changes needed? 1. If you try to build against a local snapshot of hadoop trunk with `-Dhadoop.version=3.4.0-SNAPSHOT` the build failes with the error shown in the JIRA. 2. upgrading the scala plugin version fixes this. It is a plugin issue. 3. the version is made configurable so the hadoop 2.7 profile can switch back to the one which works there. As to why this only surfaces when compiling hadoop trunk, or why hadoop-2.7 requires the new one -who knows. they both look certificate related, which is interesting. maybe something related to signed JARs? ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? by successfully building spark against a local build of hadoop 3.4.0-SNAPSHOT Closes #35725 from steveloughran/SPARK-38394-compiler-version. 
Authored-by: Steve Loughran Signed-off-by: Sean Owen --- pom.xml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 176d3af786adf..8e031675b7c6f 100644 --- a/pom.xml +++ b/pom.xml @@ -163,6 +163,10 @@ 2.12.15 2.12 2.0.2 + + + 4.4.0 --test true @@ -2775,8 +2779,7 @@ net.alchim31.maven scala-maven-plugin - - 4.3.0 + ${scala-maven-plugin.version} eclipse-add-source @@ -3430,6 +3433,7 @@ hadoop-client hadoop-yarn-api hadoop-client + 4.3.0 From b99f58a57c880ed9cdec3d37ac8683c31daa4c10 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sun, 6 Mar 2022 19:26:45 -0600 Subject: [PATCH 401/513] [SPARK-38267][CORE][SQL][SS] Replace pattern matches on boolean expressions with conditional statements ### What changes were proposed in this pull request? This pr uses `conditional statements` to simplify `pattern matches on boolean`: **Before** ```scala val bool: Boolean bool match { case true => do something when bool is true case false => do something when bool is false } ``` **After** ```scala val bool: Boolean if (bool) { do something when bool is true } else { do something when bool is false } ``` ### Why are the changes needed? Simplify unnecessary pattern match. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35589 from LuciferYang/trivial-match. Authored-by: yangjie01 Signed-off-by: Sean Owen --- ...kManagerDecommissionIntegrationSuite.scala | 7 +-- .../expressions/datetimeExpressions.scala | 50 +++++++++---------- .../sql/catalyst/parser/AstBuilder.scala | 14 +++--- .../internal/ExecutorSideSQLConfSuite.scala | 7 +-- .../FlatMapGroupsWithStateSuite.scala | 7 +-- 5 files changed, 43 insertions(+), 42 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala index 8999a121bcd15..e004c334dee73 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala @@ -165,9 +165,10 @@ class BlockManagerDecommissionIntegrationSuite extends SparkFunSuite with LocalS } x.map(y => (y, y)) } - val testRdd = shuffle match { - case true => baseRdd.reduceByKey(_ + _) - case false => baseRdd + val testRdd = if (shuffle) { + baseRdd.reduceByKey(_ + _) + } else { + baseRdd } // Listen for the job & block updates diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 8b5a3879e7bc7..d8cf474e65e69 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -2903,25 +2903,25 @@ case class SubtractTimestamps( @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(left.dataType) @transient - private lazy val evalFunc: (Long, Long) => Any = legacyInterval match { - case false => (leftMicros, rightMicros) => - subtractTimestamps(leftMicros, rightMicros, zoneIdInEval) - case true => (leftMicros, rightMicros) => + private lazy val evalFunc: (Long, Long) => Any = if (legacyInterval) { + (leftMicros, rightMicros) => new CalendarInterval(0, 0, leftMicros - rightMicros) + } else { + (leftMicros, rightMicros) => + 
subtractTimestamps(leftMicros, rightMicros, zoneIdInEval) } override def nullSafeEval(leftMicros: Any, rightMicros: Any): Any = { evalFunc(leftMicros.asInstanceOf[Long], rightMicros.asInstanceOf[Long]) } - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = legacyInterval match { - case false => - val zid = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName) - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, (l, r) => s"""$dtu.subtractTimestamps($l, $r, $zid)""") - case true => - defineCodeGen(ctx, ev, (end, start) => - s"new org.apache.spark.unsafe.types.CalendarInterval(0, 0, $end - $start)") + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = if (legacyInterval) { + defineCodeGen(ctx, ev, (end, start) => + s"new org.apache.spark.unsafe.types.CalendarInterval(0, 0, $end - $start)") + } else { + val zid = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName) + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, (l, r) => s"""$dtu.subtractTimestamps($l, $r, $zid)""") } override def toString: String = s"($left - $right)" @@ -2961,26 +2961,26 @@ case class SubtractDates( } @transient - private lazy val evalFunc: (Int, Int) => Any = legacyInterval match { - case false => (leftDays: Int, rightDays: Int) => + private lazy val evalFunc: (Int, Int) => Any = if (legacyInterval) { + (leftDays: Int, rightDays: Int) => subtractDates(leftDays, rightDays) + } else { + (leftDays: Int, rightDays: Int) => Math.multiplyExact(Math.subtractExact(leftDays, rightDays), MICROS_PER_DAY) - case true => (leftDays: Int, rightDays: Int) => subtractDates(leftDays, rightDays) } override def nullSafeEval(leftDays: Any, rightDays: Any): Any = { evalFunc(leftDays.asInstanceOf[Int], rightDays.asInstanceOf[Int]) } - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = legacyInterval match { - case false => - val m = classOf[Math].getName - defineCodeGen(ctx, ev, (leftDays, rightDays) => - s"$m.multiplyExact($m.subtractExact($leftDays, $rightDays), ${MICROS_PER_DAY}L)") - case true => - defineCodeGen(ctx, ev, (leftDays, rightDays) => { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - s"$dtu.subtractDates($leftDays, $rightDays)" - }) + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = if (legacyInterval) { + defineCodeGen(ctx, ev, (leftDays, rightDays) => { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + s"$dtu.subtractDates($leftDays, $rightDays)" + }) + } else { + val m = classOf[Math].getName + defineCodeGen(ctx, ev, (leftDays, rightDays) => + s"$m.multiplyExact($m.subtractExact($leftDays, $rightDays), ${MICROS_PER_DAY}L)") } override def toString: String = s"($left - $right)" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 2e56df7ba7bf5..64d54861d2988 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2580,11 +2580,10 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit } if (values(i).MINUS() == null) { value + } else if (value.startsWith("-")) { + value.replaceFirst("-", "") } else { - value.startsWith("-") match { - case true => value.replaceFirst("-", "") - case false => s"-$value" - } + s"-$value" } } else { 
values(i).getText @@ -2609,11 +2608,10 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit val value = Option(ctx.intervalValue.STRING).map(string).map { interval => if (ctx.intervalValue().MINUS() == null) { interval + } else if (interval.startsWith("-")) { + interval.replaceFirst("-", "") } else { - interval.startsWith("-") match { - case true => interval.replaceFirst("-", "") - case false => s"-$interval" - } + s"-$interval" } }.getOrElse { throw QueryParsingErrors.invalidFromToUnitValueError(ctx.intervalValue) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala index 057bb34175a29..0d1ab5ef77b64 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala @@ -139,9 +139,10 @@ class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils { Seq(true) .toDF() .mapPartitions { _ => - TaskContext.get.getLocalProperty(confKey) == confValue match { - case true => Iterator(true) - case false => Iterator.empty + if (TaskContext.get.getLocalProperty(confKey) == confValue) { + Iterator(true) + } else { + Iterator.empty } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala index d34b2b8e9f7b1..5d3fcd52f592b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala @@ -427,9 +427,10 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest { timeoutConf: GroupStateTimeout, procTime: Long, watermarkPresent: Boolean): GroupState[Int] = { - val eventTimeWatermarkMs = watermarkPresent match { - case true => Optional.of(1000L) - case false => Optional.empty[Long] + val eventTimeWatermarkMs = if (watermarkPresent) { + Optional.of(1000L) + } else { + Optional.empty[Long] } TestGroupState.create[Int]( Optional.of(1000), timeoutConf, procTime, eventTimeWatermarkMs, hasTimedOut = false) From d83ab94dc3591d32976896720030d3beec0fd536 Mon Sep 17 00:00:00 2001 From: stczwd Date: Sun, 6 Mar 2022 19:41:17 -0600 Subject: [PATCH 402/513] [SPARK-38419][BUILD] Replace tabs that exist in the script with spaces ### Why are the changes needed? There are some tabs in some script, which don't seem to be standardized. This pr tries to replace tabs that exist in the script with spaces. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #35738 from stczwd/SPARK-38419. 
Authored-by: stczwd Signed-off-by: Sean Owen --- .../docker/src/main/dockerfiles/spark/entrypoint.sh | 4 ++-- sbin/spark-daemon.sh | 12 ++++++------ sbin/start-master.sh | 8 ++++---- sbin/start-mesos-dispatcher.sh | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh index b3e0d69909ab0..5691011795dcf 100755 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh @@ -30,9 +30,9 @@ set -e # If there is no passwd entry for the container UID, attempt to create one if [ -z "$uidentry" ] ; then if [ -w /etc/passwd ] ; then - echo "$myuid:x:$myuid:$mygid:${SPARK_USER_NAME:-anonymous uid}:$SPARK_HOME:/bin/false" >> /etc/passwd + echo "$myuid:x:$myuid:$mygid:${SPARK_USER_NAME:-anonymous uid}:$SPARK_HOME:/bin/false" >> /etc/passwd else - echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" + echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" fi fi diff --git a/sbin/spark-daemon.sh b/sbin/spark-daemon.sh index e563f7bff1667..3cfd5acfe2b56 100755 --- a/sbin/spark-daemon.sh +++ b/sbin/spark-daemon.sh @@ -86,12 +86,12 @@ spark_rotate_log () fi if [ -f "$log" ]; then # rotate logs - while [ $num -gt 1 ]; do - prev=`expr $num - 1` - [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num" - num=$prev - done - mv "$log" "$log.$num"; + while [ $num -gt 1 ]; do + prev=`expr $num - 1` + [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num" + num=$prev + done + mv "$log" "$log.$num"; fi } diff --git a/sbin/start-master.sh b/sbin/start-master.sh index b6a566e4daf4b..36fe4b4abeb91 100755 --- a/sbin/start-master.sh +++ b/sbin/start-master.sh @@ -51,11 +51,11 @@ fi if [ "$SPARK_MASTER_HOST" = "" ]; then case `uname` in (SunOS) - SPARK_MASTER_HOST="`/usr/sbin/check-hostname | awk '{print $NF}'`" - ;; + SPARK_MASTER_HOST="`/usr/sbin/check-hostname | awk '{print $NF}'`" + ;; (*) - SPARK_MASTER_HOST="`hostname -f`" - ;; + SPARK_MASTER_HOST="`hostname -f`" + ;; esac fi diff --git a/sbin/start-mesos-dispatcher.sh b/sbin/start-mesos-dispatcher.sh index ecaad7ad09634..c2e30d8c0b080 100755 --- a/sbin/start-mesos-dispatcher.sh +++ b/sbin/start-mesos-dispatcher.sh @@ -36,11 +36,11 @@ fi if [ "$SPARK_MESOS_DISPATCHER_HOST" = "" ]; then case `uname` in (SunOS) - SPARK_MESOS_DISPATCHER_HOST="`/usr/sbin/check-hostname | awk '{print $NF}'`" - ;; + SPARK_MESOS_DISPATCHER_HOST="`/usr/sbin/check-hostname | awk '{print $NF}'`" + ;; (*) - SPARK_MESOS_DISPATCHER_HOST="`hostname -f`" - ;; + SPARK_MESOS_DISPATCHER_HOST="`hostname -f`" + ;; esac fi From fc6b5e57c375af3c7f5ffd3d80b4a0216c59fc44 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Sun, 6 Mar 2022 20:49:50 -0800 Subject: [PATCH 403/513] [SPARK-38188][K8S][TESTS][FOLLOWUP] Cleanup resources in `afterEach` ### What changes were proposed in this pull request? Cleanup resources in `afterEach` - Cleanup pod resources. - Cleanup Yaml resources. ### Why are the changes needed? - Test pods with custom group label are not be deleted after each test completed when user specified custom group ### Does this PR introduce _any_ user-facing change? NO, K8S IT only. ### How was this patch tested? 
``` $ build/sbt -Pvolcano -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube,r -Dtest.include.tags=volcano -Dspark.kubernetes.test.namespace=default "kubernetes-integration-tests/test" [info] VolcanoSuite: [info] - Run SparkPi with volcano scheduler (13 seconds, 172 milliseconds) [info] - SPARK-38188: Run SparkPi jobs with 2 queues (only 1 enable) (17 seconds, 409 milliseconds) [info] - SPARK-38188: Run SparkPi jobs with 2 queues (all enable) (27 seconds, 448 milliseconds) $ k get pod No resources found in default namespace. # Only default queue left $ k get queue NAME AGE default 25h ``` Closes #35733 from Yikun/SPARK-38188-FOLLOWUP. Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- .../integrationtest/VolcanoTestsSuite.scala | 61 +++++++++++++++---- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala index 7ffd28b790ceb..f918381a58b26 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala @@ -29,6 +29,7 @@ import scala.concurrent.Future import io.fabric8.kubernetes.api.model.Pod import io.fabric8.kubernetes.client.NamespacedKubernetesClient import io.fabric8.volcano.client.VolcanoClient +import org.scalatest.BeforeAndAfterEach import org.scalatest.concurrent.Eventually import org.apache.spark.SparkFunSuite @@ -36,7 +37,7 @@ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.features.VolcanoFeatureStep import org.apache.spark.internal.config.NETWORK_AUTH_ENABLED -private[spark] trait VolcanoTestsSuite { k8sSuite: KubernetesSuite => +private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: KubernetesSuite => import VolcanoTestsSuite._ import org.apache.spark.deploy.k8s.integrationtest.VolcanoSuite.volcanoTag import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{k8sTestTag, INTERVAL, TIMEOUT} @@ -44,6 +45,43 @@ private[spark] trait VolcanoTestsSuite { k8sSuite: KubernetesSuite => lazy val volcanoClient: VolcanoClient = kubernetesTestComponents.kubernetesClient.adapt(classOf[VolcanoClient]) lazy val k8sClient: NamespacedKubernetesClient = kubernetesTestComponents.kubernetesClient + private val testGroups: mutable.Set[String] = mutable.Set.empty + private val testYAMLPaths: mutable.Set[String] = mutable.Set.empty + + private def deletePodInTestGroup(): Unit = { + testGroups.foreach { g => + k8sClient.pods().withLabel("spark-group-locator", g).delete() + Eventually.eventually(TIMEOUT, INTERVAL) { + assert(k8sClient.pods().withLabel("spark-group-locator", g).list().getItems.isEmpty) + } + } + testGroups.clear() + } + + private def deleteYamlResources(): Unit = { + testYAMLPaths.foreach { yaml => + deleteYAMLResource(yaml) + Eventually.eventually(TIMEOUT, INTERVAL) { + val resources = k8sClient.load(new FileInputStream(yaml)).fromServer.get.asScala + // Make sure all elements are null (no specific resources in cluster) + resources.foreach { r => assert(r === null) } + } + } + testYAMLPaths.clear() + } + + override protected def afterEach(): Unit = { + deletePodInTestGroup() + deleteYamlResources() + super.afterEach() 
+ } + + protected def generateGroupName(name: String): String = { + val groupName = GROUP_PREFIX + name + // Append to testGroups + testGroups += groupName + groupName + } protected def checkScheduler(pod: Pod): Unit = { assert(pod.getSpec.getSchedulerName === "volcano") @@ -67,6 +105,7 @@ private[spark] trait VolcanoTestsSuite { k8sSuite: KubernetesSuite => private def createOrReplaceYAMLResource(yamlPath: String): Unit = { k8sClient.load(new FileInputStream(yamlPath)).createOrReplace() + testYAMLPaths += yamlPath } private def deleteYAMLResource(yamlPath: String): Unit = { @@ -151,7 +190,7 @@ private[spark] trait VolcanoTestsSuite { k8sSuite: KubernetesSuite => ) } - test("SPARK-38188: Run SparkPi jobs with 2 queues (only 1 enable)", k8sTestTag, volcanoTag) { + test("SPARK-38188: Run SparkPi jobs with 2 queues (only 1 enabled)", k8sTestTag, volcanoTag) { // Disabled queue0 and enabled queue1 createOrReplaceYAMLResource(VOLCANO_Q0_DISABLE_Q1_ENABLE_YAML) // Submit jobs into disabled queue0 and enabled queue1 @@ -159,20 +198,21 @@ private[spark] trait VolcanoTestsSuite { k8sSuite: KubernetesSuite => (1 to jobNum).foreach { i => Future { val queueName = s"queue${i % 2}" - runJobAndVerify(i.toString, Option(s"$GROUP_PREFIX-$queueName"), Option(queueName)) + val groupName = generateGroupName(queueName) + runJobAndVerify(i.toString, Option(groupName), Option(queueName)) } } // There are two `Succeeded` jobs and two `Pending` jobs Eventually.eventually(TIMEOUT, INTERVAL) { - val completedPods = getPods("driver", s"$GROUP_PREFIX-queue1", "Succeeded") + val completedPods = getPods("driver", s"${GROUP_PREFIX}queue1", "Succeeded") assert(completedPods.size === 2) - val pendingPods = getPods("driver", s"$GROUP_PREFIX-queue0", "Pending") + val pendingPods = getPods("driver", s"${GROUP_PREFIX}queue0", "Pending") assert(pendingPods.size === 2) } - deleteYAMLResource(VOLCANO_Q0_DISABLE_Q1_ENABLE_YAML) } - test("SPARK-38188: Run SparkPi jobs with 2 queues (all enable)", k8sTestTag, volcanoTag) { + test("SPARK-38188: Run SparkPi jobs with 2 queues (all enabled)", k8sTestTag, volcanoTag) { + val groupName = generateGroupName("queue-enable") // Enable all queues createOrReplaceYAMLResource(VOLCANO_ENABLE_Q0_AND_Q1_YAML) val jobNum = 4 @@ -180,15 +220,14 @@ private[spark] trait VolcanoTestsSuite { k8sSuite: KubernetesSuite => (1 to jobNum).foreach { i => Future { val queueName = s"queue${i % 2}" - runJobAndVerify(i.toString, Option(s"$GROUP_PREFIX"), Option(queueName)) + runJobAndVerify(i.toString, Option(groupName), Option(queueName)) } } // All jobs "Succeeded" Eventually.eventually(TIMEOUT, INTERVAL) { - val completedPods = getPods("driver", GROUP_PREFIX, "Succeeded") + val completedPods = getPods("driver", groupName, "Succeeded") assert(completedPods.size === jobNum) } - deleteYAMLResource(VOLCANO_ENABLE_Q0_AND_Q1_YAML) } } @@ -200,5 +239,5 @@ private[spark] object VolcanoTestsSuite extends SparkFunSuite { val VOLCANO_Q0_DISABLE_Q1_ENABLE_YAML = new File( getClass.getResource("/volcano/disable-queue0-enable-queue1.yml").getFile ).getAbsolutePath - val GROUP_PREFIX = "volcano-test" + UUID.randomUUID().toString.replaceAll("-", "") + val GROUP_PREFIX = "volcano-test" + UUID.randomUUID().toString.replaceAll("-", "") + "-" } From 3bbc43d662ccfff6bd93a351fcbf96179289f58f Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sun, 6 Mar 2022 22:04:20 -0800 Subject: [PATCH 404/513] [SPARK-38430][K8S][DOCS] Add `SBT` commands to K8s IT README ### What changes were proposed in this pull request? 
This PR aims to add SBT commands to K8s IT README. ### Why are the changes needed? This will introduce new SBT commands to developers. ### Does this PR introduce _any_ user-facing change? No, this is a dev-only change. ### How was this patch tested? Manual. Closes #35745 from williamhyun/sbtdoc. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- .../kubernetes/integration-tests/README.md | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md index edd3bf5f7afe8..2151b7fbb7700 100644 --- a/resource-managers/kubernetes/integration-tests/README.md +++ b/resource-managers/kubernetes/integration-tests/README.md @@ -269,3 +269,28 @@ to the wrapper scripts and using the wrapper scripts will simply set these appro + +# Running the Kubernetes Integration Tests with SBT + +You can use SBT in the same way to build image and run all K8s integration tests except Minikube-only ones. + + build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests \ + -Dtest.exclude.tags=minikube \ + -Dspark.kubernetes.test.deployMode=docker-desktop \ + -Dspark.kubernetes.test.imageTag=2022-03-06 \ + 'kubernetes-integration-tests/test' + +The following is an example to rerun tests with the pre-built image. + + build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests \ + -Dtest.exclude.tags=minikube \ + -Dspark.kubernetes.test.deployMode=docker-desktop \ + -Dspark.kubernetes.test.imageTag=2022-03-06 \ + 'kubernetes-integration-tests/runIts' + +In addition, you can run a single test selectively. + + build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests \ + -Dspark.kubernetes.test.deployMode=docker-desktop \ + -Dspark.kubernetes.test.imageTag=2022-03-06 \ + 'kubernetes-integration-tests/testOnly -- -z "Run SparkPi with a very long application name"' From f36d1bfba47f6f6ff0f4375a1eb74bb606f8a0b7 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Sun, 6 Mar 2022 23:54:18 -0800 Subject: [PATCH 405/513] [SPARK-38423][K8S] Reuse driver pod's `priorityClassName` for `PodGroup` ### What changes were proposed in this pull request? This patch set podgroup `priorityClassName` to `driver.pod.spec.priorityClassName`. ### Why are the changes needed? Support priority scheduling with Volcano implementations ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - New UT to make sure feature step set podgroup priority as expected. - Add two integration tests: - 1. Submit 3 different priority jobs (spark pi) to make sure job completed result as expected. - 2. Submit 3 different priority jobs (driver submisson) to make sure job scheduler order as expected. - All existing UT and IT Closes #35639 from Yikun/SPARK-38189. 
Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- .../k8s/features/VolcanoFeatureStep.scala | 6 + .../features/VolcanoFeatureStepSuite.scala | 30 ++++ .../test/resources/volcano/disable-queue.yml | 24 +++ .../test/resources/volcano/enable-queue.yml | 24 +++ .../volcano/high-priority-driver-template.yml | 26 +++ .../volcano/low-priority-driver-template.yml | 26 +++ .../medium-priority-driver-template.yml | 26 +++ .../resources/volcano/priorityClasses.yml | 33 ++++ .../integrationtest/VolcanoTestsSuite.scala | 163 ++++++++++++++++-- 9 files changed, 340 insertions(+), 18 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue.yml create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/enable-queue.yml create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/high-priority-driver-template.yml create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/low-priority-driver-template.yml create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/medium-priority-driver-template.yml create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/priorityClasses.yml diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala index c6efe4d1368a8..48303c8c2e37f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala @@ -32,6 +32,7 @@ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureCon private lazy val podGroupName = s"${kubernetesConf.appId}-podgroup" private lazy val namespace = kubernetesConf.namespace private lazy val queue = kubernetesConf.get(KUBERNETES_JOB_QUEUE) + private var priorityClassName: Option[String] = None override def init(config: KubernetesDriverConf): Unit = { kubernetesConf = config @@ -50,10 +51,15 @@ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureCon queue.foreach(podGroup.editOrNewSpec().withQueue(_).endSpec()) + priorityClassName.foreach(podGroup.editOrNewSpec().withPriorityClassName(_).endSpec()) + Seq(podGroup.build()) } override def configurePod(pod: SparkPod): SparkPod = { + + priorityClassName = Some(pod.pod.getSpec.getPriorityClassName) + val k8sPodBuilder = new PodBuilder(pod.pod) .editMetadata() .addToAnnotations(POD_GROUP_ANNOTATION, podGroupName) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala index eda1ccc36767e..350df77ed4b3a 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala @@ -16,6 +16,7 @@ */ package org.apache.spark.deploy.k8s.features +import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder} import io.fabric8.volcano.scheduling.v1beta1.PodGroup import org.apache.spark.{SparkConf, SparkFunSuite} @@ -57,4 
+58,33 @@ class VolcanoFeatureStepSuite extends SparkFunSuite { val annotations = configuredPod.pod.getMetadata.getAnnotations assert(annotations.get("scheduling.k8s.io/group-name") === s"${kubernetesConf.appId}-podgroup") } + + test("SPARK-38423: Support priorityClassName") { + // test null priority + val podWithNullPriority = SparkPod.initialPod() + assert(podWithNullPriority.pod.getSpec.getPriorityClassName === null) + verifyPriority(SparkPod.initialPod()) + // test normal priority + val podWithPriority = SparkPod( + new PodBuilder() + .withNewMetadata() + .endMetadata() + .withNewSpec() + .withPriorityClassName("priority") + .endSpec() + .build(), + new ContainerBuilder().build()) + assert(podWithPriority.pod.getSpec.getPriorityClassName === "priority") + verifyPriority(podWithPriority) + } + + private def verifyPriority(pod: SparkPod): Unit = { + val sparkConf = new SparkConf() + val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf) + val step = new VolcanoFeatureStep() + step.init(kubernetesConf) + val sparkPod = step.configurePod(pod) + val podGroup = step.getAdditionalPreKubernetesResources().head.asInstanceOf[PodGroup] + assert(podGroup.getSpec.getPriorityClassName === sparkPod.pod.getSpec.getPriorityClassName) + } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue.yml new file mode 100644 index 0000000000000..909102d7c90c1 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue.yml @@ -0,0 +1,24 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: Queue +metadata: + name: queue +spec: + weight: 0 + capability: + cpu: "1" diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/enable-queue.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/enable-queue.yml new file mode 100644 index 0000000000000..e753b8c07f01e --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/enable-queue.yml @@ -0,0 +1,24 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: Queue +metadata: + name: queue +spec: + weight: 1 + capability: + cpu: "1" diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/high-priority-driver-template.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/high-priority-driver-template.yml new file mode 100644 index 0000000000000..a7968bfcb2c1a --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/high-priority-driver-template.yml @@ -0,0 +1,26 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: v1 +Kind: Pod +metadata: + labels: + template-label-key: driver-template-label-value +spec: + priorityClassName: high + containers: + - name: test-driver-container + image: will-be-overwritten diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/low-priority-driver-template.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/low-priority-driver-template.yml new file mode 100644 index 0000000000000..7f04b9e120c83 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/low-priority-driver-template.yml @@ -0,0 +1,26 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +apiVersion: v1 +Kind: Pod +metadata: + labels: + template-label-key: driver-template-label-value +spec: + priorityClassName: low + containers: + - name: test-driver-container + image: will-be-overwritten diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/medium-priority-driver-template.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/medium-priority-driver-template.yml new file mode 100644 index 0000000000000..78d9295399c2e --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/medium-priority-driver-template.yml @@ -0,0 +1,26 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: v1 +Kind: Pod +metadata: + labels: + template-label-key: driver-template-label-value +spec: + priorityClassName: medium + containers: + - name: test-driver-container + image: will-be-overwritten diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/priorityClasses.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/priorityClasses.yml new file mode 100644 index 0000000000000..64e9b0d530363 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/priorityClasses.yml @@ -0,0 +1,33 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: high +value: 100 +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: medium +value: 50 +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: low +value: 0 diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala index f918381a58b26..803a8d3f194d0 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.deploy.k8s.integrationtest import java.io.{File, FileInputStream} +import java.time.Instant import java.util.UUID import scala.collection.JavaConverters._ @@ -40,7 +41,8 @@ import org.apache.spark.internal.config.NETWORK_AUTH_ENABLED private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: KubernetesSuite => import VolcanoTestsSuite._ import org.apache.spark.deploy.k8s.integrationtest.VolcanoSuite.volcanoTag - import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{k8sTestTag, INTERVAL, TIMEOUT} + import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{k8sTestTag, INTERVAL, TIMEOUT, + SPARK_DRIVER_MAIN_CLASS} lazy val volcanoClient: VolcanoClient = kubernetesTestComponents.kubernetesClient.adapt(classOf[VolcanoClient]) @@ -95,12 +97,15 @@ private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: Ku protected def checkPodGroup( pod: Pod, - queue: Option[String] = None): Unit = { + queue: Option[String] = None, + priorityClassName: Option[String] = None): Unit = { val appId = pod.getMetadata.getLabels.get("spark-app-selector") val podGroupName = s"$appId-podgroup" val podGroup = volcanoClient.podGroups().withName(podGroupName).get() assert(podGroup.getMetadata.getOwnerReferences.get(0).getName === pod.getMetadata.getName) queue.foreach(q => assert(q === podGroup.getSpec.getQueue)) + priorityClassName.foreach(_ => + assert(pod.getSpec.getPriorityClassName === podGroup.getSpec.getPriorityClassName)) } private def createOrReplaceYAMLResource(yamlPath: String): Unit = { @@ -128,31 +133,73 @@ private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: Ku def runJobAndVerify( batchSuffix: String, groupLoc: Option[String] = None, - queue: Option[String] = None): Unit = { + queue: Option[String] = None, + driverTemplate: Option[String] = None, + isDriverJob: Boolean = false): Unit = { val appLoc = s"${appLocator}${batchSuffix}" val podName = s"${driverPodName}-${batchSuffix}" // create new configuration for every job - val conf = createVolcanoSparkConf(podName, appLoc, groupLoc, queue) - runSparkPiAndVerifyCompletion( - driverPodChecker = (driverPod: Pod) => { - checkScheduler(driverPod) - checkAnnotaion(driverPod) - checkPodGroup(driverPod, queue) - }, - executorPodChecker = (executorPod: Pod) => { - checkScheduler(executorPod) - checkAnnotaion(executorPod) - }, - customSparkConf = Option(conf), - customAppLocator = Option(appLoc) - ) + val conf = createVolcanoSparkConf(podName, appLoc, groupLoc, queue, driverTemplate) + if (isDriverJob) { + runSparkDriverSubmissionAndVerifyCompletion( + driverPodChecker = (driverPod: 
Pod) => { + checkScheduler(driverPod) + checkAnnotaion(driverPod) + checkPodGroup(driverPod, queue) + }, + customSparkConf = Option(conf), + customAppLocator = Option(appLoc) + ) + } else { + runSparkPiAndVerifyCompletion( + driverPodChecker = (driverPod: Pod) => { + checkScheduler(driverPod) + checkAnnotaion(driverPod) + checkPodGroup(driverPod, queue) + }, + executorPodChecker = (executorPod: Pod) => { + checkScheduler(executorPod) + checkAnnotaion(executorPod) + }, + customSparkConf = Option(conf), + customAppLocator = Option(appLoc) + ) + } + } + + protected def runSparkDriverSubmissionAndVerifyCompletion( + appResource: String = containerLocalSparkDistroExamplesJar, + mainClass: String = SPARK_DRIVER_MAIN_CLASS, + driverPodChecker: Pod => Unit = doBasicDriverPodCheck, + appArgs: Array[String] = Array("2"), + customSparkConf: Option[SparkAppConf] = None, + customAppLocator: Option[String] = None): Unit = { + val appArguments = SparkAppArguments( + mainAppResource = appResource, + mainClass = mainClass, + appArgs = appArgs) + SparkAppLauncher.launch( + appArguments, + customSparkConf.getOrElse(sparkAppConf), + TIMEOUT.value.toSeconds.toInt, + sparkHomeDir, + true) + val driverPod = kubernetesTestComponents.kubernetesClient + .pods() + .withLabel("spark-app-locator", customAppLocator.getOrElse(appLocator)) + .withLabel("spark-role", "driver") + .list() + .getItems + .get(0) + driverPodChecker(driverPod) } private def createVolcanoSparkConf( driverPodName: String = driverPodName, appLoc: String = appLocator, groupLoc: Option[String] = None, - queue: Option[String] = None): SparkAppConf = { + queue: Option[String] = None, + driverTemplate: Option[String] = None): SparkAppConf = { val conf = kubernetesTestComponents.newSparkAppConf() .set(CONTAINER_IMAGE.key, image) .set(KUBERNETES_DRIVER_POD_NAME.key, driverPodName) @@ -168,6 +215,7 @@ private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: Ku conf.set(s"${KUBERNETES_DRIVER_LABEL_PREFIX}spark-group-locator", locator) conf.set(s"${KUBERNETES_EXECUTOR_LABEL_PREFIX}spark-group-locator", locator) } + driverTemplate.foreach(conf.set(KUBERNETES_DRIVER_PODTEMPLATE_FILE.key, _)) conf } @@ -229,6 +277,77 @@ private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: Ku assert(completedPods.size === jobNum) } } + + test("SPARK-38423: Run SparkPi Jobs with priorityClassName", k8sTestTag, volcanoTag) { + // Prepare the priority resource + createOrReplaceYAMLResource(VOLCANO_PRIORITY_YAML) + val priorities = Seq("low", "medium", "high") + val groupName = generateGroupName("priority") + priorities.foreach { p => + Future { + val templatePath = new File( + getClass.getResource(s"/volcano/$p-priority-driver-template.yml").getFile + ).getAbsolutePath + runJobAndVerify( + p, groupLoc = Option(groupName), + driverTemplate = Option(templatePath) + ) + } + } + // Make sure all jobs are Succeeded + Eventually.eventually(TIMEOUT, INTERVAL) { + val pods = getPods(role = "driver", groupName, statusPhase = "Succeeded") + assert(pods.size === priorities.size) + } + } + + test("SPARK-38423: Run driver job to validate priority order", k8sTestTag, volcanoTag) { + // Prepare the priority resource and queue + createOrReplaceYAMLResource(DISABLE_QUEUE) + createOrReplaceYAMLResource(VOLCANO_PRIORITY_YAML) + // Submit 3 jobs with different priority + val priorities = Seq("low", "medium", "high") + priorities.foreach { p => + Future { + val templatePath = new File( + 
getClass.getResource(s"/volcano/$p-priority-driver-template.yml").getFile + ).getAbsolutePath + val groupName = generateGroupName(p) + runJobAndVerify( + p, groupLoc = Option(groupName), + queue = Option("queue"), + driverTemplate = Option(templatePath), + isDriverJob = true + ) + } + } + // Make sure 3 jobs are pending + Eventually.eventually(TIMEOUT, INTERVAL) { + priorities.foreach { p => + val pods = getPods(role = "driver", s"$GROUP_PREFIX$p", statusPhase = "Pending") + assert(pods.size === 1) + } + } + + // Enable queue to let jobs running one by one + createOrReplaceYAMLResource(ENABLE_QUEUE) + + // Verify scheduling order follow the specified priority + Eventually.eventually(TIMEOUT, INTERVAL) { + var m = Map.empty[String, Instant] + priorities.foreach { p => + val pods = getPods(role = "driver", s"$GROUP_PREFIX$p", statusPhase = "Succeeded") + assert(pods.size === 1) + val conditions = pods.head.getStatus.getConditions.asScala + val scheduledTime + = conditions.filter(_.getType === "PodScheduled").head.getLastTransitionTime + m += (p -> Instant.parse(scheduledTime)) + } + // high --> medium --> low + assert(m("high").isBefore(m("medium"))) + assert(m("medium").isBefore(m("low"))) + } + } } private[spark] object VolcanoTestsSuite extends SparkFunSuite { @@ -240,4 +359,12 @@ private[spark] object VolcanoTestsSuite extends SparkFunSuite { getClass.getResource("/volcano/disable-queue0-enable-queue1.yml").getFile ).getAbsolutePath val GROUP_PREFIX = "volcano-test" + UUID.randomUUID().toString.replaceAll("-", "") + "-" + val VOLCANO_PRIORITY_YAML + = new File(getClass.getResource("/volcano/priorityClasses.yml").getFile).getAbsolutePath + val ENABLE_QUEUE = new File( + getClass.getResource("/volcano/enable-queue.yml").getFile + ).getAbsolutePath + val DISABLE_QUEUE = new File( + getClass.getResource("/volcano/disable-queue.yml").getFile + ).getAbsolutePath } From 4883a80eb6b4ca12d1c96151365aa3d5af28f1bd Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 7 Mar 2022 16:01:24 +0800 Subject: [PATCH 406/513] [SPARK-38382][DOC] Fix incorrect version infomation of migration guide doc ### What changes were proposed in this pull request? Unify the presentation of migration guide doc and fix some unclear describtion. ### Why are the changes needed? Make doc more clear ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Not need Closes #35705 from AngersZhuuuu/SPARK-38382. Authored-by: Angerszhuuuu Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 9d7b109650dbe..578586c96a2fa 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -197,7 +197,7 @@ license: | - In Spark 3.0.2, `PARTITION(col=null)` is always parsed as a null literal in the partition spec. In Spark 3.0.1 or earlier, it is parsed as a string literal of its text representation, e.g., string "null", if the partition column is string type. To restore the legacy behavior, you can set `spark.sql.legacy.parseNullPartitionSpecAsStringLiteral` as true. - - In Spark 3.0.0, the output schema of `SHOW DATABASES` becomes `namespace: string`. In Spark version 2.4 and earlier, the schema was `databaseName: string`. Since Spark 3.0.2, you can restore the old schema by setting `spark.sql.legacy.keepCommandOutputSchema` to `true`. + - In Spark 3.0.2, the output schema of `SHOW DATABASES` becomes `namespace: string`. 
In Spark version 3.0.1 and earlier, the schema was `databaseName: string`. Since Spark 3.0.2, you can restore the old schema by setting `spark.sql.legacy.keepCommandOutputSchema` to `true`. ## Upgrading from Spark SQL 3.0 to 3.0.1 @@ -213,7 +213,7 @@ license: | - In Spark 2.4 and below, `Dataset.groupByKey` results to a grouped dataset with key attribute is wrongly named as "value", if the key is non-struct type, for example, int, string, array, etc. This is counterintuitive and makes the schema of aggregation queries unexpected. For example, the schema of `ds.groupByKey(...).count()` is `(value, count)`. Since Spark 3.0, we name the grouping attribute to "key". The old behavior is preserved under a newly added configuration `spark.sql.legacy.dataset.nameNonStructGroupingKeyAsValue` with a default value of `false`. - - In Spark 3.0, the column metadata will always be propagated in the API `Column.name` and `Column.as`. In Spark version 2.4 and earlier, the metadata of `NamedExpression` is set as the `explicitMetadata` for the new column at the time the API is called, it won't change even if the underlying `NamedExpression` changes metadata. To restore the behavior before Spark 2.4, you can use the API `as(alias: String, metadata: Metadata)` with explicit metadata. + - In Spark 3.0, the column metadata will always be propagated in the API `Column.name` and `Column.as`. In Spark version 2.4 and earlier, the metadata of `NamedExpression` is set as the `explicitMetadata` for the new column at the time the API is called, it won't change even if the underlying `NamedExpression` changes metadata. To restore the behavior before Spark 3.0, you can use the API `as(alias: String, metadata: Metadata)` with explicit metadata. ### DDL Statements From e21cb62d02c85a66771822cdd49c49dbb3e44502 Mon Sep 17 00:00:00 2001 From: Daniel Tenedorio Date: Mon, 7 Mar 2022 17:05:19 +0800 Subject: [PATCH 407/513] [SPARK-38335][SQL] Implement parser support for DEFAULT column values ### What changes were proposed in this pull request? Implement parser changes needed to support for DEFAULT column values as tracked in https://issues.apache.org/jira/browse/SPARK-38334. Note that these are the parser changes only. Analysis support will take place in a following PR. Background: in the future, CREATE TABLE and ALTER TABLE invocations will support setting column default values for later operations. Following INSERT, UPDATE, MERGE statements may then reference the value using the DEFAULT keyword as needed. Examples: ```sql CREATE TABLE T(a INT, b INT NOT NULL); -- The default default is NULL INSERT INTO T VALUES (DEFAULT, 0); INSERT INTO T(b) VALUES (1); SELECT * FROM T; (NULL, 0) (NULL, 1) -- Adding a default to a table with rows, sets the values for the -- existing rows (exist default) and new rows (current default). ALTER TABLE T ADD COLUMN c INT DEFAULT 5; INSERT INTO T VALUES (1, 2, DEFAULT); SELECT * FROM T; (NULL, 0, 5) (NULL, 1, 5) (1, 2, 5) ``` ### Why are the changes needed? This new API helps users run DDL and DML statements to add new values to tables and scan existing values from tables more easily. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit test coverage in DDLParserSuite.scala Closes #35690 from dtenedor/default-cols-parsing. 
Authored-by: Daniel Tenedorio Signed-off-by: Gengliang Wang --- docs/sql-ref-ansi-compliance.md | 1 + .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 + .../sql/catalyst/parser/SqlBaseParser.g4 | 22 ++++++- .../sql/catalyst/parser/AstBuilder.scala | 61 ++++++++++++++++++- .../spark/sql/errors/QueryParsingErrors.scala | 4 ++ .../sql/catalyst/parser/DDLParserSuite.scala | 53 ++++++++++++++++ .../spark/sql/execution/SparkSqlParser.scala | 2 +- 7 files changed, 138 insertions(+), 6 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 76462062c210b..5b187192d3c5e 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -396,6 +396,7 @@ Below is a list of all the keywords in Spark SQL. |DATE_DIFF|non-reserved|non-reserved|non-reserved| |DAY|non-reserved|non-reserved|non-reserved| |DBPROPERTIES|non-reserved|non-reserved|non-reserved| +|DEFAULT|non-reserved|non-reserved|non-reserved| |DEFINED|non-reserved|non-reserved|non-reserved| |DELETE|non-reserved|non-reserved|reserved| |DELIMITED|non-reserved|non-reserved|non-reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index 18bb19e9ccf13..a6e1b6530822b 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -147,6 +147,7 @@ DATE_ADD: 'DATE_ADD'; DATEDIFF: 'DATEDIFF'; DATE_DIFF: 'DATE_DIFF'; DBPROPERTIES: 'DBPROPERTIES'; +DEFAULT: 'DEFAULT'; DEFINED: 'DEFINED'; DELETE: 'DELETE'; DELIMITED: 'DELIMITED'; diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 76a00331edf76..b3b834710757c 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -83,7 +83,7 @@ statement (RESTRICT | CASCADE)? #dropNamespace | SHOW namespaces ((FROM | IN) multipartIdentifier)? (LIKE? pattern=STRING)? #showNamespaces - | createTableHeader (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider? + | createTableHeader (LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN)? tableProvider? createTableClauses (AS? query)? #createTable | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier @@ -93,7 +93,7 @@ statement createFileFormat | locationSpec | (TBLPROPERTIES tableProps=propertyList))* #createTableLike - | replaceTableHeader (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider? + | replaceTableHeader (LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN)? tableProvider? createTableClauses (AS? query)? #replaceTable | ANALYZE TABLE multipartIdentifier partitionSpec? COMPUTE STATISTICS @@ -911,7 +911,11 @@ qualifiedColTypeWithPositionList ; qualifiedColTypeWithPosition - : name=multipartIdentifier dataType (NOT NULL)? commentSpec? colPosition? + : name=multipartIdentifier dataType (NOT NULL)? defaultExpression? commentSpec? colPosition? + ; + +defaultExpression + : DEFAULT expression ; colTypeList @@ -922,6 +926,14 @@ colType : colName=errorCapturingIdentifier dataType (NOT NULL)? commentSpec? 
; +createOrReplaceTableColTypeList + : createOrReplaceTableColType (COMMA createOrReplaceTableColType)* + ; + +createOrReplaceTableColType + : colName=errorCapturingIdentifier dataType (NOT NULL)? defaultExpression? commentSpec? + ; + complexColTypeList : complexColType (COMMA complexColType)* ; @@ -1028,6 +1040,8 @@ alterColumnAction | commentSpec | colPosition | setOrDrop=(SET | DROP) NOT NULL + | SET defaultExpression + | dropDefault=DROP DEFAULT ; @@ -1086,6 +1100,7 @@ ansiNonReserved | DATE_DIFF | DAY | DBPROPERTIES + | DEFAULT | DEFINED | DELETE | DELIMITED @@ -1338,6 +1353,7 @@ nonReserved | DATE_DIFF | DAY | DBPROPERTIES + | DEFAULT | DEFINED | DELETE | DELIMITED diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 64d54861d2988..4619a3f9be280 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2728,6 +2728,13 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit StructType(Option(ctx).toSeq.flatMap(visitColTypeList)) } + /** + * Create top level table schema. + */ + protected def createSchema(ctx: CreateOrReplaceTableColTypeListContext): StructType = { + StructType(Option(ctx).toSeq.flatMap(visitCreateOrReplaceTableColTypeList)) + } + /** * Create a [[StructType]] from a number of column definitions. */ @@ -2754,6 +2761,41 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit metadata = builder.build()) } + /** + * Create a [[StructType]] from a number of CREATE TABLE column definitions. + */ + override def visitCreateOrReplaceTableColTypeList( + ctx: CreateOrReplaceTableColTypeListContext): Seq[StructField] = withOrigin(ctx) { + ctx.createOrReplaceTableColType().asScala.map(visitCreateOrReplaceTableColType).toSeq + } + + /** + * Create a top level [[StructField]] from a CREATE TABLE column definition. + */ + override def visitCreateOrReplaceTableColType( + ctx: CreateOrReplaceTableColTypeContext): StructField = withOrigin(ctx) { + import ctx._ + + val builder = new MetadataBuilder + // Add comment to metadata + Option(commentSpec()).map(visitCommentSpec).foreach { + builder.putString("comment", _) + } + + // Process the 'DEFAULT expression' clause in the column definition, if any. + val name: String = colName.getText + val defaultExpr = Option(ctx.defaultExpression()).map(visitDefaultExpression) + if (defaultExpr.isDefined) { + throw QueryParsingErrors.defaultColumnNotImplementedYetError(ctx) + } + + StructField( + name = name, + dataType = typedVisit[DataType](ctx.dataType), + nullable = NULL == null, + metadata = builder.build()) + } + /** * Create a [[StructType]] from a sequence of [[StructField]]s. 
*/ @@ -3457,7 +3499,8 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = withOrigin(ctx) { val (table, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader) - val columns = Option(ctx.colTypeList()).map(visitColTypeList).getOrElse(Nil) + val columns = Option(ctx.createOrReplaceTableColTypeList()) + .map(visitCreateOrReplaceTableColTypeList).getOrElse(Nil) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText) val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo) = visitCreateTableClauses(ctx.createTableClauses()) @@ -3536,7 +3579,8 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit val orCreate = ctx.replaceTableHeader().CREATE() != null val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo) = visitCreateTableClauses(ctx.createTableClauses()) - val columns = Option(ctx.colTypeList()).map(visitColTypeList).getOrElse(Nil) + val columns = Option(ctx.createOrReplaceTableColTypeList()) + .map(visitCreateOrReplaceTableColTypeList).getOrElse(Nil) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText) if (provider.isDefined && serdeInfo.isDefined) { @@ -3655,6 +3699,10 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit override def visitQualifiedColTypeWithPosition( ctx: QualifiedColTypeWithPositionContext): QualifiedColType = withOrigin(ctx) { val name = typedVisit[Seq[String]](ctx.name) + val defaultExpr = Option(ctx.defaultExpression()).map(visitDefaultExpression) + if (defaultExpr.isDefined) { + throw QueryParsingErrors.defaultColumnNotImplementedYetError(ctx) + } QualifiedColType( path = if (name.length > 1) Some(UnresolvedFieldName(name.init)) else None, colName = name.last, @@ -3743,6 +3791,12 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit } else { None } + if (action.defaultExpression != null) { + throw QueryParsingErrors.defaultColumnNotImplementedYetError(ctx) + } + if (action.dropDefault != null) { + throw QueryParsingErrors.defaultColumnNotImplementedYetError(ctx) + } assert(Seq(dataType, nullable, comment, position).count(_.nonEmpty) == 1) @@ -3811,6 +3865,9 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit throw QueryParsingErrors.operationInHiveStyleCommandUnsupportedError( "Replacing with a nested column", "REPLACE COLUMNS", ctx) } + if (Option(colType.defaultExpression()).map(visitDefaultExpression).isDefined) { + throw QueryParsingErrors.defaultColumnNotImplementedYetError(ctx) + } col }.toSeq ) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 6d7ed7be6760e..96bcc181a2434 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -436,4 +436,8 @@ object QueryParsingErrors { new ParseException( s"DROP TEMPORARY FUNCTION requires a single part name but got: ${name.quoted}", ctx) } + + def defaultColumnNotImplementedYetError(ctx: ParserRuleContext): Throwable = { + new ParseException("Support for DEFAULT column values is not implemented yet", ctx) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 956b70a6e0351..ebd9ac89d5fd5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2235,4 +2235,57 @@ class DDLParserSuite extends AnalysisTest { comparePlans(parsePlan(timestampTypeSql), insertPartitionPlan(timestamp)) comparePlans(parsePlan(binaryTypeSql), insertPartitionPlan(binaryStr)) } + + test("SPARK-38335: Implement parser support for DEFAULT values for columns in tables") { + // The following commands will support DEFAULT columns, but this has not been implemented yet. + for (sql <- Seq( + "ALTER TABLE t1 ADD COLUMN x int NOT NULL DEFAULT 42", + "ALTER TABLE t1 ALTER COLUMN a.b.c SET DEFAULT 42", + "ALTER TABLE t1 ALTER COLUMN a.b.c DROP DEFAULT", + "ALTER TABLE t1 REPLACE COLUMNS (x STRING DEFAULT 42)", + "CREATE TABLE my_tab(a INT COMMENT 'test', b STRING NOT NULL DEFAULT \"abc\") USING parquet", + "REPLACE TABLE my_tab(a INT COMMENT 'test', b STRING NOT NULL DEFAULT \"xyz\") USING parquet" + )) { + val exc = intercept[ParseException] { + parsePlan(sql); + } + assert(exc.getMessage.contains("Support for DEFAULT column values is not implemented yet")); + } + // In each of the following cases, the DEFAULT reference parses as an unresolved attribute + // reference. We can handle these cases after the parsing stage, at later phases of analysis. + comparePlans(parsePlan("VALUES (1, 2, DEFAULT) AS val"), + SubqueryAlias("val", + UnresolvedInlineTable(Seq("col1", "col2", "col3"), Seq(Seq(Literal(1), Literal(2), + UnresolvedAttribute("DEFAULT")))))) + comparePlans(parsePlan( + "INSERT INTO t PARTITION(part = date'2019-01-02') VALUES ('a', DEFAULT)"), + InsertIntoStatement( + UnresolvedRelation(Seq("t")), + Map("part" -> Some("2019-01-02")), + userSpecifiedCols = Seq.empty[String], + query = UnresolvedInlineTable(Seq("col1", "col2"), Seq(Seq(Literal("a"), + UnresolvedAttribute("DEFAULT")))), + overwrite = false, ifPartitionNotExists = false)) + parseCompare( + """ + |MERGE INTO testcat1.ns1.ns2.tbl AS target + |USING testcat2.ns1.ns2.tbl AS source + |ON target.col1 = source.col1 + |WHEN MATCHED AND (target.col2='delete') THEN DELETE + |WHEN MATCHED AND (target.col2='update') THEN UPDATE SET target.col2 = DEFAULT + |WHEN NOT MATCHED AND (target.col2='insert') + |THEN INSERT (target.col1, target.col2) VALUES (source.col1, DEFAULT) + """.stripMargin, + MergeIntoTable( + SubqueryAlias("target", UnresolvedRelation(Seq("testcat1", "ns1", "ns2", "tbl"))), + SubqueryAlias("source", UnresolvedRelation(Seq("testcat2", "ns1", "ns2", "tbl"))), + EqualTo(UnresolvedAttribute("target.col1"), UnresolvedAttribute("source.col1")), + Seq(DeleteAction(Some(EqualTo(UnresolvedAttribute("target.col2"), Literal("delete")))), + UpdateAction(Some(EqualTo(UnresolvedAttribute("target.col2"), Literal("update"))), + Seq(Assignment(UnresolvedAttribute("target.col2"), + UnresolvedAttribute("DEFAULT"))))), + Seq(InsertAction(Some(EqualTo(UnresolvedAttribute("target.col2"), Literal("insert"))), + Seq(Assignment(UnresolvedAttribute("target.col1"), UnresolvedAttribute("source.col1")), + Assignment(UnresolvedAttribute("target.col2"), UnresolvedAttribute("DEFAULT"))))))) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index fed02dddecf78..a4e72e04507b5 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -318,7 +318,7 @@ class SparkSqlAstBuilder extends AstBuilder { val (_, _, _, _, options, location, _, _) = visitCreateTableClauses(ctx.createTableClauses()) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText).getOrElse( throw QueryParsingErrors.createTempTableNotSpecifyProviderError(ctx)) - val schema = Option(ctx.colTypeList()).map(createSchema) + val schema = Option(ctx.createOrReplaceTableColTypeList()).map(createSchema) logWarning(s"CREATE TEMPORARY TABLE ... USING ... is deprecated, please use " + "CREATE TEMPORARY VIEW ... USING ... instead") From c1e5e8a27595c711c53bb3571ea391ef700d531a Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 7 Mar 2022 20:09:11 +0800 Subject: [PATCH 408/513] [SPARK-38407][SQL] ANSI Cast: loosen the limitation of casting non-null complex types ### What changes were proposed in this pull request? When ANSI mode is off, `ArrayType(DoubleType, containsNull = false)` can't cast as `ArrayType(IntegerType, containsNull = false)` since there can be overflow thus result in null results and breaks the non-null constraint. When ANSI mode is on, currently Spark SQL has the same behavior. However, this is not correct since the non-null constraint won't be break. Spark SQL can just execute the cast and throw runtime error on overflow, just like casting DoubleType as IntegerType. This applies to MapType and StructType as well. This PR is to loosen the limitation of casting non-null Array/Map/Struct types. ### Why are the changes needed? For ANSI mode compliance image ### Does this PR introduce _any_ user-facing change? Yes, for Cast under ANSI mode or table insertion, a complex type which don't contain null can be cast as another non-null complex type as long as the element types are castable. Before changes, this is only allowed when the source element type can be upcast to the target element type. ### How was this patch tested? UT Closes #35724 from gengliangwang/canCast. 
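For illustration only (not part of the original PR description): a rough DataFrame-level sketch of the loosened casting rule, assuming a running `SparkSession` named `spark` with `spark.sql.ansi.enabled=true`. The column names and values here are made up; the expected behavior follows the new `cast from array III` test added further down in this patch.

```scala
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._

spark.conf.set("spark.sql.ansi.enabled", "true")

// array(1.0, 2.0) produces array<double> with containsNull = false.
val df = spark.range(1).select(array(lit(1.0), lit(2.0)).as("a"))

// Previously rejected at analysis time under ANSI mode (double -> int may overflow);
// with this change the cast resolves and evaluates to [1, 2].
df.select(col("a").cast(ArrayType(IntegerType, containsNull = false))).show()

// An element that overflows INT now fails with a runtime ArithmeticException
// instead of silently becoming null, so the non-null constraint still holds.
spark.range(1)
  .select(array(lit(Int.MaxValue + 1.0)).as("a"))
  .select(col("a").cast(ArrayType(IntegerType, containsNull = false)))
  .show()
```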
Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../spark/sql/catalyst/expressions/Cast.scala | 14 +- .../sql/catalyst/expressions/TryCast.scala | 30 ++++- .../expressions/AnsiCastSuiteBase.scala | 122 ++++++++++++++++++ .../sql/catalyst/expressions/CastSuite.scala | 55 ++++++++ .../catalyst/expressions/CastSuiteBase.scala | 52 -------- 5 files changed, 209 insertions(+), 64 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index ef054e77707ac..39463ed122b6e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -23,7 +23,7 @@ import java.util.concurrent.TimeUnit._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion} -import org.apache.spark.sql.catalyst.expressions.Cast.{forceNullable, resolvableNullability} +import org.apache.spark.sql.catalyst.expressions.Cast.resolvableNullability import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.trees.TreeNodeTag @@ -2226,23 +2226,17 @@ object AnsiCast { case (TimestampType, _: NumericType) => true case (ArrayType(fromType, fn), ArrayType(toType, tn)) => - canCast(fromType, toType) && - resolvableNullability(fn || forceNullable(fromType, toType), tn) + canCast(fromType, toType) && resolvableNullability(fn, tn) case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) => - canCast(fromKey, toKey) && - (!forceNullable(fromKey, toKey)) && - canCast(fromValue, toValue) && - resolvableNullability(fn || forceNullable(fromValue, toValue), tn) + canCast(fromKey, toKey) && canCast(fromValue, toValue) && resolvableNullability(fn, tn) case (StructType(fromFields), StructType(toFields)) => fromFields.length == toFields.length && fromFields.zip(toFields).forall { case (fromField, toField) => canCast(fromField.dataType, toField.dataType) && - resolvableNullability( - fromField.nullable || forceNullable(fromField.dataType, toField.dataType), - toField.nullable) + resolvableNullability(fromField.nullable, toField.nullable) } case (udt1: UserDefinedType[_], udt2: UserDefinedType[_]) if udt2.acceptsType(udt1) => true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryCast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryCast.scala index 0f63de1bf7e45..f43a80bf997a7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryCast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryCast.scala @@ -17,9 +17,10 @@ package org.apache.spark.sql.catalyst.expressions +import org.apache.spark.sql.catalyst.expressions.Cast.{forceNullable, resolvableNullability} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType} /** * A special version of [[AnsiCast]]. It performs the same operation (i.e. 
converts a value of @@ -56,7 +57,32 @@ case class TryCast(child: Expression, dataType: DataType, timeZoneId: Option[Str override def nullable: Boolean = true - override def canCast(from: DataType, to: DataType): Boolean = AnsiCast.canCast(from, to) + // If the target data type is a complex type which can't have Null values, we should guarantee + // that the casting between the element types won't produce Null results. + override def canCast(from: DataType, to: DataType): Boolean = (from, to) match { + case (ArrayType(fromType, fn), ArrayType(toType, tn)) => + canCast(fromType, toType) && + resolvableNullability(fn || forceNullable(fromType, toType), tn) + + case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) => + canCast(fromKey, toKey) && + (!forceNullable(fromKey, toKey)) && + canCast(fromValue, toValue) && + resolvableNullability(fn || forceNullable(fromValue, toValue), tn) + + case (StructType(fromFields), StructType(toFields)) => + fromFields.length == toFields.length && + fromFields.zip(toFields).forall { + case (fromField, toField) => + canCast(fromField.dataType, toField.dataType) && + resolvableNullability( + fromField.nullable || forceNullable(fromField.dataType, toField.dataType), + toField.nullable) + } + + case _ => + AnsiCast.canCast(from, to) + } override def cast(from: DataType, to: DataType): Any => Any = (input: Any) => try { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala index 6338be1a2eb54..7fb04fe8b7f76 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala @@ -21,6 +21,7 @@ import java.sql.Timestamp import java.time.DateTimeException import org.apache.spark.SparkArithmeticException +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_SECOND import org.apache.spark.sql.catalyst.util.DateTimeTestUtils @@ -315,6 +316,28 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(ret.resolved) checkCastToBooleanError(array_notNull, to, Seq(null, true, false)) } + + { + val ret = cast(array_notNull, ArrayType(BooleanType, containsNull = false)) + assert(ret.resolved == !isTryCast) + if (!isTryCast) { + checkExceptionInExpression[UnsupportedOperationException]( + ret, "invalid input syntax for type boolean") + } + } + } + + test("cast from array III") { + if (!isTryCast) { + val from: DataType = ArrayType(DoubleType, containsNull = false) + val array = Literal.create(Seq(1.0, 2.0), from) + val to: DataType = ArrayType(IntegerType, containsNull = false) + val answer = Literal.create(Seq(1, 2), to).value + checkEvaluation(cast(array, to), answer) + + val overflowArray = Literal.create(Seq(Int.MaxValue + 1.0D), from) + checkExceptionInExpression[ArithmeticException](cast(overflowArray, to), "overflow") + } } test("cast from map II") { @@ -340,6 +363,49 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(ret.resolved) checkCastToBooleanError(map_notNull, to, Map("a" -> null, "b" -> true, "c" -> false)) } + + { + val ret = cast(map, MapType(IntegerType, StringType, valueContainsNull = true)) + assert(ret.resolved == !isTryCast) + if (!isTryCast) { + checkExceptionInExpression[NumberFormatException]( + ret, "invalid input syntax for type 
numeric") + } + } + + { + val ret = cast(map_notNull, MapType(StringType, BooleanType, valueContainsNull = false)) + assert(ret.resolved == !isTryCast) + if (!isTryCast) { + checkExceptionInExpression[UnsupportedOperationException]( + ret, "invalid input syntax for type boolean") + } + } + + { + val ret = cast(map_notNull, MapType(IntegerType, StringType, valueContainsNull = true)) + assert(ret.resolved == !isTryCast) + if (!isTryCast) { + checkExceptionInExpression[NumberFormatException]( + ret, "invalid input syntax for type numeric") + } + } + } + + test("cast from map III") { + if (!isTryCast) { + val from: DataType = MapType(DoubleType, DoubleType, valueContainsNull = false) + val map = Literal.create(Map(1.0 -> 2.0), from) + val to: DataType = MapType(IntegerType, IntegerType, valueContainsNull = false) + val answer = Literal.create(Map(1 -> 2), to).value + checkEvaluation(cast(map, to), answer) + + Seq( + Literal.create(Map((Int.MaxValue + 1.0) -> 2.0), from), + Literal.create(Map(1.0 -> (Int.MinValue - 1.0)), from)).foreach { overflowMap => + checkExceptionInExpression[ArithmeticException](cast(overflowMap, to), "overflow") + } + } } test("cast from struct II") { @@ -392,6 +458,62 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(ret.resolved) checkCastToBooleanError(struct_notNull, to, InternalRow(null, true, false)) } + + { + val ret = cast(struct_notNull, StructType(Seq( + StructField("a", BooleanType, nullable = true), + StructField("b", BooleanType, nullable = true), + StructField("c", BooleanType, nullable = false)))) + assert(ret.resolved == !isTryCast) + if (!isTryCast) { + checkExceptionInExpression[UnsupportedOperationException]( + ret, "invalid input syntax for type boolean") + } + } + } + + test("cast from struct III") { + if (!isTryCast) { + val from: DataType = StructType(Seq(StructField("a", DoubleType, nullable = false))) + val struct = Literal.create(InternalRow(1.0), from) + val to: DataType = StructType(Seq(StructField("a", IntegerType, nullable = false))) + val answer = Literal.create(InternalRow(1), to).value + checkEvaluation(cast(struct, to), answer) + + val overflowStruct = Literal.create(InternalRow(Int.MaxValue + 1.0), from) + checkExceptionInExpression[ArithmeticException](cast(overflowStruct, to), "overflow") + } + } + + test("complex casting") { + val complex = Literal.create( + Row( + Seq("123", "true", "f"), + Map("a" -> "123", "b" -> "true", "c" -> "f"), + Row(0)), + StructType(Seq( + StructField("a", + ArrayType(StringType, containsNull = false), nullable = true), + StructField("m", + MapType(StringType, StringType, valueContainsNull = false), nullable = true), + StructField("s", + StructType(Seq( + StructField("i", IntegerType, nullable = true))))))) + + val ret = cast(complex, StructType(Seq( + StructField("a", + ArrayType(IntegerType, containsNull = true), nullable = true), + StructField("m", + MapType(StringType, BooleanType, valueContainsNull = false), nullable = true), + StructField("s", + StructType(Seq( + StructField("l", LongType, nullable = true))))))) + + assert(ret.resolved === !isTryCast) + if (!isTryCast) { + checkExceptionInExpression[NumberFormatException]( + ret, "invalid input syntax for type numeric") + } } test("ANSI mode: cast string to timestamp with parse error") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 52e3cb0baf73d..ca110502c6b3a 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -242,6 +242,11 @@ class CastSuite extends CastSuiteBase { assert(ret.resolved) checkEvaluation(ret, Seq(null, true, false)) } + + { + val ret = cast(array_notNull, ArrayType(BooleanType, containsNull = false)) + assert(ret.resolved === false) + } } test("cast from map II") { @@ -263,6 +268,21 @@ class CastSuite extends CastSuiteBase { assert(ret.resolved) checkEvaluation(ret, Map("a" -> null, "b" -> true, "c" -> false)) } + + { + val ret = cast(map, MapType(IntegerType, StringType, valueContainsNull = true)) + assert(ret.resolved === false) + } + + { + val ret = cast(map_notNull, MapType(StringType, BooleanType, valueContainsNull = false)) + assert(ret.resolved === false) + } + + { + val ret = cast(map_notNull, MapType(IntegerType, StringType, valueContainsNull = true)) + assert(ret.resolved === false) + } } test("cast from struct II") { @@ -313,6 +333,41 @@ class CastSuite extends CastSuiteBase { assert(ret.resolved) checkEvaluation(ret, InternalRow(null, true, false)) } + + { + val ret = cast(struct_notNull, StructType(Seq( + StructField("a", BooleanType, nullable = true), + StructField("b", BooleanType, nullable = true), + StructField("c", BooleanType, nullable = false)))) + assert(ret.resolved === false) + } + } + + test("complex casting") { + val complex = Literal.create( + Row( + Seq("123", "true", "f"), + Map("a" -> "123", "b" -> "true", "c" -> "f"), + Row(0)), + StructType(Seq( + StructField("a", + ArrayType(StringType, containsNull = false), nullable = true), + StructField("m", + MapType(StringType, StringType, valueContainsNull = false), nullable = true), + StructField("s", + StructType(Seq( + StructField("i", IntegerType, nullable = true))))))) + + val ret = cast(complex, StructType(Seq( + StructField("a", + ArrayType(IntegerType, containsNull = true), nullable = true), + StructField("m", + MapType(StringType, BooleanType, valueContainsNull = false), nullable = true), + StructField("s", + StructType(Seq( + StructField("l", LongType, nullable = true))))))) + + assert(ret.resolved === false) } test("SPARK-31227: Non-nullable null type should not coerce to nullable type") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index 54497f1b21edb..ba8ab708046d1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -427,11 +427,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { assert(ret.resolved === false) } - { - val ret = cast(array_notNull, ArrayType(BooleanType, containsNull = false)) - assert(ret.resolved === false) - } - { val ret = cast(array, IntegerType) assert(ret.resolved === false) @@ -452,18 +447,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { val ret = cast(map, MapType(StringType, BooleanType, valueContainsNull = false)) assert(ret.resolved === false) } - { - val ret = cast(map, MapType(IntegerType, StringType, valueContainsNull = true)) - assert(ret.resolved === false) - } - { - val ret = cast(map_notNull, MapType(StringType, BooleanType, valueContainsNull = false)) - assert(ret.resolved === false) - } - { - val ret = 
cast(map_notNull, MapType(IntegerType, StringType, valueContainsNull = true)) - assert(ret.resolved === false) - } { val ret = cast(map, IntegerType) @@ -510,14 +493,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { assert(ret.resolved === false) } - { - val ret = cast(struct_notNull, StructType(Seq( - StructField("a", BooleanType, nullable = true), - StructField("b", BooleanType, nullable = true), - StructField("c", BooleanType, nullable = false)))) - assert(ret.resolved === false) - } - { val ret = cast(struct, StructType(Seq( StructField("a", StringType, nullable = true), @@ -541,33 +516,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(cast(inp, targetSchema), expected) } - test("complex casting") { - val complex = Literal.create( - Row( - Seq("123", "true", "f"), - Map("a" -> "123", "b" -> "true", "c" -> "f"), - Row(0)), - StructType(Seq( - StructField("a", - ArrayType(StringType, containsNull = false), nullable = true), - StructField("m", - MapType(StringType, StringType, valueContainsNull = false), nullable = true), - StructField("s", - StructType(Seq( - StructField("i", IntegerType, nullable = true))))))) - - val ret = cast(complex, StructType(Seq( - StructField("a", - ArrayType(IntegerType, containsNull = true), nullable = true), - StructField("m", - MapType(StringType, BooleanType, valueContainsNull = false), nullable = true), - StructField("s", - StructType(Seq( - StructField("l", LongType, nullable = true))))))) - - assert(ret.resolved === false) - } - test("cast between string and interval") { import org.apache.spark.unsafe.types.CalendarInterval From 1b31b7cfc28ab33cc9039a379cff595b94bb1a67 Mon Sep 17 00:00:00 2001 From: Tengfei Huang Date: Mon, 7 Mar 2022 22:16:59 +0800 Subject: [PATCH 409/513] [SPARK-38434][SQL] Correct semantic of CheckAnalysis.getDataTypesAreCompatibleFn method ### What changes were proposed in this pull request? Modify the return value of method `CheckAnalysis.getDataTypesAreCompatibleFn`: https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala#L606 Return true if data types are compatible, otherwise return false. ### Why are the changes needed? Avoid the confusing of method definition. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. Closes #35752 from ivoson/union-type-mismatch. Authored-by: Tengfei Huang Signed-off-by: Gengliang Wang --- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 5c639d102688a..731924813b694 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -435,7 +435,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { // Check if the data types match. 
dataTypes(child).zip(ref).zipWithIndex.foreach { case ((dt1, dt2), ci) => // SPARK-18058: we shall not care about the nullability of columns - if (dataTypesAreCompatibleFn(dt1, dt2)) { + if (!dataTypesAreCompatibleFn(dt1, dt2)) { val errorMessage = s""" |${operator.nodeName} can only be performed on tables with the compatible @@ -607,11 +607,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { val isUnion = plan.isInstanceOf[Union] if (isUnion) { (dt1: DataType, dt2: DataType) => - !DataType.equalsStructurally(dt1, dt2, true) + DataType.equalsStructurally(dt1, dt2, true) } else { // SPARK-18058: we shall not care about the nullability of columns (dt1: DataType, dt2: DataType) => - TypeCoercion.findWiderTypeForTwo(dt1.asNullable, dt2.asNullable).isEmpty + TypeCoercion.findWiderTypeForTwo(dt1.asNullable, dt2.asNullable).nonEmpty } } @@ -662,7 +662,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { nonAnsiPlan.children.tail.zipWithIndex.foreach { case (child, ti) => // Check if the data types match. dataTypes(child).zip(ref).zipWithIndex.foreach { case ((dt1, dt2), ci) => - if (dataTypesAreCompatibleFn(dt1, dt2)) { + if (!dataTypesAreCompatibleFn(dt1, dt2)) { issueFixedIfAnsiOff = false } } From ed3a61deec2e3cb8aff656e7beb86d36a7ccfc92 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 7 Mar 2022 08:49:25 -0800 Subject: [PATCH 410/513] [SPARK-38394][BUILD][FOLLOWUP] Update comments about `scala-maven-plugin` versions per Hadoop profiles ### What changes were proposed in this pull request? More detail in the comments as to why care is needed when altering these values, especially that the hadoop 2.7 profile *must* stay on version 4.3.0 ### Why are the changes needed? see discussion in #35725 -some more details in the comments were requested. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? build of spark against hadoop trunk. Closes #35753 from steveloughran/SPARK-38394-followup. Authored-by: Steve Loughran Signed-off-by: Dongjoon Hyun --- pom.xml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 8e031675b7c6f..207618a95b46c 100644 --- a/pom.xml +++ b/pom.xml @@ -163,8 +163,11 @@ 2.12.15 2.12 2.0.2 - + 4.4.0 --test @@ -3433,6 +3436,7 @@ hadoop-client hadoop-yarn-api hadoop-client + 4.3.0 From 60d3de182eb5b81bc0116d78a85cba6584de33f6 Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Mon, 7 Mar 2022 20:23:56 +0300 Subject: [PATCH 411/513] [SPARK-38104][SQL] Migrate parsing errors of window into the new error framework ### What changes were proposed in this pull request? In this PR, I migrated parsing errors of window listed below into the new error framework. - repetitiveWindowDefinitionError - invalidWindowReferenceError - cannotResolveWindowReferenceError ### Why are the changes needed? Porting the parsing errors of window into the new error framework should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? Yes, it changes the error message. ### How was this patch tested? `$ build/sbt "test:testOnly *QueryParsingErrorsSuite"` Closes #35718 from yutoacts/SPARK-38104. 
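A hedged usage sketch (not taken from the PR itself) of how one of the migrated errors reaches users; it assumes an existing `SparkSession` named `spark`, and the referenced table does not need to exist because the failure happens at parse time. The message text matches the updated golden file and test expectations in this patch.

```scala
import org.apache.spark.sql.catalyst.parser.ParseException

try {
  // The window name `win` is defined twice in the WINDOW clause.
  spark.sql(
    "SELECT min(a) OVER win FROM t1 WINDOW win AS (ORDER BY a), win AS (ORDER BY a)")
} catch {
  case e: ParseException =>
    // Now reported through the INVALID_SQL_SYNTAX error class, e.g.
    // "Invalid SQL syntax: The definition of window 'win' is repetitive."
    println(e.getMessage)
}
```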
Lead-authored-by: Yuto Akutsu Co-authored-by: Yuto Akutsu Signed-off-by: Max Gekk --- .../spark/sql/errors/QueryParsingErrors.scala | 9 ++-- .../sql-tests/results/window.sql.out | 2 +- .../sql/errors/QueryParsingErrorsSuite.scala | 47 ++++++++++++++++++- 3 files changed, 53 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 96bcc181a2434..4c62550a299b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -118,15 +118,18 @@ object QueryParsingErrors { } def repetitiveWindowDefinitionError(name: String, ctx: WindowClauseContext): Throwable = { - new ParseException(s"The definition of window '$name' is repetitive", ctx) + new ParseException("INVALID_SQL_SYNTAX", + Array(s"The definition of window '$name' is repetitive."), ctx) } def invalidWindowReferenceError(name: String, ctx: WindowClauseContext): Throwable = { - new ParseException(s"Window reference '$name' is not a window specification", ctx) + new ParseException("INVALID_SQL_SYNTAX", + Array(s"Window reference '$name' is not a window specification."), ctx) } def cannotResolveWindowReferenceError(name: String, ctx: WindowClauseContext): Throwable = { - new ParseException(s"Cannot resolve window reference '$name'", ctx) + new ParseException("INVALID_SQL_SYNTAX", + Array(s"Cannot resolve window reference '$name'."), ctx) } def naturalCrossJoinUnsupportedError(ctx: RelationContext): Throwable = { diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index d781245227ec4..d13411e333371 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -898,7 +898,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -The definition of window 'w' is repetitive(line 8, pos 0) +Invalid SQL syntax: The definition of window 'w' is repetitive.(line 8, pos 0) == SQL == SELECT diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala index 466852dae7022..f7b891ead6134 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -86,7 +86,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { } } - test("SPARK-35789: INVALID_SQL_SYNTAX - LATERAL can only be used with subquery") { + test("INVALID_SQL_SYNTAX: LATERAL can only be used with subquery") { Seq( "SELECT * FROM t1, LATERAL t2" -> 26, "SELECT * FROM t1 JOIN LATERAL t2" -> 30, @@ -124,4 +124,49 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { |--------------^^^ |""".stripMargin) } + + test("INVALID_SQL_SYNTAX: redefine window") { + validateParsingError( + sqlText = "SELECT min(a) OVER win FROM t1 WINDOW win AS win, win AS win2", + errorClass = "INVALID_SQL_SYNTAX", + sqlState = "42000", + message = + """ + |Invalid SQL syntax: The definition of window 'win' is repetitive.(line 1, pos 31) + | + |== SQL == + |SELECT min(a) OVER win FROM t1 WINDOW win AS win, win AS win2 + |-------------------------------^^^ + |""".stripMargin) + } + 
+ test("INVALID_SQL_SYNTAX: invalid window reference") { + validateParsingError( + sqlText = "SELECT min(a) OVER win FROM t1 WINDOW win AS win", + errorClass = "INVALID_SQL_SYNTAX", + sqlState = "42000", + message = + """ + |Invalid SQL syntax: Window reference 'win' is not a window specification.(line 1, pos 31) + | + |== SQL == + |SELECT min(a) OVER win FROM t1 WINDOW win AS win + |-------------------------------^^^ + |""".stripMargin) + } + + test("INVALID_SQL_SYNTAX: window reference cannot be resolved") { + validateParsingError( + sqlText = "SELECT min(a) OVER win FROM t1 WINDOW win AS win2", + errorClass = "INVALID_SQL_SYNTAX", + sqlState = "42000", + message = + """ + |Invalid SQL syntax: Cannot resolve window reference 'win2'.(line 1, pos 31) + | + |== SQL == + |SELECT min(a) OVER win FROM t1 WINDOW win AS win2 + |-------------------------------^^^ + |""".stripMargin) + } } From ddc18038ca8be82e801d2554043ae06dafc3f31f Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 7 Mar 2022 10:55:57 -0800 Subject: [PATCH 412/513] [SPARK-38414][CORE][DSTREAM][EXAMPLES][ML][MLLIB][SQL] Remove redundant `@SuppressWarnings ` ### What changes were proposed in this pull request? This pr remove redundant `SuppressWarnings` in Spark Java code, all case inspected by IDE (IntelliJ) ### Why are the changes needed? Remove redundant `SuppressWarnings ` ### Does this PR introduce _any_ user-facing change? NO. ### How was this patch tested? Pass GA Closes #35732 from LuciferYang/cleanup-redundant-suppression. Authored-by: yangjie01 Signed-off-by: huaxingao --- .../shuffle/RetryingBlockTransferorSuite.java | 1 - .../sort/UnsafeShuffleWriterSuite.java | 1 - .../test/org/apache/spark/JavaAPISuite.java | 20 -------- .../JavaPowerIterationClusteringExample.java | 1 - .../mllib/JavaStratifiedSamplingExample.java | 1 - .../JavaStatefulNetworkWordCount.java | 1 - .../JavaLogisticRegressionSuite.java | 1 - .../JavaStreamingLogisticRegressionSuite.java | 1 - .../clustering/JavaStreamingKMeansSuite.java | 1 - .../evaluation/JavaRankingMetricsSuite.java | 1 - .../spark/mllib/feature/JavaTfIdfSuite.java | 2 - .../mllib/feature/JavaWord2VecSuite.java | 1 - .../mllib/fpm/JavaAssociationRulesSuite.java | 1 - .../spark/mllib/fpm/JavaFPGrowthSuite.java | 2 - .../spark/mllib/linalg/JavaVectorsSuite.java | 1 - .../mllib/random/JavaRandomRDDsSuite.java | 7 --- .../JavaStreamingLinearRegressionSuite.java | 1 - .../org/apache/spark/sql/JavaRowSuite.java | 1 - .../org/apache/spark/sql/JavaUDAFSuite.java | 1 - .../org/apache/spark/sql/JavaUDFSuite.java | 8 --- .../apache/spark/streaming/JavaAPISuite.java | 49 ------------------- 21 files changed, 103 deletions(-) diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java index 1b44b061f3d81..985a7a364282e 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java @@ -240,7 +240,6 @@ public void testRetryAndUnrecoverable() throws IOException, InterruptedException * retries -- the first interaction may include an IOException, which causes a retry of some * subset of the original blocks in a second interaction. 
*/ - @SuppressWarnings("unchecked") private static void performInteractions(List> interactions, BlockFetchingListener listener) throws IOException, InterruptedException { diff --git a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java index cd25f32cca89c..f4e09b7a0a38a 100644 --- a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java +++ b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java @@ -89,7 +89,6 @@ public void tearDown() { } @Before - @SuppressWarnings("unchecked") public void setUp() throws Exception { MockitoAnnotations.openMocks(this).close(); tempDir = Utils.createTempDir(null, "test"); diff --git a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java index 3796d3ba88ed6..fd91237a999a3 100644 --- a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java @@ -130,7 +130,6 @@ public void sparkContextUnion() { assertEquals(4, pUnion.count()); } - @SuppressWarnings("unchecked") @Test public void intersection() { List ints1 = Arrays.asList(1, 10, 2, 3, 4, 5); @@ -216,7 +215,6 @@ public void sortByKey() { assertEquals(new Tuple2<>(3, 2), sortedPairs.get(2)); } - @SuppressWarnings("unchecked") @Test public void repartitionAndSortWithinPartitions() { List> pairs = new ArrayList<>(); @@ -356,7 +354,6 @@ public void zipWithIndex() { assertEquals(correctIndexes, indexes.collect()); } - @SuppressWarnings("unchecked") @Test public void lookup() { JavaPairRDD categories = sc.parallelizePairs(Arrays.asList( @@ -401,7 +398,6 @@ public void groupByOnPairRDD() { assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds } - @SuppressWarnings("unchecked") @Test public void keyByOnPairRDD() { // Regression test for SPARK-4459 @@ -413,7 +409,6 @@ public void keyByOnPairRDD() { assertEquals(1, (long) keyed.lookup("2").get(0)._1()); } - @SuppressWarnings("unchecked") @Test public void cogroup() { JavaPairRDD categories = sc.parallelizePairs(Arrays.asList( @@ -433,7 +428,6 @@ public void cogroup() { cogrouped.collect(); } - @SuppressWarnings("unchecked") @Test public void cogroup3() { JavaPairRDD categories = sc.parallelizePairs(Arrays.asList( @@ -460,7 +454,6 @@ public void cogroup3() { cogrouped.collect(); } - @SuppressWarnings("unchecked") @Test public void cogroup4() { JavaPairRDD categories = sc.parallelizePairs(Arrays.asList( @@ -491,7 +484,6 @@ public void cogroup4() { cogrouped.collect(); } - @SuppressWarnings("unchecked") @Test public void leftOuterJoin() { JavaPairRDD rdd1 = sc.parallelizePairs(Arrays.asList( @@ -557,7 +549,6 @@ public void treeAggregateWithFinalAggregateOnExecutor() { } } - @SuppressWarnings("unchecked") @Test public void aggregateByKey() { JavaPairRDD pairs = sc.parallelizePairs( @@ -583,7 +574,6 @@ public void aggregateByKey() { assertEquals(new HashSet<>(Arrays.asList(1, 3)), sets.get(5)); } - @SuppressWarnings("unchecked") @Test public void foldByKey() { List> pairs = Arrays.asList( @@ -600,7 +590,6 @@ public void foldByKey() { assertEquals(3, sums.lookup(3).get(0).intValue()); } - @SuppressWarnings("unchecked") @Test public void reduceByKey() { List> pairs = Arrays.asList( @@ -836,7 +825,6 @@ public void flatMap() { assertEquals(11, pairsRDD.count()); } - @SuppressWarnings("unchecked") @Test public void mapsFromPairsToPairs() { List> pairs = Arrays.asList( @@ 
-919,7 +907,6 @@ public void repartition() { } } - @SuppressWarnings("unchecked") @Test public void persist() { JavaDoubleRDD doubleRDD = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0)); @@ -1018,7 +1005,6 @@ public void textFilesCompressed() { assertEquals(expected, readRDD.collect()); } - @SuppressWarnings("unchecked") @Test public void sequenceFile() { String outputDir = new File(tempDir, "output").getAbsolutePath(); @@ -1108,7 +1094,6 @@ public void binaryRecords() throws Exception { } } - @SuppressWarnings("unchecked") @Test public void writeWithNewAPIHadoopFile() { String outputDir = new File(tempDir, "output").getAbsolutePath(); @@ -1159,7 +1144,6 @@ public void objectFilesOfInts() { assertEquals(expected, readRDD.collect()); } - @SuppressWarnings("unchecked") @Test public void objectFilesOfComplexTypes() { String outputDir = new File(tempDir, "output").getAbsolutePath(); @@ -1297,7 +1281,6 @@ public void combineByKey() { assertEquals(expected, results); } - @SuppressWarnings("unchecked") @Test public void mapOnPairRDD() { JavaRDD rdd1 = sc.parallelize(Arrays.asList(1,2,3,4)); @@ -1310,7 +1293,6 @@ public void mapOnPairRDD() { new Tuple2<>(0, 4)), rdd3.collect()); } - @SuppressWarnings("unchecked") @Test public void collectPartitions() { JavaRDD rdd1 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7), 3); @@ -1391,7 +1373,6 @@ public void collectAsMapAndSerialize() throws Exception { } @Test - @SuppressWarnings("unchecked") public void sampleByKey() { JavaRDD rdd1 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), 3); JavaPairRDD rdd2 = rdd1.mapToPair(i -> new Tuple2<>(i % 2, 1)); @@ -1411,7 +1392,6 @@ public void sampleByKey() { } @Test - @SuppressWarnings("unchecked") public void sampleByKeyExact() { JavaRDD rdd1 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), 3); JavaPairRDD rdd2 = rdd1.mapToPair(i -> new Tuple2<>(i % 2, 1)); diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java index 5155f182ba20e..e2260e1c53210 100644 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java @@ -37,7 +37,6 @@ public static void main(String[] args) { SparkConf sparkConf = new SparkConf().setAppName("JavaPowerIterationClusteringExample"); JavaSparkContext sc = new JavaSparkContext(sparkConf); - @SuppressWarnings("unchecked") // $example on$ JavaRDD> similarities = sc.parallelize(Arrays.asList( new Tuple3<>(0L, 1L, 0.9), diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java index 286b95cfbc33d..b06b2cceccfcd 100644 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java @@ -35,7 +35,6 @@ public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("JavaStratifiedSamplingExample"); JavaSparkContext jsc = new JavaSparkContext(conf); - @SuppressWarnings("unchecked") // $example on$ List> list = Arrays.asList( new Tuple2<>(1, 'a'), diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaStatefulNetworkWordCount.java 
b/examples/src/main/java/org/apache/spark/examples/streaming/JavaStatefulNetworkWordCount.java index 9d8bd7fd11ebd..ec1185a78eb33 100644 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaStatefulNetworkWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaStatefulNetworkWordCount.java @@ -63,7 +63,6 @@ public static void main(String[] args) throws Exception { ssc.checkpoint("."); // Initial state RDD input to mapWithState - @SuppressWarnings("unchecked") List> tuples = Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1)); JavaPairRDD initialRDD = ssc.sparkContext().parallelizePairs(tuples); diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java index 7c63a8755b4f3..3dd224b1ef309 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java @@ -107,7 +107,6 @@ public void logisticRegressionWithSetters() { Assert.assertEquals("theProb", model2.getProbabilityCol()); } - @SuppressWarnings("unchecked") @Test public void logisticRegressionPredictorClassifierMethods() { LogisticRegression lr = new LogisticRegression(); diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaStreamingLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaStreamingLogisticRegressionSuite.java index 8c6bced52dd74..d436c0ea61d13 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaStreamingLogisticRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaStreamingLogisticRegressionSuite.java @@ -57,7 +57,6 @@ public void tearDown() { } @Test - @SuppressWarnings("unchecked") public void javaAPI() { List trainingBatch = Arrays.asList( new LabeledPoint(1.0, Vectors.dense(1.0)), diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaStreamingKMeansSuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaStreamingKMeansSuite.java index d41fc0e4dca96..f61d547bfdd4a 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaStreamingKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaStreamingKMeansSuite.java @@ -56,7 +56,6 @@ public void tearDown() { } @Test - @SuppressWarnings("unchecked") public void javaAPI() { List trainingBatch = Arrays.asList( Vectors.dense(1.0), diff --git a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java index e9d7e4fdbe8ce..50822c61fdc6a 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java @@ -47,7 +47,6 @@ public void setUp() throws IOException { @Test public void rankingMetrics() { - @SuppressWarnings("unchecked") RankingMetrics metrics = RankingMetrics.of(predictionAndLabels); Assert.assertEquals(0.355026, metrics.meanAveragePrecision(), 1e-5); Assert.assertEquals(0.75 / 3.0, metrics.precisionAt(4), 1e-5); diff --git a/mllib/src/test/java/org/apache/spark/mllib/feature/JavaTfIdfSuite.java b/mllib/src/test/java/org/apache/spark/mllib/feature/JavaTfIdfSuite.java index 05128ea343420..4508d2ca2792e 100644 --- 
a/mllib/src/test/java/org/apache/spark/mllib/feature/JavaTfIdfSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/feature/JavaTfIdfSuite.java @@ -33,7 +33,6 @@ public class JavaTfIdfSuite extends SharedSparkSession { public void tfIdf() { // The tests are to check Java compatibility. HashingTF tf = new HashingTF(); - @SuppressWarnings("unchecked") JavaRDD> documents = jsc.parallelize(Arrays.asList( Arrays.asList("this is a sentence".split(" ")), Arrays.asList("this is another sentence".split(" ")), @@ -53,7 +52,6 @@ public void tfIdf() { public void tfIdfMinimumDocumentFrequency() { // The tests are to check Java compatibility. HashingTF tf = new HashingTF(); - @SuppressWarnings("unchecked") JavaRDD> documents = jsc.parallelize(Arrays.asList( Arrays.asList("this is a sentence".split(" ")), Arrays.asList("this is another sentence".split(" ")), diff --git a/mllib/src/test/java/org/apache/spark/mllib/feature/JavaWord2VecSuite.java b/mllib/src/test/java/org/apache/spark/mllib/feature/JavaWord2VecSuite.java index 3e3abddbee638..a423e8bd47176 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/feature/JavaWord2VecSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/feature/JavaWord2VecSuite.java @@ -33,7 +33,6 @@ public class JavaWord2VecSuite extends SharedSparkSession { @Test - @SuppressWarnings("unchecked") public void word2Vec() { // The tests are to check Java compatibility. String sentence = Strings.repeat("a b ", 100) + Strings.repeat("a c ", 10); diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java index 15de566c886de..1f3fa9d813760 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java @@ -29,7 +29,6 @@ public class JavaAssociationRulesSuite extends SharedSparkSession { @Test public void runAssociationRules() { - @SuppressWarnings("unchecked") JavaRDD> freqItemsets = jsc.parallelize(Arrays.asList( new FreqItemset<>(new String[]{"a"}, 15L), new FreqItemset<>(new String[]{"b"}, 35L), diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java index 46e9dd8b59828..dda8884249ec7 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java @@ -34,7 +34,6 @@ public class JavaFPGrowthSuite extends SharedSparkSession { @Test public void runFPGrowth() { - @SuppressWarnings("unchecked") JavaRDD> rdd = jsc.parallelize(Arrays.asList( Arrays.asList("r z h k p".split(" ")), Arrays.asList("z y x w v u t s".split(" ")), @@ -61,7 +60,6 @@ public void runFPGrowth() { @Test public void runFPGrowthSaveLoad() { - @SuppressWarnings("unchecked") JavaRDD> rdd = jsc.parallelize(Arrays.asList( Arrays.asList("r z h k p".split(" ")), Arrays.asList("z y x w v u t s".split(" ")), diff --git a/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaVectorsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaVectorsSuite.java index f67f555e418a7..d854ba208a493 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaVectorsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaVectorsSuite.java @@ -35,7 +35,6 @@ public void denseArrayConstruction() { @Test public void sparseArrayConstruction() { - @SuppressWarnings("unchecked") Vector v = 
Vectors.sparse(3, Arrays.asList( new Tuple2<>(0, 2.0), new Tuple2<>(2, 3.0))); diff --git a/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java index 6d114024c31be..8f54060158817 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java @@ -117,7 +117,6 @@ public void testGammaRDD() { @Test - @SuppressWarnings("unchecked") public void testUniformVectorRDD() { long m = 100L; int n = 10; @@ -133,7 +132,6 @@ public void testUniformVectorRDD() { } @Test - @SuppressWarnings("unchecked") public void testNormalVectorRDD() { long m = 100L; int n = 10; @@ -149,7 +147,6 @@ public void testNormalVectorRDD() { } @Test - @SuppressWarnings("unchecked") public void testLogNormalVectorRDD() { double mean = 4.0; double std = 2.0; @@ -167,7 +164,6 @@ public void testLogNormalVectorRDD() { } @Test - @SuppressWarnings("unchecked") public void testPoissonVectorRDD() { double mean = 2.0; long m = 100L; @@ -184,7 +180,6 @@ public void testPoissonVectorRDD() { } @Test - @SuppressWarnings("unchecked") public void testExponentialVectorRDD() { double mean = 2.0; long m = 100L; @@ -201,7 +196,6 @@ public void testExponentialVectorRDD() { } @Test - @SuppressWarnings("unchecked") public void testGammaVectorRDD() { double shape = 1.0; double jscale = 2.0; @@ -234,7 +228,6 @@ public void testArbitrary() { } @Test - @SuppressWarnings("unchecked") public void testRandomVectorRDD() { UniformGenerator generator = new UniformGenerator(); long m = 100L; diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaStreamingLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaStreamingLinearRegressionSuite.java index ab554475d59a1..449be868c4fd9 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaStreamingLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaStreamingLinearRegressionSuite.java @@ -56,7 +56,6 @@ public void tearDown() { } @Test - @SuppressWarnings("unchecked") public void javaAPI() { List trainingBatch = Arrays.asList( new LabeledPoint(1.0, Vectors.dense(1.0)), diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaRowSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaRowSuite.java index ca78d6489ef5c..37f49ce5705de 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaRowSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaRowSuite.java @@ -145,7 +145,6 @@ public void constructComplexRow() { doubleValue, stringValue, timestampValue, null); // Complex array - @SuppressWarnings("unchecked") List> arrayOfMaps = Arrays.asList(simpleMap); List arrayOfRows = Arrays.asList(simpleStruct); diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaUDAFSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaUDAFSuite.java index 08dc129f27a0c..1da5fb4b64cbb 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaUDAFSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaUDAFSuite.java @@ -43,7 +43,6 @@ public void tearDown() { spark = null; } - @SuppressWarnings("unchecked") @Test public void udf1Test() { spark.range(1, 10).toDF("value").createOrReplaceTempView("df"); diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaUDFSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaUDFSuite.java index 
7e938ca88d8b9..cd64f858b1473 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaUDFSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaUDFSuite.java @@ -55,7 +55,6 @@ public void tearDown() { spark = null; } - @SuppressWarnings("unchecked") @Test public void udf1Test() { spark.udf().register("stringLengthTest", (String str) -> str.length(), DataTypes.IntegerType); @@ -64,7 +63,6 @@ public void udf1Test() { Assert.assertEquals(4, result.getInt(0)); } - @SuppressWarnings("unchecked") @Test public void udf2Test() { spark.udf().register("stringLengthTest", @@ -81,7 +79,6 @@ public Integer call(String str1, String str2) { } } - @SuppressWarnings("unchecked") @Test public void udf3Test() { spark.udf().registerJava("stringLengthTest", StringLengthTest.class.getName(), @@ -95,7 +92,6 @@ public void udf3Test() { Assert.assertEquals(9, result.getInt(0)); } - @SuppressWarnings("unchecked") @Test public void udf4Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); @@ -111,14 +107,12 @@ public void udf4Test() { Assert.assertEquals(55, sum); } - @SuppressWarnings("unchecked") @Test(expected = AnalysisException.class) public void udf5Test() { spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType); List results = spark.sql("SELECT inc(1, 5)").collectAsList(); } - @SuppressWarnings("unchecked") @Test public void udf6Test() { spark.udf().register("returnOne", () -> 1, DataTypes.IntegerType); @@ -126,7 +120,6 @@ public void udf6Test() { Assert.assertEquals(1, result.getInt(0)); } - @SuppressWarnings("unchecked") @Test public void udf7Test() { String originConf = spark.conf().get(SQLConf.DATETIME_JAVA8API_ENABLED().key()); @@ -142,7 +135,6 @@ public void udf7Test() { } } - @SuppressWarnings("unchecked") @Test public void sourceTest() { spark.udf().register("stringLengthTest", (String str) -> str.length(), DataTypes.IntegerType); diff --git a/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java index 8a57b0c58b228..41c4bf9e711d5 100644 --- a/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java @@ -75,7 +75,6 @@ public void testInitialization() { Assert.assertNotNull(ssc.sparkContext()); } - @SuppressWarnings("unchecked") @Test public void testContextState() { List> inputData = Arrays.asList(Arrays.asList(1, 2, 3, 4)); @@ -89,7 +88,6 @@ public void testContextState() { Assert.assertEquals(StreamingContextState.STOPPED, ssc.getState()); } - @SuppressWarnings("unchecked") @Test public void testCount() { List> inputData = Arrays.asList( @@ -109,7 +107,6 @@ public void testCount() { assertOrderInvariantEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testMap() { List> inputData = Arrays.asList( @@ -128,7 +125,6 @@ public void testMap() { assertOrderInvariantEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testWindow() { List> inputData = Arrays.asList( @@ -150,7 +146,6 @@ public void testWindow() { assertOrderInvariantEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testWindowWithSlideDuration() { List> inputData = Arrays.asList( @@ -175,7 +170,6 @@ public void testWindowWithSlideDuration() { assertOrderInvariantEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testFilter() { List> inputData = Arrays.asList( @@ -194,7 +188,6 @@ public 
void testFilter() { assertOrderInvariantEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testRepartitionMorePartitions() { List> inputData = Arrays.asList( @@ -214,7 +207,6 @@ public void testRepartitionMorePartitions() { } } - @SuppressWarnings("unchecked") @Test public void testRepartitionFewerPartitions() { List> inputData = Arrays.asList( @@ -233,7 +225,6 @@ public void testRepartitionFewerPartitions() { } } - @SuppressWarnings("unchecked") @Test public void testGlom() { List> inputData = Arrays.asList( @@ -252,7 +243,6 @@ public void testGlom() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testMapPartitions() { List> inputData = Arrays.asList( @@ -291,7 +281,6 @@ public Integer call(Integer i1, Integer i2) { } } - @SuppressWarnings("unchecked") @Test public void testReduce() { List> inputData = Arrays.asList( @@ -312,19 +301,16 @@ public void testReduce() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testReduceByWindowWithInverse() { testReduceByWindow(true); } - @SuppressWarnings("unchecked") @Test public void testReduceByWindowWithoutInverse() { testReduceByWindow(false); } - @SuppressWarnings("unchecked") private void testReduceByWindow(boolean withInverse) { List> inputData = Arrays.asList( Arrays.asList(1,2,3), @@ -354,7 +340,6 @@ private void testReduceByWindow(boolean withInverse) { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testQueueStream() { ssc.stop(); @@ -386,7 +371,6 @@ public void testQueueStream() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testTransform() { List> inputData = Arrays.asList( @@ -408,7 +392,6 @@ public void testTransform() { assertOrderInvariantEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testVariousTransform() { // tests whether all variations of transform can be called from Java @@ -495,7 +478,6 @@ public void testTransformWith() { } - @SuppressWarnings("unchecked") @Test public void testVariousTransformWith() { // tests whether all variations of transformWith can be called from Java @@ -593,7 +575,6 @@ public void testStreamingContextTransform(){ Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testFlatMap() { List> inputData = Arrays.asList( @@ -615,7 +596,6 @@ public void testFlatMap() { assertOrderInvariantEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testForeachRDD() { final LongAccumulator accumRdd = ssc.sparkContext().sc().longAccumulator(); @@ -641,7 +621,6 @@ public void testForeachRDD() { Assert.assertEquals(6, accumEle.value().intValue()); } - @SuppressWarnings("unchecked") @Test public void testPairFlatMap() { List> inputData = Arrays.asList( @@ -690,7 +669,6 @@ public void testPairFlatMap() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testUnion() { List> inputData1 = Arrays.asList( @@ -737,7 +715,6 @@ public static void assertOrderInvariantEquals( // PairDStream Functions - @SuppressWarnings("unchecked") @Test public void testPairFilter() { List> inputData = Arrays.asList( @@ -759,7 +736,6 @@ public void testPairFilter() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") private final List>> stringStringKVStream = Arrays.asList( Arrays.asList(new Tuple2<>("california", "dodgers"), new Tuple2<>("california", "giants"), @@ -770,7 +746,6 @@ 
public void testPairFilter() { new Tuple2<>("new york", "rangers"), new Tuple2<>("new york", "islanders"))); - @SuppressWarnings("unchecked") private final List>> stringIntKVStream = Arrays.asList( Arrays.asList( new Tuple2<>("california", 1), @@ -783,7 +758,6 @@ public void testPairFilter() { new Tuple2<>("new york", 3), new Tuple2<>("new york", 1))); - @SuppressWarnings("unchecked") @Test public void testPairMap() { // Maps pair -> pair of different type List>> inputData = stringIntKVStream; @@ -811,7 +785,6 @@ public void testPairMap() { // Maps pair -> pair of different type Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testPairMapPartitions() { // Maps pair -> pair of different type List>> inputData = stringIntKVStream; @@ -846,7 +819,6 @@ public void testPairMapPartitions() { // Maps pair -> pair of different type Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testPairMap2() { // Maps pair -> single List>> inputData = stringIntKVStream; @@ -866,7 +838,6 @@ public void testPairMap2() { // Maps pair -> single Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testPairToPairFlatMapWithChangingTypes() { // Maps pair -> pair List>> inputData = Arrays.asList( @@ -905,7 +876,6 @@ public void testPairToPairFlatMapWithChangingTypes() { // Maps pair -> pair Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testPairGroupByKey() { List>> inputData = stringStringKVStream; @@ -942,7 +912,6 @@ public void testPairGroupByKey() { } } - @SuppressWarnings("unchecked") @Test public void testPairReduceByKey() { List>> inputData = stringIntKVStream; @@ -967,7 +936,6 @@ public void testPairReduceByKey() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testCombineByKey() { List>> inputData = stringIntKVStream; @@ -993,7 +961,6 @@ public void testCombineByKey() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testCountByValue() { List> inputData = Arrays.asList( @@ -1019,7 +986,6 @@ public void testCountByValue() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testGroupByKeyAndWindow() { List>> inputData = stringIntKVStream; @@ -1067,7 +1033,6 @@ private static Tuple2> convert(Tuple2(tuple._1(), new HashSet<>(tuple._2())); } - @SuppressWarnings("unchecked") @Test public void testReduceByKeyAndWindow() { List>> inputData = stringIntKVStream; @@ -1092,7 +1057,6 @@ public void testReduceByKeyAndWindow() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testUpdateStateByKey() { List>> inputData = stringIntKVStream; @@ -1125,7 +1089,6 @@ public void testUpdateStateByKey() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testUpdateStateByKeyWithInitial() { List>> inputData = stringIntKVStream; @@ -1165,7 +1128,6 @@ public void testUpdateStateByKeyWithInitial() { assertOrderInvariantEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testReduceByKeyAndWindowWithInverse() { List>> inputData = stringIntKVStream; @@ -1225,7 +1187,6 @@ public void testCountByValueAndWindow() { Assert.assertEquals(expected, unorderedResult); } - @SuppressWarnings("unchecked") @Test public void testPairTransform() { List>> inputData = Arrays.asList( @@ -1264,7 +1225,6 @@ public void testPairTransform() { 
Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testPairToNormalRDDTransform() { List>> inputData = Arrays.asList( @@ -1295,7 +1255,6 @@ public void testPairToNormalRDDTransform() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testMapValues() { List>> inputData = stringStringKVStream; @@ -1323,7 +1282,6 @@ public void testMapValues() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testFlatMapValues() { List>> inputData = stringStringKVStream; @@ -1364,7 +1322,6 @@ public void testFlatMapValues() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testCoGroup() { List>> stringStringKVStream1 = Arrays.asList( @@ -1430,7 +1387,6 @@ public void testCoGroup() { } } - @SuppressWarnings("unchecked") @Test public void testJoin() { List>> stringStringKVStream1 = Arrays.asList( @@ -1474,7 +1430,6 @@ public void testJoin() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testLeftOuterJoin() { List>> stringStringKVStream1 = Arrays.asList( @@ -1507,7 +1462,6 @@ public void testLeftOuterJoin() { Assert.assertEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testCheckpointMasterRecovery() throws InterruptedException { List> inputData = Arrays.asList( @@ -1543,7 +1497,6 @@ public void testCheckpointMasterRecovery() throws InterruptedException { Utils.deleteRecursively(tempDir); } - @SuppressWarnings("unchecked") @Test public void testContextGetOrCreate() throws InterruptedException { ssc.stop(); @@ -1648,7 +1601,6 @@ public void testSocketString() { StorageLevel.MEMORY_ONLY()); } - @SuppressWarnings("unchecked") @Test public void testTextFileStream() throws IOException { File testDir = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark"); @@ -1661,7 +1613,6 @@ public void testTextFileStream() throws IOException { assertOrderInvariantEquals(expected, result); } - @SuppressWarnings("unchecked") @Test public void testFileStream() throws IOException { File testDir = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark"); From 6c486d2eba5750b502468cd837b0ccdca849cc21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20J=C3=B8rgensen?= Date: Mon, 7 Mar 2022 11:01:32 -0800 Subject: [PATCH 413/513] [SPARK-38436][PYTHON][TESTS] Fix `test_ceil` to test `ceil` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? We have two functions that are testing the same thing. ### Why are the changes needed? To test both floor and ceil. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Got the green light. Closes #35755 from bjornjorgensen/Test-ceil. 
Authored-by: Bjørn Jørgensen Signed-off-by: Dongjoon Hyun --- python/pyspark/pandas/tests/test_series_datetime.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/pandas/tests/test_series_datetime.py b/python/pyspark/pandas/tests/test_series_datetime.py index 637c1897bf544..d837c34fc7439 100644 --- a/python/pyspark/pandas/tests/test_series_datetime.py +++ b/python/pyspark/pandas/tests/test_series_datetime.py @@ -264,8 +264,8 @@ def test_floor(self): self.check_func(lambda x: x.dt.floor(freq="H")) def test_ceil(self): - self.check_func(lambda x: x.dt.floor(freq="min")) - self.check_func(lambda x: x.dt.floor(freq="H")) + self.check_func(lambda x: x.dt.ceil(freq="min")) + self.check_func(lambda x: x.dt.ceil(freq="H")) @unittest.skip("Unsupported locale setting") def test_month_name(self): From 71991f75ff441e80a52cb71f66f46bfebdb05671 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 7 Mar 2022 12:04:24 -0800 Subject: [PATCH 414/513] [SPARK-38285][SQL] Avoid generator pruning for invalid extractor ### What changes were proposed in this pull request? This fixes a bug in generator nested column pruning. The bug happens when the extractor pattern is like `GetArrayStructFields(GetStructField(...), ...)` on the generator output. Once the input to the generator is an array, after replacing with the extractor based on pruning logic, it becomes an extractor of `GetArrayStructFields(GetArrayStructFields(...), ...)` which is not valid. ### Why are the changes needed? To fix a bug in generator nested column pruning. ### Does this PR introduce _any_ user-facing change? Yes, fixing a user-facing bug. ### How was this patch tested? Added unit test. Closes #35749 from viirya/SPARK-38285. Authored-by: Liang-Chi Hsieh Signed-off-by: Liang-Chi Hsieh --- .../optimizer/NestedColumnAliasing.scala | 11 ++++++++++ .../org/apache/spark/sql/DataFrameSuite.scala | 20 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala index c8c67f5000942..a2ee950dae9ee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala @@ -372,6 +372,17 @@ object GeneratorNestedColumnAliasing { e.withNewChildren(Seq(extractor)) } + // If after replacing generator expression with nested extractor, there + // is invalid extractor pattern like + // `GetArrayStructFields(GetArrayStructFields(...), ...), we cannot do + // pruning but fallback to original query plan. + val invalidExtractor = rewrittenG.generator.children.head.collect { + case GetArrayStructFields(_: GetArrayStructFields, _, _, _, _) => true + } + if (invalidExtractor.nonEmpty) { + return Some(pushedThrough) + } + // As we change the child of the generator, its output data type must be updated. 
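(Editorial aside, not part of the patch: the plan shape guarded against here comes from queries that explode an array of structs and then read a field that is itself an array of structs. A minimal reproduction, mirroring the regression test added below and assuming only a running `SparkSession` named `spark`:)

```scala
// Hedged sketch: `o` is an array of structs whose field `b` is itself an array of structs,
// so pruning through explode(o) used to build the invalid
// GetArrayStructFields(GetArrayStructFields(...)) extractor.
spark.sql(
  """CREATE OR REPLACE TEMP VIEW v1 AS
    |SELECT * FROM VALUES
    |(array(
    |  named_struct('s', 'string1', 'b', array(named_struct('e', 'string2'))),
    |  named_struct('s', 'string4', 'b', array(named_struct('e', 'string5')))
    |)) v1(o)""".stripMargin)

// Before the fix this query failed with "ClassCastException: GenericArrayData cannot be
// cast to InternalRow"; with the fallback it returns the two nested values.
spark.sql("SELECT eo.b.e FROM (SELECT explode(o) AS eo FROM v1)").show()
```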
val updatedGeneratorOutput = rewrittenG.generatorOutput .zip(rewrittenG.generator.elementSchema.toAttributes) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index c7d05df7a4dbe..3eb976498b8b5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -3107,6 +3107,26 @@ class DataFrameSuite extends QueryTest assert(res.collect.length == 2) } + + test("SPARK-38285: Fix ClassCastException: GenericArrayData cannot be cast to InternalRow") { + withTempView("v1") { + val sqlText = + """ + |CREATE OR REPLACE TEMP VIEW v1 AS + |SELECT * FROM VALUES + |(array( + | named_struct('s', 'string1', 'b', array(named_struct('e', 'string2'))), + | named_struct('s', 'string4', 'b', array(named_struct('e', 'string5'))) + | ) + |) + |v1(o); + |""".stripMargin + sql(sqlText) + + val df = sql("SELECT eo.b.e FROM (SELECT explode(o) AS eo FROM v1)") + checkAnswer(df, Row(Seq("string2")) :: Row(Seq("string5")) :: Nil) + } + } } case class GroupByKey(a: Int, b: Int) From a13b4780177f74e4c362aa89514ce0dd32f8d119 Mon Sep 17 00:00:00 2001 From: itholic Date: Tue, 8 Mar 2022 09:58:38 +0900 Subject: [PATCH 415/513] [SPARK-38183][PYTHON][FOLLOWUP] Check the ANSI conf properly when creating pandas-on-Spark session ### What changes were proposed in this pull request? This followup for https://github.com/apache/spark/pull/35488 proposes to fix the ANSI conf check properly in `python/pyspark/pandas/utils.py`. ### Why are the changes needed? So far, the condition `if spark.conf.get("spark.sql.ansi.enabled"):` always went True, since `spark.conf.get("spark.sql.ansi.enabled")` returns `"true"` or `"false"` instead of `True` or `False`. So it always shows the warning message even though ANSI mode is not enabled. We need to check the returned string properly so that we only show the warning message when `"spark.sql.ansi.enabled"` is actually set to True. ### Does this PR introduce _any_ user-facing change? Now users see the warning message only when `"spark.sql.ansi.enabled"` is set to True. ### How was this patch tested? Manually tested, and the existing tests should pass. Closes #35757 from itholic/SPARK-38183-followup. Authored-by: itholic Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/pandas/utils.py b/python/pyspark/pandas/utils.py index 95a7eec2d5ec3..d37a359440ff7 100644 --- a/python/pyspark/pandas/utils.py +++ b/python/pyspark/pandas/utils.py @@ -474,7 +474,7 @@ def default_session() -> SparkSession: # the behavior of pandas API on Spark follows pandas, not SQL. if is_testing(): spark.conf.set("spark.sql.ansi.enabled", False) # type: ignore[arg-type] - if spark.conf.get("spark.sql.ansi.enabled"): + if spark.conf.get("spark.sql.ansi.enabled") == "true": log_advice( "The config 'spark.sql.ansi.enabled' is set to True. " "This can cause unexpected behavior " From 14cda58a477042ef912af3b63261bcbf7d8b059a Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Tue, 8 Mar 2022 09:25:36 +0800 Subject: [PATCH 416/513] [SPARK-38385][SQL] Improve error messages of 'mismatched input' cases from ANTLR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request?
This PR handles the case 1 mentioned in https://issues.apache.org/jira/browse/SPARK-38385: * Before ``` ParseException: mismatched input 'sel' expecting {'(', 'APPLY', 'CONVERT', 'COPY', 'OPTIMIZE', 'RESTORE', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'SYNC', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) == SQL == sel 1 ^^^ ``` * After ``` ParseException: syntax error at or near 'sel'(line 1, pos 0) == SQL == sel 1 ^^^ ``` #### Implementation general idea ANTLR uses the DefaultErrorStrategy class to create error messages: ```scala public class DefaultErrorStrategy implements ANTLRErrorStrategy { protected void reportInputMismatch(Parser recognizer, InputMismatchException e) { String msg = "mismatched input " + getTokenErrorDisplay(e.getOffendingToken()) + " expecting " + e.getExpectedTokens().toString(recognizer.getVocabulary()); recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e); } .. } ``` It is easy to extend the `DefaultErrorStrategy` and override corresponding functions to output better error messages. Then in our parser, set the error strategy to be the one we created. #### Changes in code To achieve this, the following changes are made: * error-classes.json Define a new type of error `PARSE_INPUT_MISMATCHED` with the new error framework: ```json "PARSE_INPUT_MISMATCHED" : { "message" : [ "syntax error at or near %s" ], "sqlState" : "42000" }, ``` * SparkParserErrorStrategy.scala This is a new class, extending the `org.antlr.v4.runtime.DefaultErrorStrategy` that does special handling on errors. Note the original `DefaultErrorStrategy` is where the `mismatched input` error message generates from. The new class is intended to provide more information, e.g. the error class and the message parameters, on these errors encountered in ANTLR parser to the downstream consumers to be able to apply the `SparkThrowable` error message framework to these exceptions. * ParserDriver.scala * It sets the error strategy of the parser to be the above new `SparkParserErrorStrategy`. * When catching an exception thrown from ANTLR, when it can find out the error class and message parameter, it creates `ParseException` with this information, which composes the error message through `SparkThrowableHelper.getMessage`. It then formalizes the standard error messages of these types. * test suites It change all affected test suites. It also adds a check on the error class, note the newly added `PARSE_INPUT_MISMATCHED` in the after case: ```scala // before intercept("select * from r order by q from t", 1, 27, 31, "mismatched input", "---------------------------^^^") // after intercept("select * from r order by q from t", "PARSE_INPUT_MISMATCHED", 1, 27, 31, "syntax error at or near", "---------------------------^^^" ) ``` ### Why are the changes needed? https://issues.apache.org/jira/browse/SPARK-38384 The description states the reason for the change. TLDR, the error messages of ParseException directly coming from ANTLR are not user-friendly and we want to improve it. ### Does this PR introduce _any_ user-facing change? If the error messages change are considered as user-facing change, then yes. The changes in the error message: 1. 
Adjust the words, from ‘mismatched input {}’ to a more readable one, ‘syntax error at or near {}.’. This also aligns with the PostgreSQL error messages. 2. Remove the expecting full list. One example case is listed in the top of this PR description. ### How was this patch tested? Through manual local tests. Closes #35707 from anchovYu/improve-parse-exception-mismatched-input. Authored-by: Xinyi Yu Signed-off-by: Wenchen Fan --- .../main/resources/error/error-classes.json | 4 + .../sql/catalyst/parser/ParseDriver.scala | 23 +++++- .../parser/SparkParserErrorStrategy.scala | 76 +++++++++++++++++++ .../sql/catalyst/analysis/AnalysisTest.scala | 8 +- .../sql/catalyst/parser/DDLParserSuite.scala | 7 +- .../catalyst/parser/ErrorParserSuite.scala | 70 ++++++++++++----- .../parser/ExpressionParserSuite.scala | 11 ++- .../sql/catalyst/parser/PlanParserSuite.scala | 44 ++++++----- .../sql-tests/results/describe-query.sql.out | 6 +- .../results/postgreSQL/union.sql.out | 18 ++--- .../results/postgreSQL/window_part3.sql.out | 2 +- .../sql-tests/results/show-tables.sql.out | 4 +- .../sql/execution/SparkSqlParserSuite.scala | 2 +- .../command/CreateNamespaceParserSuite.scala | 2 +- .../execution/command/DDLParserSuite.scala | 2 +- .../command/PlanResolutionSuite.scala | 4 +- .../datasources/jdbc/JdbcUtilsSuite.scala | 2 +- .../sql/hive/thriftserver/CliSuite.scala | 2 +- .../apache/spark/sql/hive/InsertSuite.scala | 4 +- 19 files changed, 218 insertions(+), 73 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index cbdfe5f991374..663454fa74b64 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -128,6 +128,10 @@ "message" : [ "PARTITION clause cannot contain a non-partition column name: %s" ], "sqlState" : "42000" }, + "PARSE_INPUT_MISMATCHED" : { + "message" : [ "Syntax error at or near %s" ], + "sqlState" : "42000" + }, "PIVOT_VALUE_DATA_TYPE_MISMATCH" : { "message" : [ "Invalid pivot value '%s': value data type %s does not match pivot column data type %s" ], "sqlState" : "42000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 22532ed2ec305..87825540f7427 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -104,6 +104,7 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with parser.addParseListener(UnclosedCommentProcessor(command, tokenStream)) parser.removeErrorListeners() parser.addErrorListener(ParseErrorListener) + parser.setErrorHandler(new SparkParserErrorStrategy()) parser.legacy_setops_precedence_enabled = conf.setOpsPrecedenceEnforced parser.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled parser.SQL_standard_keyword_behavior = conf.enforceReservedKeywords @@ -207,7 +208,12 @@ case object ParseErrorListener extends BaseErrorListener { val start = Origin(Some(line), Some(charPositionInLine)) (start, start) } - throw new ParseException(None, msg, start, stop) + e match { + case sre: SparkRecognitionException if sre.errorClass.isDefined => + throw new ParseException(None, start, stop, sre.errorClass.get, 
sre.messageParameters) + case _ => + throw new ParseException(None, msg, start, stop) + } } } @@ -246,6 +252,21 @@ class ParseException( Some(errorClass), messageParameters) + /** Compose the message through SparkThrowableHelper given errorClass and messageParameters. */ + def this( + command: Option[String], + start: Origin, + stop: Origin, + errorClass: String, + messageParameters: Array[String]) = + this( + command, + SparkThrowableHelper.getMessage(errorClass, messageParameters), + start, + stop, + Some(errorClass), + messageParameters) + override def getMessage: String = { val builder = new StringBuilder builder ++= "\n" ++= message diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala new file mode 100644 index 0000000000000..d514a61e315dc --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.parser + +import org.antlr.v4.runtime.{DefaultErrorStrategy, InputMismatchException, IntStream, Parser, + ParserRuleContext, RecognitionException, Recognizer} + +/** + * A [[SparkRecognitionException]] extends the [[RecognitionException]] with more information + * including the error class and parameters for the error message, which align with the interface + * of [[SparkThrowableHelper]]. + */ +class SparkRecognitionException( + message: String, + recognizer: Recognizer[_, _], + input: IntStream, + ctx: ParserRuleContext, + val errorClass: Option[String] = None, + val messageParameters: Array[String] = Array.empty) + extends RecognitionException(message, recognizer, input, ctx) { + + /** Construct from a given [[RecognitionException]], with additional error information. */ + def this( + recognitionException: RecognitionException, + errorClass: String, + messageParameters: Array[String]) = + this( + recognitionException.getMessage, + recognitionException.getRecognizer, + recognitionException.getInputStream, + recognitionException.getCtx match { + case p: ParserRuleContext => p + case _ => null + }, + Some(errorClass), + messageParameters) +} + +/** + * A [[SparkParserErrorStrategy]] extends the [[DefaultErrorStrategy]], that does special handling + * on errors. + * + * The intention of this class is to provide more information of these errors encountered in + * ANTLR parser to the downstream consumers, to be able to apply the [[SparkThrowable]] error + * message framework to these exceptions. 
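(Editorial aside, not part of the patch: the practical payoff of carrying the error class through the parser is on the consumer side. A minimal sketch, assuming only a running `SparkSession` named `spark`, of branching on the stable `PARSE_INPUT_MISMATCHED` class rather than on the rendered message text:)

```scala
// Hedged sketch: with this change, a plain syntax error carries a stable error class,
// so callers can pattern-match on it instead of inspecting the message string.
import org.apache.spark.sql.catalyst.parser.ParseException

try {
  spark.sql("sel 1") // typo for SELECT; now reported as "Syntax error at or near 'sel'"
} catch {
  case e: ParseException if e.getErrorClass == "PARSE_INPUT_MISMATCHED" =>
    println(e.getMessage) // surface the simplified parser message to the caller
}
```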
+ */ +class SparkParserErrorStrategy() extends DefaultErrorStrategy { + override def reportInputMismatch(recognizer: Parser, e: InputMismatchException): Unit = { + // Keep the original error message in ANTLR + val msg = "mismatched input " + + this.getTokenErrorDisplay(e.getOffendingToken) + + " expecting " + + e.getExpectedTokens.toString(recognizer.getVocabulary) + + val exceptionWithErrorClass = new SparkRecognitionException( + e, + "PARSE_INPUT_MISMATCHED", + Array(getTokenErrorDisplay(e.getOffendingToken))) + recognizer.notifyErrorListeners(e.getOffendingToken, msg, exceptionWithErrorClass) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala index 53dc9be6c69b7..804f1edbe06fd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala @@ -200,11 +200,15 @@ trait AnalysisTest extends PlanTest { } } - protected def interceptParseException( - parser: String => Any)(sqlCommand: String, messages: String*): Unit = { + protected def interceptParseException(parser: String => Any)( + sqlCommand: String, messages: String*)( + errorClass: Option[String] = None): Unit = { val e = intercept[ParseException](parser(sqlCommand)) messages.foreach { message => assert(e.message.contains(message)) } + if (errorClass.isDefined) { + assert(e.getErrorClass == errorClass.get) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index ebd9ac89d5fd5..a339e6d33f5f3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -44,7 +44,10 @@ class DDLParserSuite extends AnalysisTest { } private def intercept(sqlCommand: String, messages: String*): Unit = - interceptParseException(parsePlan)(sqlCommand, messages: _*) + interceptParseException(parsePlan)(sqlCommand, messages: _*)() + + private def intercept(sqlCommand: String, errorClass: Option[String], messages: String*): Unit = + interceptParseException(parsePlan)(sqlCommand, messages: _*)(errorClass) private def parseCompare(sql: String, expected: LogicalPlan): Unit = { comparePlans(parsePlan(sql), expected, checkAnalysis = false) @@ -1774,7 +1777,7 @@ class DDLParserSuite extends AnalysisTest { allColumns = true)) intercept("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR ALL COLUMNS key, value", - "mismatched input 'key' expecting {, ';'}") + Some("PARSE_INPUT_MISMATCHED"), "Syntax error at or near 'key'") // expecting {, ';'} intercept("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR ALL", "missing 'COLUMNS' at ''") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala index 99051d692451b..c1c8393ce3df9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala @@ -16,6 +16,7 @@ */ package org.apache.spark.sql.catalyst.parser +import org.apache.spark.SparkThrowableHelper import org.apache.spark.sql.catalyst.analysis.AnalysisTest import 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -31,26 +32,49 @@ class ErrorParserSuite extends AnalysisTest { assert(parsePlan(sqlCommand) == plan) } - def intercept(sqlCommand: String, messages: String*): Unit = - interceptParseException(CatalystSqlParser.parsePlan)(sqlCommand, messages: _*) - - def intercept(sql: String, line: Int, startPosition: Int, stopPosition: Int, - messages: String*): Unit = { + private def interceptImpl(sql: String, messages: String*)( + line: Option[Int] = None, + startPosition: Option[Int] = None, + stopPosition: Option[Int] = None, + errorClass: Option[String] = None): Unit = { val e = intercept[ParseException](CatalystSqlParser.parsePlan(sql)) - // Check position. - assert(e.line.isDefined) - assert(e.line.get === line) - assert(e.startPosition.isDefined) - assert(e.startPosition.get === startPosition) - assert(e.stop.startPosition.isDefined) - assert(e.stop.startPosition.get === stopPosition) - // Check messages. val error = e.getMessage messages.foreach { message => assert(error.contains(message)) } + + // Check position. + if (line.isDefined) { + assert(line.isDefined && startPosition.isDefined && stopPosition.isDefined) + assert(e.line.isDefined) + assert(e.line.get === line.get) + assert(e.startPosition.isDefined) + assert(e.startPosition.get === startPosition.get) + assert(e.stop.startPosition.isDefined) + assert(e.stop.startPosition.get === stopPosition.get) + } + + // Check error class. + if (errorClass.isDefined) { + assert(e.getErrorClass == errorClass.get) + } + } + + def intercept(sqlCommand: String, errorClass: Option[String], messages: String*): Unit = { + interceptImpl(sqlCommand, messages: _*)(errorClass = errorClass) + } + + def intercept( + sql: String, line: Int, startPosition: Int, stopPosition: Int, messages: String*): Unit = { + interceptImpl(sql, messages: _*)(Some(line), Some(startPosition), Some(stopPosition)) + } + + def intercept(sql: String, errorClass: String, line: Int, startPosition: Int, stopPosition: Int, + messages: String*): Unit = { + interceptImpl(sql, messages: _*)( + Some(line), Some(startPosition), Some(stopPosition), Some(errorClass)) } test("no viable input") { @@ -64,10 +88,14 @@ class ErrorParserSuite extends AnalysisTest { } test("mismatched input") { - intercept("select * from r order by q from t", 1, 27, 31, - "mismatched input", - "---------------------------^^^") - intercept("select *\nfrom r\norder by q\nfrom t", 4, 0, 4, "mismatched input", "^^^") + intercept("select * from r order by q from t", "PARSE_INPUT_MISMATCHED", + 1, 27, 31, + "Syntax error at or near", + "---------------------------^^^" + ) + intercept("select *\nfrom r\norder by q\nfrom t", "PARSE_INPUT_MISMATCHED", + 4, 0, 4, + "Syntax error at or near", "^^^") } test("semantic errors") { @@ -77,9 +105,11 @@ class ErrorParserSuite extends AnalysisTest { } test("SPARK-21136: misleading error message due to problematic antlr grammar") { - intercept("select * from a left join_ b on a.id = b.id", "missing 'JOIN' at 'join_'") - intercept("select * from test where test.t is like 'test'", "mismatched input 'is' expecting") - intercept("SELECT * FROM test WHERE x NOT NULL", "mismatched input 'NOT' expecting") + intercept("select * from a left join_ b on a.id = b.id", None, "missing 'JOIN' at 'join_'") + intercept("select * from test where test.t is like 'test'", Some("PARSE_INPUT_MISMATCHED"), + SparkThrowableHelper.getMessage("PARSE_INPUT_MISMATCHED", Array("'is'"))) + intercept("SELECT * FROM test WHERE x NOT NULL", 
Some("PARSE_INPUT_MISMATCHED"), + SparkThrowableHelper.getMessage("PARSE_INPUT_MISMATCHED", Array("'NOT'"))) } test("hyphen in identifier - DDL tests") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 93b6ca64ca2db..754ac8b91f738 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -58,7 +58,10 @@ class ExpressionParserSuite extends AnalysisTest { } private def intercept(sqlCommand: String, messages: String*): Unit = - interceptParseException(defaultParser.parseExpression)(sqlCommand, messages: _*) + interceptParseException(defaultParser.parseExpression)(sqlCommand, messages: _*)() + + private def intercept(sqlCommand: String, errorClass: Option[String], messages: String*): Unit = + interceptParseException(defaultParser.parseExpression)(sqlCommand, messages: _*)(errorClass) def assertEval( sqlCommand: String, @@ -863,7 +866,8 @@ class ExpressionParserSuite extends AnalysisTest { test("composed expressions") { assertEqual("1 + r.r As q", (Literal(1) + UnresolvedAttribute("r.r")).as("q")) assertEqual("1 - f('o', o(bar))", Literal(1) - 'f.function("o", 'o.function('bar))) - intercept("1 - f('o', o(bar)) hello * world", "mismatched input '*'") + intercept("1 - f('o', o(bar)) hello * world", Some("PARSE_INPUT_MISMATCHED"), + "Syntax error at or near '*'") } test("SPARK-17364, fully qualified column name which starts with number") { @@ -882,7 +886,8 @@ class ExpressionParserSuite extends AnalysisTest { test("SPARK-17832 function identifier contains backtick") { val complexName = FunctionIdentifier("`ba`r", Some("`fo`o")) assertEqual(complexName.quotedString, UnresolvedAttribute(Seq("`fo`o", "`ba`r"))) - intercept(complexName.unquotedString, "mismatched input") + intercept(complexName.unquotedString, Some("PARSE_INPUT_MISMATCHED"), + "Syntax error at or near") // Function identifier contains continuous backticks should be treated correctly. 
val complexName2 = FunctionIdentifier("ba``r", Some("fo``o")) assertEqual(complexName2.quotedString, UnresolvedAttribute(Seq("fo``o", "ba``r"))) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index e8088a62ecdde..70138a3e688c7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -41,7 +41,10 @@ class PlanParserSuite extends AnalysisTest { } private def intercept(sqlCommand: String, messages: String*): Unit = - interceptParseException(parsePlan)(sqlCommand, messages: _*) + interceptParseException(parsePlan)(sqlCommand, messages: _*)() + + private def intercept(sqlCommand: String, errorClass: Option[String], messages: String*): Unit = + interceptParseException(parsePlan)(sqlCommand, messages: _*)(errorClass) private def cte( plan: LogicalPlan, @@ -289,11 +292,11 @@ class PlanParserSuite extends AnalysisTest { "from a select * select * where s < 10", table("a").select(star()).union(table("a").where('s < 10).select(star()))) intercept( - "from a select * select * from x where a.s < 10", - "mismatched input 'from' expecting") + "from a select * select * from x where a.s < 10", Some("PARSE_INPUT_MISMATCHED"), + "Syntax error at or near 'from'") intercept( - "from a select * from b", - "mismatched input 'from' expecting") + "from a select * from b", Some("PARSE_INPUT_MISMATCHED"), + "Syntax error at or near 'from'") assertEqual( "from a insert into tbl1 select * insert into tbl2 select * where s < 10", table("a").select(star()).insertInto("tbl1").union( @@ -775,16 +778,12 @@ class PlanParserSuite extends AnalysisTest { test("select hint syntax") { // Hive compatibility: Missing parameter raises ParseException. - val m = intercept[ParseException] { - parsePlan("SELECT /*+ HINT() */ * FROM t") - }.getMessage - assert(m.contains("mismatched input")) + intercept("SELECT /*+ HINT() */ * FROM t", Some("PARSE_INPUT_MISMATCHED"), + "Syntax error at or near") // Disallow space as the delimiter. 
- val m3 = intercept[ParseException] { - parsePlan("SELECT /*+ INDEX(a b c) */ * from default.t") - }.getMessage - assert(m3.contains("mismatched input 'b' expecting")) + intercept("SELECT /*+ INDEX(a b c) */ * from default.t", Some("PARSE_INPUT_MISMATCHED"), + "Syntax error at or near 'b'") comparePlans( parsePlan("SELECT /*+ HINT */ * FROM t"), @@ -841,7 +840,8 @@ class PlanParserSuite extends AnalysisTest { UnresolvedHint("REPARTITION", Seq(Literal(100)), table("t").select(star())))) - intercept("SELECT /*+ COALESCE(30 + 50) */ * FROM t", "mismatched input") + intercept("SELECT /*+ COALESCE(30 + 50) */ * FROM t", Some("PARSE_INPUT_MISMATCHED"), + "Syntax error at or near") comparePlans( parsePlan("SELECT /*+ REPARTITION(c) */ * FROM t"), @@ -965,8 +965,10 @@ class PlanParserSuite extends AnalysisTest { ) } - intercept("select ltrim(both 'S' from 'SS abc S'", "mismatched input 'from' expecting {')'") - intercept("select rtrim(trailing 'S' from 'SS abc S'", "mismatched input 'from' expecting {')'") + intercept("select ltrim(both 'S' from 'SS abc S'", Some("PARSE_INPUT_MISMATCHED"), + "Syntax error at or near 'from'") // expecting {')' + intercept("select rtrim(trailing 'S' from 'SS abc S'", Some("PARSE_INPUT_MISMATCHED"), + "Syntax error at or near 'from'") // expecting {')' assertTrimPlans( "SELECT TRIM(BOTH '@$%&( )abc' FROM '@ $ % & ()abc ' )", @@ -1079,7 +1081,7 @@ class PlanParserSuite extends AnalysisTest { val m1 = intercept[ParseException] { parsePlan("CREATE VIEW testView AS INSERT INTO jt VALUES(1, 1)") }.getMessage - assert(m1.contains("mismatched input 'INSERT' expecting")) + assert(m1.contains("Syntax error at or near 'INSERT'")) // Multi insert query val m2 = intercept[ParseException] { parsePlan( @@ -1089,11 +1091,11 @@ class PlanParserSuite extends AnalysisTest { |INSERT INTO tbl2 SELECT * WHERE jt.id > 4 """.stripMargin) }.getMessage - assert(m2.contains("mismatched input 'INSERT' expecting")) + assert(m2.contains("Syntax error at or near 'INSERT'")) val m3 = intercept[ParseException] { parsePlan("ALTER VIEW testView AS INSERT INTO jt VALUES(1, 1)") }.getMessage - assert(m3.contains("mismatched input 'INSERT' expecting")) + assert(m3.contains("Syntax error at or near 'INSERT'")) // Multi insert query val m4 = intercept[ParseException] { parsePlan( @@ -1104,7 +1106,7 @@ class PlanParserSuite extends AnalysisTest { """.stripMargin ) }.getMessage - assert(m4.contains("mismatched input 'INSERT' expecting")) + assert(m4.contains("Syntax error at or near 'INSERT'")) } test("Invalid insert constructs in the query") { @@ -1115,7 +1117,7 @@ class PlanParserSuite extends AnalysisTest { val m2 = intercept[ParseException] { parsePlan("SELECT * FROM S WHERE C1 IN (INSERT INTO T VALUES (2))") }.getMessage - assert(m2.contains("mismatched input 'IN' expecting")) + assert(m2.contains("Syntax error at or near 'IN'")) } test("relation in v2 catalog") { diff --git a/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out index 2199fc0312d25..322b24877a57e 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out @@ -112,7 +112,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'desc_temp1' expecting {, ';'}(line 1, pos 21) +Syntax error at or near 'desc_temp1'(line 1, pos 21) == SQL == DESCRIBE INSERT INTO desc_temp1 values (1, 'val1') @@ -126,7 +126,7 @@ 
struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'desc_temp1' expecting {, ';'}(line 1, pos 21) +Syntax error at or near 'desc_temp1'(line 1, pos 21) == SQL == DESCRIBE INSERT INTO desc_temp1 SELECT * FROM desc_temp2 @@ -143,7 +143,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'insert' expecting {'MAP', 'REDUCE', 'SELECT'}(line 3, pos 5) +Syntax error at or near 'insert'(line 3, pos 5) == SQL == DESCRIBE diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/union.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/union.sql.out index a3f7b35fa27ed..99b6ea78cace1 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/union.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/union.sql.out @@ -80,7 +80,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'SELECT' expecting {, ';'}(line 1, pos 39) +Syntax error at or near 'SELECT'(line 1, pos 39) == SQL == SELECT 1 AS three UNION SELECT 2 UNION SELECT 3 ORDER BY 1 @@ -94,7 +94,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'SELECT' expecting {, ';'}(line 1, pos 37) +Syntax error at or near 'SELECT'(line 1, pos 37) == SQL == SELECT 1 AS two UNION SELECT 2 UNION SELECT 2 ORDER BY 1 @@ -171,7 +171,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'SELECT' expecting {, ';'}(line 1, pos 41) +Syntax error at or near 'SELECT'(line 1, pos 41) == SQL == SELECT 1.1 AS three UNION SELECT 2 UNION SELECT 3 ORDER BY 1 @@ -185,7 +185,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'SELECT' expecting {, ';'}(line 1, pos 47) +Syntax error at or near 'SELECT'(line 1, pos 47) == SQL == SELECT double(1.1) AS two UNION SELECT 2 UNION SELECT double(2.0) ORDER BY 1 @@ -381,7 +381,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'SELECT' expecting {')', ',', 'CLUSTER', 'DISTRIBUTE', 'EXCEPT', 'FROM', 'GROUP', 'HAVING', 'INTERSECT', 'LATERAL', 'LIMIT', 'ORDER', 'MINUS', 'SORT', 'UNION', 'WHERE', 'WINDOW', '-'}(line 1, pos 20) +Syntax error at or near 'SELECT'(line 1, pos 20) == SQL == (SELECT 1,2,3 UNION SELECT 4,5,6) INTERSECT SELECT 4,5,6 @@ -395,7 +395,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'SELECT' expecting {')', ',', 'CLUSTER', 'DISTRIBUTE', 'EXCEPT', 'FROM', 'GROUP', 'HAVING', 'INTERSECT', 'LATERAL', 'LIMIT', 'ORDER', 'MINUS', 'SORT', 'UNION', 'WHERE', 'WINDOW', '-'}(line 1, pos 20) +Syntax error at or near 'SELECT'(line 1, pos 20) == SQL == (SELECT 1,2,3 UNION SELECT 4,5,6 ORDER BY 1,2) INTERSECT SELECT 4,5,6 @@ -409,7 +409,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'SELECT' expecting {')', ',', 'CLUSTER', 'DISTRIBUTE', 'EXCEPT', 'FROM', 'GROUP', 'HAVING', 'INTERSECT', 'LATERAL', 'LIMIT', 'ORDER', 'MINUS', 'SORT', 'UNION', 'WHERE', 'WINDOW', '-'}(line 1, pos 20) +Syntax error at or near 'SELECT'(line 1, pos 20) == SQL == (SELECT 1,2,3 UNION SELECT 4,5,6) EXCEPT SELECT 4,5,6 @@ -423,7 +423,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'SELECT' expecting {')', ',', 'CLUSTER', 'DISTRIBUTE', 'EXCEPT', 'FROM', 'GROUP', 'HAVING', 'INTERSECT', 'LATERAL', 'LIMIT', 'ORDER', 
'MINUS', 'SORT', 'UNION', 'WHERE', 'WINDOW', '-'}(line 1, pos 20) +Syntax error at or near 'SELECT'(line 1, pos 20) == SQL == (SELECT 1,2,3 UNION SELECT 4,5,6 ORDER BY 1,2) EXCEPT SELECT 4,5,6 @@ -728,7 +728,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'SELECT' expecting {, ';'}(line 1, pos 44) +Syntax error at or near 'SELECT'(line 1, pos 44) == SQL == SELECT cast('3.4' as decimal(38, 18)) UNION SELECT 'foo' diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index a182f9e82c61c..fc19471bb5b32 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -329,7 +329,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'BY' expecting {')', ',', '-'}(line 1, pos 33) +Syntax error at or near 'BY'(line 1, pos 33) == SQL == SELECT * FROM rank() OVER (ORDER BY random()) diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index 139004345accb..b771a54f3f8f9 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -168,7 +168,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input '' expecting {'FROM', 'IN', 'LIKE'}(line 1, pos 19) +Syntax error at or near ''(line 1, pos 19) == SQL == SHOW TABLE EXTENDED @@ -193,7 +193,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'PARTITION' expecting {'FROM', 'IN', 'LIKE'}(line 1, pos 20) +Syntax error at or near 'PARTITION'(line 1, pos 20) == SQL == SHOW TABLE EXTENDED PARTITION(c='Us', d=1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index e26be63b10955..fb8f2ea6d8db2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -47,7 +47,7 @@ class SparkSqlParserSuite extends AnalysisTest { } private def intercept(sqlCommand: String, messages: String*): Unit = - interceptParseException(parser.parsePlan)(sqlCommand, messages: _*) + interceptParseException(parser.parsePlan)(sqlCommand, messages: _*)() test("Checks if SET/RESET can parse all the configurations") { // Force to build static SQL configurations diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateNamespaceParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateNamespaceParserSuite.scala index c3cb96814a506..69a208b942429 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateNamespaceParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateNamespaceParserSuite.scala @@ -107,5 +107,5 @@ class CreateNamespaceParserSuite extends AnalysisTest { } private def intercept(sqlCommand: String, messages: String*): Unit = - interceptParseException(parsePlan)(sqlCommand, messages: _*) + interceptParseException(parsePlan)(sqlCommand, messages: _*)() } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index 53d643d3ea901..1053cb9f2a772 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -46,7 +46,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { } private def intercept(sqlCommand: String, messages: String*): Unit = - interceptParseException(parser.parsePlan)(sqlCommand, messages: _*) + interceptParseException(parser.parsePlan)(sqlCommand, messages: _*)() private def compareTransformQuery(sql: String, expected: LogicalPlan): Unit = { val plan = parser.parsePlan(sql).asInstanceOf[ScriptTransformation].copy(ioschema = null) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 4a8defdad4105..5399f9674377a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1750,7 +1750,7 @@ class PlanResolutionSuite extends AnalysisTest { interceptParseException(parsePlan)( "CREATE TABLE my_tab(a: INT COMMENT 'test', b: STRING)", - "extraneous input ':'") + "extraneous input ':'")() } test("create hive table - table file format") { @@ -1875,7 +1875,7 @@ class PlanResolutionSuite extends AnalysisTest { test("Duplicate clauses - create hive table") { def intercept(sqlCommand: String, messages: String*): Unit = - interceptParseException(parsePlan)(sqlCommand, messages: _*) + interceptParseException(parsePlan)(sqlCommand, messages: _*)() def createTableHeader(duplicateClause: String): String = { s"CREATE TABLE my_tab(a INT, b STRING) STORED AS parquet $duplicateClause $duplicateClause" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtilsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtilsSuite.scala index 7d277c1ffaffe..cfcddbaf0d92d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtilsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtilsSuite.scala @@ -63,6 +63,6 @@ class JdbcUtilsSuite extends SparkFunSuite { JdbcUtils.getCustomSchema(tableSchema, "c3 DATE. C2 STRING", caseInsensitive) === StructType(Seq(StructField("c3", DateType, false), StructField("C2", StringType, false))) } - assert(mismatchedInput.getMessage.contains("mismatched input '.' expecting")) + assert(mismatchedInput.getMessage.contains("Syntax error at or near '.'")) } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 4af051746b96e..74288ca0bc170 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -630,7 +630,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { test("SPARK-37555: spark-sql should pass last unclosed comment to backend") { runCliWithin(2.minute)( // Only unclosed comment. 
- "/* SELECT /*+ HINT() 4; */;".stripMargin -> "mismatched input ';'", + "/* SELECT /*+ HINT() 4; */;".stripMargin -> "Syntax error at or near ';'", // Unclosed nested bracketed comment. "/* SELECT /*+ HINT() 4; */ SELECT 1;".stripMargin -> "1", // Unclosed comment with query. diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala index 177c227595162..9e29386475232 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala @@ -717,7 +717,7 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter """.stripMargin) }.getMessage - assert(e.contains("mismatched input 'ROW'")) + assert(e.contains("Syntax error at or near 'ROW'")) } } @@ -739,7 +739,7 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter """.stripMargin) }.getMessage - assert(e.contains("mismatched input 'ROW'")) + assert(e.contains("Syntax error at or near 'ROW'")) } } From e80d979f83cdd4ea3ecd2b84bbbbe180de96e5a1 Mon Sep 17 00:00:00 2001 From: Pablo Langa Date: Tue, 8 Mar 2022 10:52:53 +0800 Subject: [PATCH 417/513] [SPARK-37895][SQL] Filter push down column with quoted columns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? The problem happens when you have a column that is quoted because it has special characters. ``` select view1.`Имя1` , view1.`Имя2`, view2.`Имя3` from view1 left join view2 on view1.`Имя2`=view2.`Имя4` ``` It result in the following JDBC push down ``` SELECT "Имя3","Имя4" FROM "public"."tab2" WHERE ("`Имя4`" IS NOT NULL) ``` This PR fix this and in the push down query only remains the dialect quote ``` SELECT "Имя3","Имя4" FROM "public"."tab2" WHERE ("Имя4" IS NOT NULL) ``` ### Why are the changes needed? Fix this bug in the JDBC push down ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit testing Closes #35726 from planga82/bugfix/SPARK37895_push_down_quoted_identifiers. Authored-by: Pablo Langa Signed-off-by: Wenchen Fan --- .../execution/datasources/jdbc/JDBCRDD.scala | 11 +++- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 52 +++++++++++-------- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 15 +++++- 3 files changed, 53 insertions(+), 25 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index baee53847a5a4..3e3b15af81140 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -24,8 +24,10 @@ import scala.util.control.NonFatal import org.apache.spark.{InterruptibleIterator, Partition, SparkContext, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.expressions.SortOrder +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects} import org.apache.spark.sql.sources._ @@ -97,7 +99,14 @@ object JDBCRDD extends Logging { * Returns None for an unhandled filter. 
*/ def compileFilter(f: Filter, dialect: JdbcDialect): Option[String] = { - def quote(colName: String): String = dialect.quoteIdentifier(colName) + + def quote(colName: String): String = { + val nameParts = SparkSession.active.sessionState.sqlParser.parseMultipartIdentifier(colName) + if(nameParts.length > 1) { + throw QueryCompilationErrors.commandNotSupportNestedColumnError("Filter push down", colName) + } + dialect.quoteIdentifier(nameParts.head) + } Option(f match { case EqualTo(attr, value) => s"${quote(attr)} = ${dialect.compileValue(value)}" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 18e1f8c1aa67f..d32e958c7ca2b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -776,30 +776,36 @@ class JDBCSuite extends QueryTest val compileFilter = PrivateMethod[Option[String]](Symbol("compileFilter")) def doCompileFilter(f: Filter): String = JDBCRDD invokePrivate compileFilter(f, JdbcDialects.get("jdbc:")) getOrElse("") - assert(doCompileFilter(EqualTo("col0", 3)) === """"col0" = 3""") - assert(doCompileFilter(Not(EqualTo("col1", "abc"))) === """(NOT ("col1" = 'abc'))""") - assert(doCompileFilter(And(EqualTo("col0", 0), EqualTo("col1", "def"))) - === """("col0" = 0) AND ("col1" = 'def')""") - assert(doCompileFilter(Or(EqualTo("col0", 2), EqualTo("col1", "ghi"))) - === """("col0" = 2) OR ("col1" = 'ghi')""") - assert(doCompileFilter(LessThan("col0", 5)) === """"col0" < 5""") - assert(doCompileFilter(LessThan("col3", - Timestamp.valueOf("1995-11-21 00:00:00.0"))) === """"col3" < '1995-11-21 00:00:00.0'""") - assert(doCompileFilter(LessThan("col4", Date.valueOf("1983-08-04"))) - === """"col4" < '1983-08-04'""") - assert(doCompileFilter(LessThanOrEqual("col0", 5)) === """"col0" <= 5""") - assert(doCompileFilter(GreaterThan("col0", 3)) === """"col0" > 3""") - assert(doCompileFilter(GreaterThanOrEqual("col0", 3)) === """"col0" >= 3""") - assert(doCompileFilter(In("col1", Array("jkl"))) === """"col1" IN ('jkl')""") - assert(doCompileFilter(In("col1", Array.empty)) === - """CASE WHEN "col1" IS NULL THEN NULL ELSE FALSE END""") - assert(doCompileFilter(Not(In("col1", Array("mno", "pqr")))) - === """(NOT ("col1" IN ('mno', 'pqr')))""") - assert(doCompileFilter(IsNull("col1")) === """"col1" IS NULL""") - assert(doCompileFilter(IsNotNull("col1")) === """"col1" IS NOT NULL""") - assert(doCompileFilter(And(EqualNullSafe("col0", "abc"), EqualTo("col1", "def"))) - === """((NOT ("col0" != 'abc' OR "col0" IS NULL OR 'abc' IS NULL) """ + Seq(("col0", "col1"), ("`col0`", "`col1`")).foreach { case(col0, col1) => + assert(doCompileFilter(EqualTo(col0, 3)) === """"col0" = 3""") + assert(doCompileFilter(Not(EqualTo(col1, "abc"))) === """(NOT ("col1" = 'abc'))""") + assert(doCompileFilter(And(EqualTo(col0, 0), EqualTo(col1, "def"))) + === """("col0" = 0) AND ("col1" = 'def')""") + assert(doCompileFilter(Or(EqualTo(col0, 2), EqualTo(col1, "ghi"))) + === """("col0" = 2) OR ("col1" = 'ghi')""") + assert(doCompileFilter(LessThan(col0, 5)) === """"col0" < 5""") + assert(doCompileFilter(LessThan(col0, + Timestamp.valueOf("1995-11-21 00:00:00.0"))) === """"col0" < '1995-11-21 00:00:00.0'""") + assert(doCompileFilter(LessThan(col0, Date.valueOf("1983-08-04"))) + === """"col0" < '1983-08-04'""") + assert(doCompileFilter(LessThanOrEqual(col0, 5)) === """"col0" <= 5""") + assert(doCompileFilter(GreaterThan(col0, 3)) === 
""""col0" > 3""") + assert(doCompileFilter(GreaterThanOrEqual(col0, 3)) === """"col0" >= 3""") + assert(doCompileFilter(In(col1, Array("jkl"))) === """"col1" IN ('jkl')""") + assert(doCompileFilter(In(col1, Array.empty)) === + """CASE WHEN "col1" IS NULL THEN NULL ELSE FALSE END""") + assert(doCompileFilter(Not(In(col1, Array("mno", "pqr")))) + === """(NOT ("col1" IN ('mno', 'pqr')))""") + assert(doCompileFilter(IsNull(col1)) === """"col1" IS NULL""") + assert(doCompileFilter(IsNotNull(col1)) === """"col1" IS NOT NULL""") + assert(doCompileFilter(And(EqualNullSafe(col0, "abc"), EqualTo(col1, "def"))) + === """((NOT ("col0" != 'abc' OR "col0" IS NULL OR 'abc' IS NULL) """ + """OR ("col0" IS NULL AND 'abc' IS NULL))) AND ("col1" = 'def')""") + } + val e = intercept[AnalysisException] { + doCompileFilter(EqualTo("col0.nested", 3)) + }.getMessage + assert(e.contains("Filter push down does not support nested column: col0.nested")) } test("Dialect unregister") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 3f90fb47efb28..85ccf828873d1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -92,6 +92,10 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel // scalastyle:on conn.prepareStatement("INSERT INTO \"test\".\"person\" VALUES (1)").executeUpdate() conn.prepareStatement("INSERT INTO \"test\".\"person\" VALUES (2)").executeUpdate() + conn.prepareStatement( + """CREATE TABLE "test"."view1" ("|col1" INTEGER, "|col2" INTEGER)""").executeUpdate() + conn.prepareStatement( + """CREATE TABLE "test"."view2" ("|col1" INTEGER, "|col3" INTEGER)""").executeUpdate() } } @@ -317,7 +321,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel test("show tables") { checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people", false), Row("test", "empty_table", false), - Row("test", "employee", false), Row("test", "dept", false), Row("test", "person", false))) + Row("test", "employee", false), Row("test", "dept", false), Row("test", "person", false), + Row("test", "view1", false), Row("test", "view2", false))) } test("SQL API: create table as select") { @@ -1019,4 +1024,12 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel Row("david", 10000.00, 10000.000000, 1), Row("jen", 12000.00, 12000.000000, 1))) } + + test("SPARK-37895: JDBC push down with delimited special identifiers") { + val df = sql( + """SELECT h2.test.view1.`|col1`, h2.test.view1.`|col2`, h2.test.view2.`|col3` + |FROM h2.test.view1 LEFT JOIN h2.test.view2 + |ON h2.test.view1.`|col1` = h2.test.view2.`|col1`""".stripMargin) + checkAnswer(df, Seq.empty[Row]) + } } From e5ba617111ce0d0fc1ff7a069081d2d9009d862f Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Tue, 8 Mar 2022 16:38:23 +0800 Subject: [PATCH 418/513] [SPARK-38361][SQL] Add factory method `getConnection` into `JDBCDialect` ### What changes were proposed in this pull request? At present, the parameter of the factory method for obtaining JDBC connection is empty because the JDBC URL of some databases is fixed and unique. However, for databases such as ClickHouse, connection is related to the shard node. So I think the parameter form of `getConnection: Partition = > Connection` is more general. 
This PR adds factory method `getConnection` into `JDBCDialect` according to https://github.com/apache/spark/pull/35696#issuecomment-1058060107. ### Why are the changes needed? Make factory method `getConnection` more general. ### Does this PR introduce _any_ user-facing change? 'No'. Just inner change. ### How was this patch tested? Exists test. Closes #35727 from beliefer/SPARK-38361_new. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../execution/datasources/jdbc/JDBCRDD.scala | 8 ++--- .../jdbc/JdbcRelationProvider.scala | 5 +-- .../datasources/jdbc/JdbcUtils.scala | 31 +++---------------- .../jdbc/connection/ConnectionProvider.scala | 2 +- .../v2/jdbc/JDBCWriteBuilder.scala | 4 ++- .../apache/spark/sql/jdbc/JdbcDialects.scala | 28 +++++++++++++++-- 6 files changed, 42 insertions(+), 36 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 3e3b15af81140..3cd2e03828212 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -62,7 +62,7 @@ object JDBCRDD extends Logging { def getQueryOutputSchema( query: String, options: JDBCOptions, dialect: JdbcDialect): StructType = { - val conn: Connection = JdbcUtils.createConnectionFactory(options)() + val conn: Connection = dialect.createConnectionFactory(options)(-1) try { val statement = conn.prepareStatement(query) try { @@ -191,7 +191,7 @@ object JDBCRDD extends Logging { } new JDBCRDD( sc, - JdbcUtils.createConnectionFactory(options), + dialect.createConnectionFactory(options), outputSchema.getOrElse(pruneSchema(schema, requiredColumns)), quotedColumns, filters, @@ -213,7 +213,7 @@ object JDBCRDD extends Logging { */ private[jdbc] class JDBCRDD( sc: SparkContext, - getConnection: () => Connection, + getConnection: Int => Connection, schema: StructType, columns: Array[String], filters: Array[Filter], @@ -327,7 +327,7 @@ private[jdbc] class JDBCRDD( val inputMetrics = context.taskMetrics().inputMetrics val part = thePart.asInstanceOf[JDBCPartition] - conn = getConnection() + conn = getConnection(part.idx) val dialect = JdbcDialects.get(url) import scala.collection.JavaConverters._ dialect.beforeFetch(conn, options.asProperties.asScala.toMap) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcRelationProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcRelationProvider.scala index d953ba45cc2fb..2760c7ac3019c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcRelationProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcRelationProvider.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources.jdbc import org.apache.spark.sql.{DataFrame, SaveMode, SQLContext} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils._ +import org.apache.spark.sql.jdbc.JdbcDialects import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, DataSourceRegister, RelationProvider} class JdbcRelationProvider extends CreatableRelationProvider @@ -45,8 +46,8 @@ class JdbcRelationProvider extends CreatableRelationProvider df: DataFrame): BaseRelation = { val options = new JdbcOptionsInWrite(parameters) val 
isCaseSensitive = sqlContext.conf.caseSensitiveAnalysis - - val conn = JdbcUtils.createConnectionFactory(options)() + val dialect = JdbcDialects.get(options.url) + val conn = dialect.createConnectionFactory(options)(-1) try { val tableExists = JdbcUtils.tableExists(conn, options) if (tableExists) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index ed167c07756e2..6c67a22b8e3ce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.jdbc -import java.sql.{Connection, Driver, JDBCType, PreparedStatement, ResultSet, ResultSetMetaData, SQLException} +import java.sql.{Connection, JDBCType, PreparedStatement, ResultSet, ResultSetMetaData, SQLException} import java.time.{Instant, LocalDate} import java.util import java.util.Locale @@ -43,7 +43,6 @@ import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.index.{SupportsIndex, TableIndex} import org.apache.spark.sql.connector.expressions.NamedReference import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} -import org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects, JdbcType} import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils @@ -54,24 +53,6 @@ import org.apache.spark.util.NextIterator * Util functions for JDBC tables. */ object JdbcUtils extends Logging with SQLConfHelper { - /** - * Returns a factory for creating connections to the given JDBC URL. - * - * @param options - JDBC options that contains url, table and other information. - * @throws IllegalArgumentException if the driver could not open a JDBC connection. - */ - def createConnectionFactory(options: JDBCOptions): () => Connection = { - val driverClass: String = options.driverClass - () => { - DriverRegistry.register(driverClass) - val driver: Driver = DriverRegistry.get(driverClass) - val connection = - ConnectionProvider.create(driver, options.parameters, options.connectionProviderName) - require(connection != null, - s"The driver could not open a JDBC connection. Check the URL: ${options.url}") - connection - } - } /** * Returns true if the table already exists in the JDBC database. @@ -651,7 +632,6 @@ object JdbcUtils extends Logging with SQLConfHelper { * updated even with error if it doesn't support transaction, as there're dirty outputs. 
*/ def savePartition( - getConnection: () => Connection, table: String, iterator: Iterator[Row], rddSchema: StructType, @@ -667,7 +647,7 @@ object JdbcUtils extends Logging with SQLConfHelper { val outMetrics = TaskContext.get().taskMetrics().outputMetrics - val conn = getConnection() + val conn = dialect.createConnectionFactory(options)(-1) var committed = false var finalIsolationLevel = Connection.TRANSACTION_NONE @@ -874,7 +854,6 @@ object JdbcUtils extends Logging with SQLConfHelper { val table = options.table val dialect = JdbcDialects.get(url) val rddSchema = df.schema - val getConnection: () => Connection = createConnectionFactory(options) val batchSize = options.batchSize val isolationLevel = options.isolationLevel @@ -886,8 +865,7 @@ object JdbcUtils extends Logging with SQLConfHelper { case _ => df } repartitionedDF.rdd.foreachPartition { iterator => savePartition( - getConnection, table, iterator, rddSchema, insertStmt, batchSize, dialect, isolationLevel, - options) + table, iterator, rddSchema, insertStmt, batchSize, dialect, isolationLevel, options) } } @@ -1177,7 +1155,8 @@ object JdbcUtils extends Logging with SQLConfHelper { } def withConnection[T](options: JDBCOptions)(f: Connection => T): T = { - val conn = createConnectionFactory(options)() + val dialect = JdbcDialects.get(options.url) + val conn = dialect.createConnectionFactory(options)(-1) try { f(conn) } finally { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala index 84a62693a6e7d..0d8c80c9fc15c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala @@ -104,4 +104,4 @@ protected abstract class ConnectionProviderBase extends Logging { } } -private[jdbc] object ConnectionProvider extends ConnectionProviderBase +private[sql] object ConnectionProvider extends ConnectionProviderBase diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCWriteBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCWriteBuilder.scala index 0e6c72c2cc331..7449f66ee020f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCWriteBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCWriteBuilder.scala @@ -20,6 +20,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.connector.write._ import org.apache.spark.sql.execution.datasources.jdbc.{JdbcOptionsInWrite, JdbcUtils} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.jdbc.JdbcDialects import org.apache.spark.sql.sources.InsertableRelation import org.apache.spark.sql.types.StructType @@ -37,7 +38,8 @@ case class JDBCWriteBuilder(schema: StructType, options: JdbcOptionsInWrite) ext override def toInsertableRelation: InsertableRelation = (data: DataFrame, _: Boolean) => { // TODO (SPARK-32595): do truncate and append atomically. 
if (isTruncate) { - val conn = JdbcUtils.createConnectionFactory(options)() + val dialect = JdbcDialects.get(options.url) + val conn = dialect.createConnectionFactory(options)(-1) JdbcUtils.truncateTable(conn, options) } JdbcUtils.saveTable(data, Some(schema), SQLConf.get.caseSensitiveAnalysis, options) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index a7e0ec8b72a7c..c9dcbb2706cd4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.jdbc -import java.sql.{Connection, Date, Statement, Timestamp} +import java.sql.{Connection, Date, Driver, Statement, Timestamp} import java.time.{Instant, LocalDate} import java.util @@ -36,7 +36,8 @@ import org.apache.spark.sql.connector.expressions.{Expression, FieldReference, N import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Avg, Count, CountStar, Max, Min, Sum} import org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder import org.apache.spark.sql.errors.QueryCompilationErrors -import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} +import org.apache.spark.sql.execution.datasources.jdbc.{DriverRegistry, JDBCOptions, JdbcUtils} +import org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -100,6 +101,29 @@ abstract class JdbcDialect extends Serializable with Logging{ */ def getJDBCType(dt: DataType): Option[JdbcType] = None + /** + * Returns a factory for creating connections to the given JDBC URL. + * In general, creating a connection has nothing to do with JDBC partition id. + * But sometimes it is needed, such as a database with multiple shard nodes. + * @param options - JDBC options that contains url, table and other information. + * @return The factory method for creating JDBC connections with the RDD partition ID. -1 means + the connection is being created at the driver side. + * @throws IllegalArgumentException if the driver could not open a JDBC connection. + */ + @Since("3.3.0") + def createConnectionFactory(options: JDBCOptions): Int => Connection = { + val driverClass: String = options.driverClass + (partitionId: Int) => { + DriverRegistry.register(driverClass) + val driver: Driver = DriverRegistry.get(driverClass) + val connection = + ConnectionProvider.create(driver, options.parameters, options.connectionProviderName) + require(connection != null, + s"The driver could not open a JDBC connection. Check the URL: ${options.url}") + connection + } + } + /** * Quotes the identifier. This is used to put quotes around the identifier in case the column * name is a reserved keyword, or in case it contains characters that require quotes (e.g. space). From 4df8512b11dc9cc3a179fd5ccedf91af1f3fc6ee Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 8 Mar 2022 16:39:52 +0800 Subject: [PATCH 419/513] [SPARK-37283][SQL][FOLLOWUP] Avoid trying to store a table which contains timestamp_ntz types in Hive compatible format ### What changes were proposed in this pull request? This is PR is to avoid trying to store a table which contains `timestamp_ntz` types in Hive compatible format. 
In the current master, Spark tries to store such a table in Hive compatible format first, but it doesn't support `timestamp_ntz`. As a result warning is logged like as follows though it's finally stored in Spark specific format. ``` CREATE TABLE tbl1(a TIMESTAMP_NTZ) USING Parquet ... 21/11/22 21:57:18 WARN HiveExternalCatalog: Could not persist `default`.`tbl1` in a Hive compatible way. Persisting it into Hive metastore in Spark SQL specific format. org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.IllegalArgumentException: Error: type expected at the position 0 of 'timestamp_ntz' but 'timestamp_ntz' is found. at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:869) at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:874) at org.apache.spark.sql.hive.client.Shim_v0_12.createTable(HiveShim.scala:614) at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$createTable$1(HiveClientImpl.scala:554) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:304) at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:235) at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:234) at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:284) at org.apache.spark.sql.hive.client.HiveClientImpl.createTable(HiveClientImpl.scala:552) at org.apache.spark.sql.hive.HiveExternalCatalog.saveTableIntoHive(HiveExternalCatalog.scala:506) at org.apache.spark.sql.hive.HiveExternalCatalog.createDataSourceTable(HiveExternalCatalog.scala:404) at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$createTable$1(HiveExternalCatalog.scala:273) ... ``` ### Why are the changes needed? To fix the confusing behavior. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Modified the test added in #34551 Closes #34683 from sarutak/fix-timestampntz-hive-type. Authored-by: Kousuke Saruta Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/hive/HiveExternalCatalog.scala | 3 ++- .../apache/spark/sql/hive/MetastoreDataSourcesSuite.scala | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index e23f1fe27bbd9..fefa032d35105 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -46,7 +46,7 @@ import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOpti import org.apache.spark.sql.hive.client.HiveClient import org.apache.spark.sql.internal.HiveSerDe import org.apache.spark.sql.internal.StaticSQLConf._ -import org.apache.spark.sql.types.{AnsiIntervalType, ArrayType, DataType, MapType, StructType} +import org.apache.spark.sql.types.{AnsiIntervalType, ArrayType, DataType, MapType, StructType, TimestampNTZType} /** * A persistent implementation of the system catalog using Hive. 
@@ -1425,6 +1425,7 @@ object HiveExternalCatalog { private[spark] def isHiveCompatibleDataType(dt: DataType): Boolean = dt match { case _: AnsiIntervalType => false + case _: TimestampNTZType => false case s: StructType => s.forall(f => isHiveCompatibleDataType(f.dataType)) case a: ArrayType => isHiveCompatibleDataType(a.elementType) case m: MapType => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index dbe1b1234da99..16b5d6cf1bf8b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -1429,14 +1429,15 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv | c10 ARRAY, | c11 MAP, | c12 MAP, - | c13 MAP + | c13 MAP, + | c14 TIMESTAMP_NTZ |) USING Parquet""".stripMargin) } val expectedMsg = "Hive incompatible types found: interval day to minute, " + "interval year to month, interval hour, interval month, " + "struct, " + "array, map, " + - "map. " + + "map, timestamp_ntz. " + "Persisting data source table `default`.`t` into Hive metastore in " + "Spark SQL specific format, which is NOT compatible with Hive." val actualMessages = logAppender.loggingEvents @@ -1467,7 +1468,8 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv StructField("c10", ArrayType(YearMonthIntervalType(YEAR))), StructField("c11", MapType(IntegerType, StringType)), StructField("c12", MapType(IntegerType, DayTimeIntervalType(DAY))), - StructField("c13", MapType(DayTimeIntervalType(MINUTE, SECOND), StringType))))) + StructField("c13", MapType(DayTimeIntervalType(MINUTE, SECOND), StringType)), + StructField("c14", TimestampNTZType)))) } } From 9e1d00c521964a6bbf7e0126fd6dcf0020509420 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Tue, 8 Mar 2022 16:51:39 +0800 Subject: [PATCH 420/513] [SPARK-38406][SQL] Improve perfermance of ShufflePartitionsUtil createSkewPartitionSpecs ### What changes were proposed in this pull request? Avoid unnecessary scala syntactic sugar. ### Why are the changes needed? If shuffle is skewed with tens of thousands of map partitions and reduce partitions in AQE, the method `ShufflePartitionsUtil#createSkewPartitionSpecs` will be very slow. More unfortunately, it is running at driver side. I test with local env using 50,000 maps and 10,000 reduces. We can see the cpu time using build seq. See the Flame Graph: ![image](https://user-images.githubusercontent.com/12025282/156567065-6d9bffe9-3ab3-469d-92e8-da9687a8e5a8.png) And the perfermance number: - before: 47,336 ms - aflter: 9,274 ms ### Does this PR introduce _any_ user-facing change? no, only improve perfermance ### How was this patch tested? Pass CI and test perfermance local. Closes #35722 from ulysses-you/SPARK-38406. 
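For reference, a rough standalone sketch contrasting the two styles on toy data (the variable names mirror the patch below, but the snippet itself is illustrative and the sizes are made up; it is not the actual change):

```scala
// Toy illustration of the hot path: summing a slice of map output sizes per reducer partition.
object SumStylesExample {
  def main(args: Array[String]): Unit = {
    val mapPartitionSizes: Array[Long] = Array.fill(50000)(1L)
    val startMapIndex = 0
    val endMapIndex = mapPartitionSizes.length

    // Collection style: materializes a Range plus a mapped sequence for every reducer partition.
    val viaSeq = startMapIndex.until(endMapIndex).map(mapPartitionSizes(_)).sum

    // While-loop style: a single Long accumulator over the primitive array, no intermediate allocations.
    var dataSize = 0L
    var mapIndex = startMapIndex
    while (mapIndex < endMapIndex) {
      dataSize += mapPartitionSizes(mapIndex)
      mapIndex += 1
    }
    assert(viaSeq == dataSize)
  }
}
```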
Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../adaptive/ShufflePartitionsUtil.scala | 9 +++++++-- .../execution/ShufflePartitionsUtilSuite.scala | 16 ++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala index 0251f803786c8..af689db337987 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala @@ -317,7 +317,7 @@ object ShufflePartitionsUtil extends Logging { */ // Visible for testing private[sql] def splitSizeListByTargetSize( - sizes: Seq[Long], + sizes: Array[Long], targetSize: Long, smallPartitionFactor: Double): Array[Int] = { val partitionStartIndices = ArrayBuffer[Int]() @@ -394,7 +394,12 @@ object ShufflePartitionsUtil extends Logging { } else { mapStartIndices(i + 1) } - val dataSize = startMapIndex.until(endMapIndex).map(mapPartitionSizes(_)).sum + var dataSize = 0L + var mapIndex = startMapIndex + while (mapIndex < endMapIndex) { + dataSize += mapPartitionSizes(mapIndex) + mapIndex += 1 + } PartialReducerPartitionSpec(reducerId, startMapIndex, endMapIndex, dataSize) }) } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ShufflePartitionsUtilSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ShufflePartitionsUtilSuite.scala index 99856650fea1f..da05373125d31 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ShufflePartitionsUtilSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ShufflePartitionsUtilSuite.scala @@ -705,50 +705,50 @@ class ShufflePartitionsUtilSuite extends SparkFunSuite with LocalSparkContext { val smallPartitionFactor1 = ShufflePartitionsUtil.SMALL_PARTITION_FACTOR // merge the small partitions at the beginning/end - val sizeList1 = Seq[Long](15, 90, 15, 15, 15, 90, 15) + val sizeList1 = Array[Long](15, 90, 15, 15, 15, 90, 15) assert(ShufflePartitionsUtil.splitSizeListByTargetSize( sizeList1, targetSize, smallPartitionFactor1).toSeq == Seq(0, 2, 5)) // merge the small partitions in the middle - val sizeList2 = Seq[Long](30, 15, 90, 10, 90, 15, 30) + val sizeList2 = Array[Long](30, 15, 90, 10, 90, 15, 30) assert(ShufflePartitionsUtil.splitSizeListByTargetSize( sizeList2, targetSize, smallPartitionFactor1).toSeq == Seq(0, 2, 4, 5)) // merge small partitions if the partition itself is smaller than // targetSize * SMALL_PARTITION_FACTOR - val sizeList3 = Seq[Long](15, 1000, 15, 1000) + val sizeList3 = Array[Long](15, 1000, 15, 1000) assert(ShufflePartitionsUtil.splitSizeListByTargetSize( sizeList3, targetSize, smallPartitionFactor1).toSeq == Seq(0, 3)) // merge small partitions if the combined size is smaller than // targetSize * MERGED_PARTITION_FACTOR - val sizeList4 = Seq[Long](35, 75, 90, 20, 35, 25, 35) + val sizeList4 = Array[Long](35, 75, 90, 20, 35, 25, 35) assert(ShufflePartitionsUtil.splitSizeListByTargetSize( sizeList4, targetSize, smallPartitionFactor1).toSeq == Seq(0, 2, 3)) val smallPartitionFactor2 = 0.5 // merge last two partition if their size is not bigger than smallPartitionFactor * target - val sizeList5 = Seq[Long](50, 50, 40, 5) + val sizeList5 = Array[Long](50, 50, 40, 5) assert(ShufflePartitionsUtil.splitSizeListByTargetSize( sizeList5, targetSize, smallPartitionFactor2).toSeq == Seq(0)) - val 
sizeList6 = Seq[Long](40, 5, 50, 45) + val sizeList6 = Array[Long](40, 5, 50, 45) assert(ShufflePartitionsUtil.splitSizeListByTargetSize( sizeList6, targetSize, smallPartitionFactor2).toSeq == Seq(0)) // do not merge - val sizeList7 = Seq[Long](50, 50, 10, 40, 5) + val sizeList7 = Array[Long](50, 50, 10, 40, 5) assert(ShufflePartitionsUtil.splitSizeListByTargetSize( sizeList7, targetSize, smallPartitionFactor2).toSeq == Seq(0, 2)) - val sizeList8 = Seq[Long](10, 40, 5, 50, 50) + val sizeList8 = Array[Long](10, 40, 5, 50, 50) assert(ShufflePartitionsUtil.splitSizeListByTargetSize( sizeList8, targetSize, smallPartitionFactor2).toSeq == Seq(0, 3)) From cd32c22c9373333d2bd3b89a4ffae1b549396658 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 8 Mar 2022 19:22:39 +0900 Subject: [PATCH 421/513] [SPARK-38240][SQL][FOLLOW-UP] Make RuntimeReplaceableAggregate as an add-on of AggregateFunction ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/35534 (https://github.com/apache/spark/pull/35534#discussion_r809708015) that proposes to make `RuntimeReplaceableAggregate` as an add-on of `AggregateFunction`. Now, `ReplaceableAggregateFunction` is a mix-in for `AggregateFunction` that makes the expression can be replaced runtime. ### Why are the changes needed? To make the hierarchy of class similar. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? I tested that it compiles fine. CI in this PR will test it out. Closes #35626 from HyukjinKwon/SPARK-38240-followup. Lead-authored-by: Hyukjin Kwon Co-authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .../catalyst/analysis/FunctionRegistry.scala | 7 +++--- .../sql/catalyst/expressions/Expression.scala | 10 ++++---- .../expressions/aggregate/CountIf.scala | 7 ++++-- .../aggregate/boolAggregates.scala | 4 ++-- .../aggregate/linearRegression.scala | 23 +++++++++++++++---- 5 files changed, 34 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index e01457cbca78a..e131bd87626b9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -344,9 +344,10 @@ object FunctionRegistry { // will be replaced by the actual expression at the end of analysis. See `Left` as an example. // - The function can be implemented by combining some existing expressions. We can use // `RuntimeReplaceable` to define the combination. See `ParseToDate` as an example. - // We can also inherit the analysis behavior from the replacement expression, by - // mixing `InheritAnalysisRules`. See `TryAdd` as an example. - // - Similarly, we can use `RuntimeReplaceableAggregate` to implement new aggregate functions. + // To inherit the analysis behavior from the replacement expression + // mix-in `InheritAnalysisRules` with `RuntimeReplaceable`. See `TryAdd` as an example. + // - For `AggregateFunction`, `RuntimeReplaceableAggregate` should be mixed-in. See + // `CountIf` as an example. // // Sometimes, multiple functions share the same/similar expression replacement logic and it's // tedious to create many similar `RuntimeReplaceable` expressions. 
We can use `ExpressionBuilder` diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 4ff5267af03eb..b5695e8c87268 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -390,17 +390,17 @@ trait InheritAnalysisRules extends UnaryLike[Expression] { self: RuntimeReplacea } /** - * An aggregate expression that gets rewritten (currently by the optimizer) into a + * An add-on of [[AggregateFunction]]. This gets rewritten (currently by the optimizer) into a * different aggregate expression for evaluation. This is mainly used to provide compatibility * with other databases. For example, we use this to support every, any/some aggregates by rewriting * them with Min and Max respectively. */ -abstract class RuntimeReplaceableAggregate extends AggregateFunction with RuntimeReplaceable { - def aggBufferSchema: StructType = throw new IllegalStateException( +trait RuntimeReplaceableAggregate extends RuntimeReplaceable { self: AggregateFunction => + override def aggBufferSchema: StructType = throw new IllegalStateException( "RuntimeReplaceableAggregate.aggBufferSchema should not be called") - def aggBufferAttributes: Seq[AttributeReference] = throw new IllegalStateException( + override def aggBufferAttributes: Seq[AttributeReference] = throw new IllegalStateException( "RuntimeReplaceableAggregate.aggBufferAttributes should not be called") - def inputAggBufferAttributes: Seq[AttributeReference] = throw new IllegalStateException( + override def inputAggBufferAttributes: Seq[AttributeReference] = throw new IllegalStateException( "RuntimeReplaceableAggregate.inputAggBufferAttributes should not be called") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountIf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountIf.scala index 6973641a6bf33..248ade05ab1d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountIf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountIf.scala @@ -34,8 +34,11 @@ import org.apache.spark.sql.types.{AbstractDataType, BooleanType} """, group = "agg_funcs", since = "3.0.0") -case class CountIf(child: Expression) extends RuntimeReplaceableAggregate - with ImplicitCastInputTypes with UnaryLike[Expression] { +case class CountIf(child: Expression) + extends AggregateFunction + with RuntimeReplaceableAggregate + with ImplicitCastInputTypes + with UnaryLike[Expression] { override lazy val replacement: Expression = Count(new NullIf(child, Literal.FalseLiteral)) override def nodeName: String = "count_if" override def inputTypes: Seq[AbstractDataType] = Seq(BooleanType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/boolAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/boolAggregates.scala index 59c75f21c9a0f..ae759abf8a4f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/boolAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/boolAggregates.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.types._ """, group = "agg_funcs", since = "3.0.0") -case class 
BoolAnd(child: Expression) extends RuntimeReplaceableAggregate +case class BoolAnd(child: Expression) extends AggregateFunction with RuntimeReplaceableAggregate with ImplicitCastInputTypes with UnaryLike[Expression] { override lazy val replacement: Expression = Min(child) override def nodeName: String = "bool_and" @@ -56,7 +56,7 @@ case class BoolAnd(child: Expression) extends RuntimeReplaceableAggregate """, group = "agg_funcs", since = "3.0.0") -case class BoolOr(child: Expression) extends RuntimeReplaceableAggregate +case class BoolOr(child: Expression) extends AggregateFunction with RuntimeReplaceableAggregate with ImplicitCastInputTypes with UnaryLike[Expression] { override lazy val replacement: Expression = Max(child) override def nodeName: String = "bool_or" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala index 1ad7cbeb2422a..8507069a7ac26 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala @@ -37,7 +37,10 @@ import org.apache.spark.sql.types.{AbstractDataType, NumericType} group = "agg_funcs", since = "3.3.0") case class RegrCount(left: Expression, right: Expression) - extends RuntimeReplaceableAggregate with ImplicitCastInputTypes with BinaryLike[Expression] { + extends AggregateFunction + with RuntimeReplaceableAggregate + with ImplicitCastInputTypes + with BinaryLike[Expression] { override lazy val replacement: Expression = Count(Seq(left, right)) override def nodeName: String = "regr_count" override def inputTypes: Seq[AbstractDataType] = Seq(NumericType, NumericType) @@ -65,8 +68,13 @@ case class RegrCount(left: Expression, right: Expression) group = "agg_funcs", since = "3.3.0") // scalastyle:on line.size.limit -case class RegrAvgX(left: Expression, right: Expression) - extends RuntimeReplaceableAggregate with ImplicitCastInputTypes with BinaryLike[Expression] { +case class RegrAvgX( + left: Expression, + right: Expression) + extends AggregateFunction + with RuntimeReplaceableAggregate + with ImplicitCastInputTypes + with BinaryLike[Expression] { override lazy val replacement: Expression = Average(If(And(IsNotNull(left), IsNotNull(right)), right, Literal.create(null, right.dataType))) override def nodeName: String = "regr_avgx" @@ -95,8 +103,13 @@ case class RegrAvgX(left: Expression, right: Expression) group = "agg_funcs", since = "3.3.0") // scalastyle:on line.size.limit -case class RegrAvgY(left: Expression, right: Expression) - extends RuntimeReplaceableAggregate with ImplicitCastInputTypes with BinaryLike[Expression] { +case class RegrAvgY( + left: Expression, + right: Expression) + extends AggregateFunction + with RuntimeReplaceableAggregate + with ImplicitCastInputTypes + with BinaryLike[Expression] { override lazy val replacement: Expression = Average(If(And(IsNotNull(left), IsNotNull(right)), left, Literal.create(null, left.dataType))) override def nodeName: String = "regr_avgy" From 985445626848cd8a4b919b687390870bf242a208 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 8 Mar 2022 05:45:40 -0800 Subject: [PATCH 422/513] [SPARK-35956][K8S][FOLLOWP] Fix typos in config names ### What changes were proposed in this pull request? 
This is a follow-up of #33270 to fix typos in config names ```scala - ConfigBuilder("spark.kubernetes.executor.decommmissionLabel") + ConfigBuilder("spark.kubernetes.executor.decommissionLabel") - ConfigBuilder("spark.kubernetes.executor.decommmissionLabelValue") + ConfigBuilder("spark.kubernetes.executor.decommissionLabelValue") ``` ### Why are the changes needed? To fix them before Apache Spark 3.3 branch cut. ### Does this PR introduce _any_ user-facing change? No, this is not released yet. ### How was this patch tested? Pass the CIs. Closes #35767 from dongjoon-hyun/SPARK-35956. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 6 +++--- .../src/main/scala/org/apache/spark/deploy/k8s/Config.scala | 4 ++-- .../k8s/KubernetesClusterSchedulerBackendSuite.scala | 4 ++-- .../deploy/k8s/integrationtest/DecommissionSuite.scala | 5 +++-- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 971c0a6078db4..79e01a35e2c57 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1314,7 +1314,7 @@ See the [configuration page](configuration.html) for information on Spark config 3.0.0 - spark.kubernetes.executor.decommmissionLabel + spark.kubernetes.executor.decommissionLabel (none) Label to be applied to pods which are exiting or being decommissioned. Intended for use @@ -1323,11 +1323,11 @@ See the [configuration page](configuration.html) for information on Spark config 3.3.0 - spark.kubernetes.executor.decommmissionLabelValue + spark.kubernetes.executor.decommissionLabelValue (none) Value to be applied with the label when - spark.kubernetes.executor.decommmissionLabel is enabled. + spark.kubernetes.executor.decommissionLabel is enabled. 3.3.0 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 58a4a785b5182..a0270fa29a2ed 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -381,7 +381,7 @@ private[spark] object Config extends Logging { .createWithDefault(Nil) val KUBERNETES_EXECUTOR_DECOMMISSION_LABEL = - ConfigBuilder("spark.kubernetes.executor.decommmissionLabel") + ConfigBuilder("spark.kubernetes.executor.decommissionLabel") .doc("Label to apply to a pod which is being decommissioned." + " Designed for use with pod disruption budgets and similar mechanism" + " such as pod-deletion-cost.") @@ -390,7 +390,7 @@ private[spark] object Config extends Logging { .createOptional val KUBERNETES_EXECUTOR_DECOMMISSION_LABEL_VALUE = - ConfigBuilder("spark.kubernetes.executor.decommmissionLabelValue") + ConfigBuilder("spark.kubernetes.executor.decommissionLabelValue") .doc("Label value to apply to a pod which is being decommissioned." 
+ " Designed for use with pod disruption budgets and similar mechanism" + " such as pod-deletion-cost.") diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala index 53aaba206fe48..9c31f9f912f01 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala @@ -45,8 +45,8 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn private val sparkConf = new SparkConf(false) .set("spark.executor.instances", "3") .set("spark.app.id", TEST_SPARK_APP_ID) - .set("spark.kubernetes.executor.decommmissionLabel", "soLong") - .set("spark.kubernetes.executor.decommmissionLabelValue", "cruelWorld") + .set(KUBERNETES_EXECUTOR_DECOMMISSION_LABEL.key, "soLong") + .set(KUBERNETES_EXECUTOR_DECOMMISSION_LABEL_VALUE.key, "cruelWorld") @Mock private var sc: SparkContext = _ diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala index ca6108daa4de4..51ea1307236c8 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala @@ -27,6 +27,7 @@ import org.scalatest.concurrent.{Eventually, PatienceConfiguration} import org.scalatest.matchers.should.Matchers._ import org.scalatest.time.{Minutes, Seconds, Span} +import org.apache.spark.deploy.k8s.Config.{KUBERNETES_EXECUTOR_DECOMMISSION_LABEL, KUBERNETES_EXECUTOR_DECOMMISSION_LABEL_VALUE} import org.apache.spark.internal.config import org.apache.spark.internal.config.PLUGINS @@ -140,8 +141,8 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => // give enough time to validate the labels are set. .set("spark.storage.decommission.replicationReattemptInterval", "75") // Configure labels for decommissioning pods. - .set("spark.kubernetes.executor.decommmissionLabel", "solong") - .set("spark.kubernetes.executor.decommmissionLabelValue", "cruelworld") + .set(KUBERNETES_EXECUTOR_DECOMMISSION_LABEL.key, "solong") + .set(KUBERNETES_EXECUTOR_DECOMMISSION_LABEL_VALUE.key, "cruelworld") // This is called on all exec pods but we only care about exec 0 since it's the "first." // We only do this inside of this test since the other tests trigger k8s side deletes where we From 13021ed069e101e8d3fc5d3f21f9cba54976838b Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 8 Mar 2022 21:58:47 +0800 Subject: [PATCH 423/513] [SPARK-38442][SQL] Fix ConstantFoldingSuite/ColumnExpressionSuite/DataFrameSuite/AdaptiveQueryExecSuite under ANSI mode ### What changes were proposed in this pull request? Fix the following test suites under ANSI mode: * ConstantFoldingSuite * ColumnExpressionSuite * DataFrameSuite * DataFrameFunctionsSuite * AdaptiveQueryExecSuite ### Why are the changes needed? 
To set up a new GA test job with ANSI mode on ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually turn on ANSI mode and test . Also it should pass GA tests. Closes #35761 from gengliangwang/fixAQE. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../optimizer/ConstantFoldingSuite.scala | 2 +- .../spark/sql/ColumnExpressionSuite.scala | 8 +- .../spark/sql/DataFrameFunctionsSuite.scala | 116 ++++++++++-------- .../org/apache/spark/sql/DataFrameSuite.scala | 14 ++- .../adaptive/AdaptiveQueryExecSuite.scala | 2 +- 5 files changed, 81 insertions(+), 61 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 6f4f70423357b..a2ee2a2fb6813 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -144,7 +144,7 @@ class ConstantFoldingSuite extends PlanTest { testRelation .select( Cast(Literal("2"), IntegerType) + Literal(3) + 'a as Symbol("c1"), - Coalesce(Seq(Cast(Literal("abc"), IntegerType), Literal(3))) as Symbol("c2")) + Coalesce(Seq(TryCast(Literal("abc"), IntegerType), Literal(3))) as Symbol("c2")) val optimized = Optimize.execute(originalQuery.analyze) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index baf46b3c54c55..995bf5d903ad4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -281,9 +281,11 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { testData.select(isnan($"a"), isnan($"b")), Row(true, true) :: Row(true, true) :: Row(false, false) :: Row(false, false) :: Nil) - checkAnswer( - sql("select isnan(15), isnan('invalid')"), - Row(false, false)) + if (!conf.ansiEnabled) { + checkAnswer( + sql("select isnan(15), isnan('invalid')"), + Row(false, false)) + } } test("nanvl") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 3999ae8620331..4d82d110a4c51 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -579,8 +579,10 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { } test("array size function - legacy") { - withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "true") { - testSizeOfArray(sizeOfNull = -1) + if (!conf.ansiEnabled) { + withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "true") { + testSizeOfArray(sizeOfNull = -1) + } } } @@ -732,8 +734,10 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { } test("map size function - legacy") { - withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "true") { - testSizeOfMap(sizeOfNull = -1: Int) + if (!conf.ansiEnabled) { + withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "true") { + testSizeOfMap(sizeOfNull = -1: Int) + } } } @@ -1027,15 +1031,17 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row(false)) ) - val e1 = intercept[AnalysisException] { - OneRowRelation().selectExpr("array_contains(array(1), 
.01234567890123456790123456780)") - } - val errorMsg1 = - s""" - |Input to function array_contains should have been array followed by a - |value with same element type, but it's [array, decimal(38,29)]. + if (!conf.ansiEnabled) { + val e1 = intercept[AnalysisException] { + OneRowRelation().selectExpr("array_contains(array(1), .01234567890123456790123456780)") + } + val errorMsg1 = + s""" + |Input to function array_contains should have been array followed by a + |value with same element type, but it's [array, decimal(38,29)]. """.stripMargin.replace("\n", " ").trim() - assert(e1.message.contains(errorMsg1)) + assert(e1.message.contains(errorMsg1)) + } val e2 = intercept[AnalysisException] { OneRowRelation().selectExpr("array_contains(array(1), 'foo')") @@ -1464,41 +1470,43 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row(null), Row(null), Row(null)) ) } - checkAnswer( - df.select(element_at(df("a"), 4)), - Seq(Row(null), Row(null), Row(null)) - ) - checkAnswer( - df.select(element_at(df("a"), df("b"))), - Seq(Row("1"), Row(""), Row(null)) - ) - checkAnswer( - df.selectExpr("element_at(a, b)"), - Seq(Row("1"), Row(""), Row(null)) - ) + if (!conf.ansiEnabled) { + checkAnswer( + df.select(element_at(df("a"), 4)), + Seq(Row(null), Row(null), Row(null)) + ) + checkAnswer( + df.select(element_at(df("a"), df("b"))), + Seq(Row("1"), Row(""), Row(null)) + ) + checkAnswer( + df.selectExpr("element_at(a, b)"), + Seq(Row("1"), Row(""), Row(null)) + ) - checkAnswer( - df.select(element_at(df("a"), 1)), - Seq(Row("1"), Row(null), Row(null)) - ) - checkAnswer( - df.select(element_at(df("a"), -1)), - Seq(Row("3"), Row(""), Row(null)) - ) + checkAnswer( + df.select(element_at(df("a"), 1)), + Seq(Row("1"), Row(null), Row(null)) + ) + checkAnswer( + df.select(element_at(df("a"), -1)), + Seq(Row("3"), Row(""), Row(null)) + ) - checkAnswer( - df.selectExpr("element_at(a, 4)"), - Seq(Row(null), Row(null), Row(null)) - ) + checkAnswer( + df.selectExpr("element_at(a, 4)"), + Seq(Row(null), Row(null), Row(null)) + ) - checkAnswer( - df.selectExpr("element_at(a, 1)"), - Seq(Row("1"), Row(null), Row(null)) - ) - checkAnswer( - df.selectExpr("element_at(a, -1)"), - Seq(Row("3"), Row(""), Row(null)) - ) + checkAnswer( + df.selectExpr("element_at(a, 1)"), + Seq(Row("1"), Row(null), Row(null)) + ) + checkAnswer( + df.selectExpr("element_at(a, -1)"), + Seq(Row("3"), Row(""), Row(null)) + ) + } val e1 = intercept[AnalysisException] { Seq(("a string element", 1)).toDF().selectExpr("element_at(_1, _2)") @@ -1560,10 +1568,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row("a")) ) - checkAnswer( - OneRowRelation().selectExpr("element_at(map(1, 'a', 2, 'b'), 1.23D)"), - Seq(Row(null)) - ) + if (!conf.ansiEnabled) { + checkAnswer( + OneRowRelation().selectExpr("element_at(map(1, 'a', 2, 'b'), 1.23D)"), + Seq(Row(null)) + ) + } val e3 = intercept[AnalysisException] { OneRowRelation().selectExpr("element_at(map(1, 'a', 2, 'b'), '1')") @@ -1638,10 +1648,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { // Simple test cases def simpleTest(): Unit = { - checkAnswer ( - df.select(concat($"i1", $"s1")), - Seq(Row(Seq("1", "a", "b", "c")), Row(Seq("1", "0", "a"))) - ) + if (!conf.ansiEnabled) { + checkAnswer( + df.select(concat($"i1", $"s1")), + Seq(Row(Seq("1", "a", "b", "c")), Row(Seq("1", "0", "a"))) + ) + } checkAnswer( df.select(concat($"i1", $"i2", $"i3")), Seq(Row(Seq(1, 2, 3, 5, 6)), Row(Seq(1, 0, 2))) diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 3eb976498b8b5..eb0dbb1289b1b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -86,7 +86,9 @@ class DataFrameSuite extends QueryTest test("access complex data") { assert(complexData.filter(complexData("a").getItem(0) === 2).count() == 1) - assert(complexData.filter(complexData("m").getItem("1") === 1).count() == 1) + if (!conf.ansiEnabled) { + assert(complexData.filter(complexData("m").getItem("1") === 1).count() == 1) + } assert(complexData.filter(complexData("s").getField("key") === 1).count() == 1) } @@ -1563,7 +1565,9 @@ class DataFrameSuite extends QueryTest test("SPARK-7133: Implement struct, array, and map field accessor") { assert(complexData.filter(complexData("a")(0) === 2).count() == 1) - assert(complexData.filter(complexData("m")("1") === 1).count() == 1) + if (!conf.ansiEnabled) { + assert(complexData.filter(complexData("m")("1") === 1).count() == 1) + } assert(complexData.filter(complexData("s")("key") === 1).count() == 1) assert(complexData.filter(complexData("m")(complexData("s")("value")) === 1).count() == 1) assert(complexData.filter(complexData("a")(complexData("s")("key")) === 1).count() == 1) @@ -2458,8 +2462,10 @@ class DataFrameSuite extends QueryTest val aggPlusSort2 = df.groupBy(col("name")).agg(count(col("name"))).orderBy(col("name")) checkAnswer(aggPlusSort1, aggPlusSort2.collect()) - val aggPlusFilter1 = df.groupBy(df("name")).agg(count(df("name"))).filter(df("name") === 0) - val aggPlusFilter2 = df.groupBy(col("name")).agg(count(col("name"))).filter(col("name") === 0) + val aggPlusFilter1 = + df.groupBy(df("name")).agg(count(df("name"))).filter(df("name") === "test1") + val aggPlusFilter2 = + df.groupBy(col("name")).agg(count(col("name"))).filter(col("name") === "test1") checkAnswer(aggPlusFilter1, aggPlusFilter2.collect()) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index c24cc2bab9fd1..c69712369dbdf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -1648,7 +1648,7 @@ class AdaptiveQueryExecSuite | SELECT * FROM testData WHERE key = 1 |) |RIGHT OUTER JOIN testData2 - |ON value = b + |ON CAST(value AS INT) = b """.stripMargin) withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80") { From 8a0b101aa46976ccc36ff457319f8e38964f7dfd Mon Sep 17 00:00:00 2001 From: Tengfei Huang Date: Tue, 8 Mar 2022 18:15:03 +0300 Subject: [PATCH 424/513] [SPARK-38112][SQL] Use error classes in the execution errors of date/timestamp handling ### What changes were proposed in this pull request? Migrate the following errors in QueryExecutionErrors to use error classes: - sparkUpgradeInReadingDatesError => INCONSISTENT_BEHAVIOR_CROSS_VERSION - sparkUpgradeInWritingDatesError => INCONSISTENT_BEHAVIOR_CROSS_VERSION - timeZoneIdNotSpecifiedForTimestampTypeError => UNSUPPORTED_OPERATION - cannotConvertOrcTimestampToTimestampNTZError => UNSUPPORTED_OPERATION ### Why are the changes needed? Porting date/timestamp execution errors to the new error framework. ### Does this PR introduce _any_ user-facing change?
No ### How was this patch tested? UT added. Closes #35670 from ivoson/SPARK-38112-Rebase. Lead-authored-by: Tengfei Huang Co-authored-by: Huang Tengfei Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 6 + .../org/apache/spark/SparkException.scala | 19 ++- .../sql/errors/QueryExecutionErrors.scala | 67 +++++++---- .../expressions/DateExpressionsSuite.scala | 2 +- .../sql-tests/results/ansi/date.sql.out | 6 +- .../ansi/datetime-parsing-invalid.sql.out | 16 +-- .../sql-tests/results/ansi/timestamp.sql.out | 12 +- .../resources/sql-tests/results/date.sql.out | 6 +- .../datetime-formatting-invalid.sql.out | 44 +++---- .../results/datetime-parsing-invalid.sql.out | 16 +-- .../sql-tests/results/json-functions.sql.out | 4 +- .../sql-tests/results/timestamp.sql.out | 12 +- .../timestampNTZ/timestamp-ansi.sql.out | 2 +- .../results/timestampNTZ/timestamp.sql.out | 2 +- .../native/stringCastAndExpressions.sql.out | 6 +- .../apache/spark/sql/DateFunctionsSuite.scala | 4 +- .../errors/QueryExecutionErrorsSuite.scala | 109 +++++++++++++++++- .../datasources/orc/OrcQuerySuite.scala | 26 ----- .../execution/datasources/orc/OrcTest.scala | 2 +- 19 files changed, 238 insertions(+), 123 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 663454fa74b64..55e9373256f83 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -70,6 +70,9 @@ "INCOMPATIBLE_DATASOURCE_REGISTER" : { "message" : [ "Detected an incompatible DataSourceRegister. Please remove the incompatible library from classpath or upgrade it. Error: %s" ] }, + "INCONSISTENT_BEHAVIOR_CROSS_VERSION" : { + "message" : [ "You may get a different result due to the upgrading to Spark >= %s: %s" ] + }, "INDEX_OUT_OF_BOUNDS" : { "message" : [ "Index %s must be between 0 and the length of the ArrayData." ], "sqlState" : "22023" @@ -162,6 +165,9 @@ "UNSUPPORTED_GROUPING_EXPRESSION" : { "message" : [ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup" ] }, + "UNSUPPORTED_OPERATION" : { + "message" : [ "The operation is not supported: %s" ] + }, "WRITING_JOB_ABORTED" : { "message" : [ "Writing job aborted" ], "sqlState" : "40000" diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala index aea09e36ade74..8442c8eb8d35d 100644 --- a/core/src/main/scala/org/apache/spark/SparkException.scala +++ b/core/src/main/scala/org/apache/spark/SparkException.scala @@ -71,9 +71,22 @@ private[spark] case class ExecutorDeadException(message: String) /** * Exception thrown when Spark returns different result after upgrading to a new version. */ -private[spark] class SparkUpgradeException(version: String, message: String, cause: Throwable) - extends RuntimeException("You may get a different result due to the upgrading of Spark" + - s" $version: $message", cause) +private[spark] class SparkUpgradeException( + errorClass: String, + messageParameters: Array[String], + cause: Throwable) + extends RuntimeException(SparkThrowableHelper.getMessage(errorClass, messageParameters), cause) + with SparkThrowable { + + def this(version: String, message: String, cause: Throwable) = + this ( + errorClass = "INCONSISTENT_BEHAVIOR_CROSS_VERSION", + messageParameters = Array(version, message), + cause = cause + ) + + override def getErrorClass: String = errorClass +} /** * Arithmetic exception thrown from Spark with an error class. 
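The SparkException.scala hunk above and the QueryExecutionErrors.scala hunk that follows both lean on the same pattern: the exception carries an error class name plus positional message parameters, and the user-facing text comes from the template registered for that class in error-classes.json. As a rough, self-contained sketch of that flow (not Spark source: the registry map, `ErrorClassSketch`, and `SketchUpgradeException` are illustrative names invented here; only the two message templates are taken from the error-classes.json hunk above):

```scala
// Illustrative sketch only, not Spark source: models how an error class plus
// message parameters are rendered into the final exception message.
object ErrorClassSketch {
  // Stand-in for error-classes.json; these two templates are the ones added in this patch.
  private val errorClasses: Map[String, String] = Map(
    "INCONSISTENT_BEHAVIOR_CROSS_VERSION" ->
      "You may get a different result due to the upgrading to Spark >= %s: %s",
    "UNSUPPORTED_OPERATION" ->
      "The operation is not supported: %s")

  // Plays the role of SparkThrowableHelper.getMessage in the hunk above:
  // look up the class's template and fill in the positional parameters.
  def getMessage(errorClass: String, messageParameters: Array[String]): String =
    errorClasses(errorClass).format(messageParameters: _*)

  // Mirrors the reworked SparkUpgradeException: the exception keeps the error
  // class and parameters and derives its message text from the registry.
  class SketchUpgradeException(
      val errorClass: String,
      messageParameters: Array[String],
      cause: Throwable = null)
    extends RuntimeException(getMessage(errorClass, messageParameters), cause)

  def main(args: Array[String]): Unit = {
    val e = new SketchUpgradeException(
      "INCONSISTENT_BEHAVIOR_CROSS_VERSION",
      Array("3.0", "Fail to parse '1' in the new parser."))
    println(e.getMessage)
  }
}
```

Running the sketch prints a message of the same shape as the ones updated in the golden .sql.out files further down, i.e. text beginning with "You may get a different result due to the upgrading to Spark >= 3.0: ...".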
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 6424b32efbc39..62b961604a25a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -533,30 +533,43 @@ object QueryExecutionErrors { def sparkUpgradeInReadingDatesError( format: String, config: String, option: String): SparkUpgradeException = { - new SparkUpgradeException("3.0", - s""" - |reading dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z from $format - |files can be ambiguous, as the files may be written by Spark 2.x or legacy versions of - |Hive, which uses a legacy hybrid calendar that is different from Spark 3.0+'s Proleptic - |Gregorian calendar. See more details in SPARK-31404. You can set the SQL config - |'$config' or the datasource option '$option' to 'LEGACY' to rebase the datetime values - |w.r.t. the calendar difference during reading. To read the datetime values as it is, - |set the SQL config '$config' or the datasource option '$option' to 'CORRECTED'. - """.stripMargin, null) + new SparkUpgradeException( + errorClass = "INCONSISTENT_BEHAVIOR_CROSS_VERSION", + messageParameters = Array( + "3.0", + s""" + |reading dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z + |from $format files can be ambiguous, as the files may be written by + |Spark 2.x or legacy versions of Hive, which uses a legacy hybrid calendar + |that is different from Spark 3.0+'s Proleptic Gregorian calendar. + |See more details in SPARK-31404. You can set the SQL config '$config' or + |the datasource option '$option' to 'LEGACY' to rebase the datetime values + |w.r.t. the calendar difference during reading. To read the datetime values + |as it is, set the SQL config '$config' or the datasource option '$option' + |to 'CORRECTED'. + |""".stripMargin), + cause = null + ) } def sparkUpgradeInWritingDatesError(format: String, config: String): SparkUpgradeException = { - new SparkUpgradeException("3.0", - s""" - |writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z into $format - |files can be dangerous, as the files may be read by Spark 2.x or legacy versions of Hive - |later, which uses a legacy hybrid calendar that is different from Spark 3.0+'s Proleptic - |Gregorian calendar. See more details in SPARK-31404. You can set $config to 'LEGACY' to - |rebase the datetime values w.r.t. the calendar difference during writing, to get maximum - |interoperability. Or set $config to 'CORRECTED' to write the datetime values as it is, - |if you are 100% sure that the written files will only be read by Spark 3.0+ or other - |systems that use Proleptic Gregorian calendar. - """.stripMargin, null) + new SparkUpgradeException( + errorClass = "INCONSISTENT_BEHAVIOR_CROSS_VERSION", + messageParameters = Array( + "3.0", + s""" + |writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z + |into $format files can be dangerous, as the files may be read by Spark 2.x + |or legacy versions of Hive later, which uses a legacy hybrid calendar that + |is different from Spark 3.0+'s Proleptic Gregorian calendar. See more + |details in SPARK-31404. You can set $config to 'LEGACY' to rebase the + |datetime values w.r.t. the calendar difference during writing, to get maximum + |interoperability. 
Or set $config to 'CORRECTED' to write the datetime values + |as it is, if you are 100% sure that the written files will only be read by + |Spark 3.0+ or other systems that use Proleptic Gregorian calendar. + |""".stripMargin), + cause = null + ) } def buildReaderUnsupportedForFileFormatError(format: String): Throwable = { @@ -1617,8 +1630,12 @@ object QueryExecutionErrors { } def timeZoneIdNotSpecifiedForTimestampTypeError(): Throwable = { - new UnsupportedOperationException( - s"${TimestampType.catalogString} must supply timeZoneId parameter") + new SparkUnsupportedOperationException( + errorClass = "UNSUPPORTED_OPERATION", + messageParameters = Array( + s"${TimestampType.catalogString} must supply timeZoneId parameter " + + s"while converting to ArrowType") + ) } def notPublicClassError(name: String): Throwable = { @@ -1932,7 +1949,9 @@ object QueryExecutionErrors { } def cannotConvertOrcTimestampToTimestampNTZError(): Throwable = { - new RuntimeException("Unable to convert timestamp of Orc to data type 'timestamp_ntz'") + new SparkUnsupportedOperationException( + errorClass = "UNSUPPORTED_OPERATION", + messageParameters = Array("Unable to convert timestamp of Orc to data type 'timestamp_ntz'")) } def writePartitionExceedConfigSizeWhenDynamicPartitionError( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index deb720336a1f0..ed4e9348f1889 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1737,7 +1737,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { exprSeq2.foreach(pair => checkExceptionInExpression[SparkUpgradeException]( pair._1, - "You may get a different result due to the upgrading of Spark 3.0")) + "You may get a different result due to the upgrading to Spark >= 3.0")) } else { if (ansiEnabled) { exprSeq2.foreach(pair => diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index a21512ffe8b7c..36cf228c6284b 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -641,7 +641,7 @@ select to_date('26/October/2015', 'dd/MMMMM/yyyy') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -650,7 +650,7 @@ select from_json('{"d":"26/October/2015"}', 'd Date', map('dateFormat', 'dd/MMMM struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -659,7 +659,7 @@ select from_csv('26/October/2015', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy')) struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out index e6dd07b5658f6..5dc3b85b3a9eb 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out @@ -17,7 +17,7 @@ select to_timestamp('1', 'yy') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '1' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '1' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query @@ -35,7 +35,7 @@ select to_timestamp('123', 'yy') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '123' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '123' in the new parser. 
You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query @@ -44,7 +44,7 @@ select to_timestamp('1', 'yyy') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '1' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '1' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query @@ -53,7 +53,7 @@ select to_timestamp('1234567', 'yyyyyyy') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'yyyyyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -71,7 +71,7 @@ select to_timestamp('9', 'DD') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query @@ -89,7 +89,7 @@ select to_timestamp('9', 'DDD') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query @@ -98,7 +98,7 @@ select to_timestamp('99', 'DDD') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '99' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '99' in the new parser. 
You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query @@ -170,7 +170,7 @@ select from_csv('2018-366', 'date Date', map('dateFormat', 'yyyy-DDD')) struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '2018-366' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '2018-366' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out index 2c47ed3abe2c9..dc25ed9b0d140 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out @@ -725,7 +725,7 @@ select to_timestamp('2019-10-06 A', 'yyyy-MM-dd GGGGG') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -734,7 +734,7 @@ select to_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEEE') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd MM yyyy EEEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -743,7 +743,7 @@ select to_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEE') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -752,7 +752,7 @@ select unix_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEE') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -761,7 +761,7 @@ select from_json('{"t":"26/October/2015"}', 't Timestamp', map('timestampFormat' struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -770,7 +770,7 @@ select from_csv('26/October/2015', 't Timestamp', map('timestampFormat', 'dd/MMM struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/date.sql.out b/sql/core/src/test/resources/sql-tests/results/date.sql.out index bd32361eaef06..ad6421a53df21 100644 --- a/sql/core/src/test/resources/sql-tests/results/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/date.sql.out @@ -640,7 +640,7 @@ select to_date('26/October/2015', 'dd/MMMMM/yyyy') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -649,7 +649,7 @@ select from_json('{"d":"26/October/2015"}', 'd Date', map('dateFormat', 'dd/MMMM struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -658,7 +658,7 @@ select from_csv('26/October/2015', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy')) struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out index 9c8553dc0f01f..6649ae3dbaf1c 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out @@ -8,7 +8,7 @@ select date_format('2018-11-17 13:33:33.333', 'GGGGG') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'GGGGG' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'GGGGG' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -17,7 +17,7 @@ select date_format('2018-11-17 13:33:33.333', 'yyyyyyy') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'yyyyyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -44,7 +44,7 @@ select date_format('2018-11-17 13:33:33.333', 'MMMMM') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'MMMMM' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'MMMMM' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -53,7 +53,7 @@ select date_format('2018-11-17 13:33:33.333', 'LLLLL') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'LLLLL' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'LLLLL' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -62,7 +62,7 @@ select date_format('2018-11-17 13:33:33.333', 'EEEEE') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -71,7 +71,7 @@ select date_format('2018-11-17 13:33:33.333', 'FF') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'FF' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'FF' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -80,7 +80,7 @@ select date_format('2018-11-17 13:33:33.333', 'ddd') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'ddd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'ddd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -89,7 +89,7 @@ select date_format('2018-11-17 13:33:33.333', 'DDDD') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'DDDD' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'DDDD' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -98,7 +98,7 @@ select date_format('2018-11-17 13:33:33.333', 'HHH') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'HHH' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'HHH' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -107,7 +107,7 @@ select date_format('2018-11-17 13:33:33.333', 'hhh') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'hhh' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'hhh' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -116,7 +116,7 @@ select date_format('2018-11-17 13:33:33.333', 'kkk') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'kkk' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'kkk' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -125,7 +125,7 @@ select date_format('2018-11-17 13:33:33.333', 'KKK') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'KKK' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'KKK' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -134,7 +134,7 @@ select date_format('2018-11-17 13:33:33.333', 'mmm') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'mmm' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'mmm' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -143,7 +143,7 @@ select date_format('2018-11-17 13:33:33.333', 'sss') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'sss' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'sss' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -152,7 +152,7 @@ select date_format('2018-11-17 13:33:33.333', 'SSSSSSSSSS') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'SSSSSSSSSS' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'SSSSSSSSSS' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -161,7 +161,7 @@ select date_format('2018-11-17 13:33:33.333', 'aa') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'aa' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'aa' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -179,7 +179,7 @@ select date_format('2018-11-17 13:33:33.333', 'zzzzz') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'zzzzz' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'zzzzz' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -197,7 +197,7 @@ select date_format('2018-11-17 13:33:33.333', 'ZZZZZZ') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'ZZZZZZ' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'ZZZZZZ' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -260,7 +260,7 @@ select date_format('2018-11-17 13:33:33.333', 'Y') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'Y' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'Y' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -269,7 +269,7 @@ select date_format('2018-11-17 13:33:33.333', 'w') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'w' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'w' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -278,7 +278,7 @@ select date_format('2018-11-17 13:33:33.333', 'W') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'W' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'W' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -287,7 +287,7 @@ select date_format('2018-11-17 13:33:33.333', 'u') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'u' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'u' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out index c1e1a2c4b2143..33504709c08ec 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out @@ -17,7 +17,7 @@ select to_timestamp('1', 'yy') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '1' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '1' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query @@ -34,7 +34,7 @@ select to_timestamp('123', 'yy') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '123' in the new parser. 
You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '123' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query @@ -43,7 +43,7 @@ select to_timestamp('1', 'yyy') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '1' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '1' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query @@ -52,7 +52,7 @@ select to_timestamp('1234567', 'yyyyyyy') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'yyyyyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -69,7 +69,7 @@ select to_timestamp('9', 'DD') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query @@ -86,7 +86,7 @@ select to_timestamp('9', 'DDD') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query @@ -95,7 +95,7 @@ select to_timestamp('99', 'DDD') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '99' in the new parser. 
You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '99' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query @@ -160,7 +160,7 @@ select from_csv('2018-366', 'date Date', map('dateFormat', 'yyyy-DDD')) struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '2018-366' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '2018-366' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index e509d4e4cc27b..84610834fa7e7 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -339,7 +339,7 @@ select from_json( struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '02-29' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '02-29' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query @@ -351,7 +351,7 @@ select from_json( struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to parse '02-29' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. +You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '02-29' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out index 6362a2ac20e0f..282e76351e805 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out @@ -719,7 +719,7 @@ select to_timestamp('2019-10-06 A', 'yyyy-MM-dd GGGGG') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -728,7 +728,7 @@ select to_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEEE') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd MM yyyy EEEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -737,7 +737,7 @@ select to_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEE') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -746,7 +746,7 @@ select unix_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEE') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -755,7 +755,7 @@ select from_json('{"t":"26/October/2015"}', 't Timestamp', map('timestampFormat' struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 
1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -764,7 +764,7 @@ select from_csv('26/October/2015', 't Timestamp', map('timestampFormat', 'dd/MMM struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index 46b51fc9255b9..95120a83931ec 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -752,7 +752,7 @@ select unix_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEE') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out index adadae552a7c2..0364f553d2676 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out @@ -746,7 +746,7 @@ select unix_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEE') struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 
1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out index 14e941c074041..fd4f8b2c7a0e3 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out @@ -139,7 +139,7 @@ select to_timestamp('2018-01-01', a) from t struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'aa' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'aa' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -156,7 +156,7 @@ select to_unix_timestamp('2018-01-01', a) from t struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'aa' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'aa' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query @@ -173,7 +173,7 @@ select unix_timestamp('2018-01-01', a) from t struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'aa' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'aa' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index 762bc15b4791e..fa246fa79b33c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -516,7 +516,7 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row(null), Row(null), Row(null))) val e = intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect()) assert(e.getCause.isInstanceOf[IllegalArgumentException]) - assert(e.getMessage.contains("You may get a different result due to the upgrading of Spark")) + assert(e.getMessage.contains("You may get a different result due to the upgrading to Spark")) // February val x1 = "2016-02-29" @@ -699,7 +699,7 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { val e = intercept[SparkUpgradeException](invalid.collect()) assert(e.getCause.isInstanceOf[IllegalArgumentException]) assert( - e.getMessage.contains("You may get a different result due to the upgrading of Spark")) + e.getMessage.contains("You may get a different result due to the upgrading to Spark")) } // February diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 429e41e8c997d..eb1b06647aaec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -17,12 +17,22 @@ package org.apache.spark.sql.errors -import org.apache.spark.{SparkException, SparkIllegalArgumentException, SparkRuntimeException, SparkUnsupportedOperationException} -import org.apache.spark.sql.{DataFrame, QueryTest} +import java.sql.Timestamp + +import org.apache.spark.{SparkException, SparkIllegalArgumentException, SparkRuntimeException, SparkUnsupportedOperationException, SparkUpgradeException} +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.execution.datasources.orc.OrcTest +import org.apache.spark.sql.execution.datasources.parquet.ParquetTest import org.apache.spark.sql.functions.{lit, lower, struct, sum} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy.EXCEPTION import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{StructField, StructType, TimestampNTZType, TimestampType} +import org.apache.spark.sql.util.ArrowUtils + +class QueryExecutionErrorsSuite extends QueryTest + with ParquetTest with OrcTest with SharedSparkSession { -class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { import testImplicits._ private def getAesInputs(): (DataFrame, DataFrame) = { @@ -181,4 +191,97 @@ class QueryExecutionErrorsSuite extends QueryTest with SharedSparkSession { assert(e2.getSqlState === "0A000") assert(e2.getMessage === "The feature is not supported: Pivot not after a groupBy.") } + + test("INCONSISTENT_BEHAVIOR_CROSS_VERSION: " + + "compatibility with Spark 2.4/3.2 in reading/writing dates") { + + // Fail to read ancient datetime values. 
+ withSQLConf(SQLConf.PARQUET_REBASE_MODE_IN_READ.key -> EXCEPTION.toString) { + val fileName = "before_1582_date_v2_4_5.snappy.parquet" + val filePath = getResourceParquetFilePath("test-data/" + fileName) + val e = intercept[SparkException] { + spark.read.parquet(filePath).collect() + }.getCause.asInstanceOf[SparkUpgradeException] + + val format = "Parquet" + val config = SQLConf.PARQUET_REBASE_MODE_IN_READ.key + val option = "datetimeRebaseMode" + assert(e.getErrorClass === "INCONSISTENT_BEHAVIOR_CROSS_VERSION") + assert(e.getMessage === + "You may get a different result due to the upgrading to Spark >= 3.0: " + + s""" + |reading dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z + |from $format files can be ambiguous, as the files may be written by + |Spark 2.x or legacy versions of Hive, which uses a legacy hybrid calendar + |that is different from Spark 3.0+'s Proleptic Gregorian calendar. + |See more details in SPARK-31404. You can set the SQL config '$config' or + |the datasource option '$option' to 'LEGACY' to rebase the datetime values + |w.r.t. the calendar difference during reading. To read the datetime values + |as it is, set the SQL config '$config' or the datasource option '$option' + |to 'CORRECTED'. + |""".stripMargin) + } + + // Fail to write ancient datetime values. + withSQLConf(SQLConf.PARQUET_REBASE_MODE_IN_WRITE.key -> EXCEPTION.toString) { + withTempPath { dir => + val df = Seq(java.sql.Date.valueOf("1001-01-01")).toDF("dt") + val e = intercept[SparkException] { + df.write.parquet(dir.getCanonicalPath) + }.getCause.getCause.getCause.asInstanceOf[SparkUpgradeException] + + val format = "Parquet" + val config = SQLConf.PARQUET_REBASE_MODE_IN_WRITE.key + assert(e.getErrorClass === "INCONSISTENT_BEHAVIOR_CROSS_VERSION") + assert(e.getMessage === + "You may get a different result due to the upgrading to Spark >= 3.0: " + + s""" + |writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z + |into $format files can be dangerous, as the files may be read by Spark 2.x + |or legacy versions of Hive later, which uses a legacy hybrid calendar that + |is different from Spark 3.0+'s Proleptic Gregorian calendar. See more + |details in SPARK-31404. You can set $config to 'LEGACY' to rebase the + |datetime values w.r.t. the calendar difference during writing, to get maximum + |interoperability. Or set $config to 'CORRECTED' to write the datetime values + |as it is, if you are 100% sure that the written files will only be read by + |Spark 3.0+ or other systems that use Proleptic Gregorian calendar. 
+ |""".stripMargin) + } + } + } + + test("UNSUPPORTED_OPERATION: timeZoneId not specified while converting TimestampType to Arrow") { + val schema = new StructType().add("value", TimestampType) + val e = intercept[SparkUnsupportedOperationException] { + ArrowUtils.toArrowSchema(schema, null) + } + + assert(e.getErrorClass === "UNSUPPORTED_OPERATION") + assert(e.getMessage === "The operation is not supported: " + + "timestamp must supply timeZoneId parameter while converting to ArrowType") + } + + test("UNSUPPORTED_OPERATION - SPARK-36346: can't read Timestamp as TimestampNTZ") { + val data = (1 to 10).map { i => + val ts = new Timestamp(i) + Row(ts) + } + + val actualSchema = StructType(Seq(StructField("time", TimestampType, false))) + val providedSchema = StructType(Seq(StructField("time", TimestampNTZType, false))) + + withTempPath { file => + val df = spark.createDataFrame(sparkContext.parallelize(data), actualSchema) + df.write.orc(file.getCanonicalPath) + withAllNativeOrcReaders { + val e = intercept[SparkException] { + spark.read.schema(providedSchema).orc(file.getCanonicalPath).collect() + }.getCause.asInstanceOf[SparkUnsupportedOperationException] + + assert(e.getErrorClass === "UNSUPPORTED_OPERATION") + assert(e.getMessage === "The operation is not supported: " + + "Unable to convert timestamp of Orc to data type 'timestamp_ntz'") + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index 551a3f5a7cc1b..280a88091089b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -804,32 +804,6 @@ abstract class OrcQuerySuite extends OrcQueryTest with SharedSparkSession { } } - test("SPARK-36346: can't read TimestampLTZ as TimestampNTZ") { - val data = (1 to 10).map { i => - val ts = new Timestamp(i) - Row(ts) - } - val answer = (1 to 10).map { i => - // The second parameter is `nanoOfSecond`, while java.sql.Timestamp accepts milliseconds - // as input. 
So here we multiple the `nanoOfSecond` by NANOS_PER_MILLIS - val ts = LocalDateTime.ofEpochSecond(0, i * 1000000, ZoneOffset.UTC) - Row(ts) - } - val actualSchema = StructType(Seq(StructField("time", TimestampType, false))) - val providedSchema = StructType(Seq(StructField("time", TimestampNTZType, false))) - - withTempPath { file => - val df = spark.createDataFrame(sparkContext.parallelize(data), actualSchema) - df.write.orc(file.getCanonicalPath) - withAllNativeOrcReaders { - val msg = intercept[SparkException] { - spark.read.schema(providedSchema).orc(file.getCanonicalPath).collect() - }.getMessage - assert(msg.contains("Unable to convert timestamp of Orc to data type 'timestamp_ntz'")) - } - } - } - test("SPARK-36346: read TimestampNTZ as TimestampLTZ") { val data = (1 to 10).map { i => // The second parameter is `nanoOfSecond`, while java.sql.Timestamp accepts milliseconds diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala index 96932de3275bc..c36bfd9362466 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala @@ -47,7 +47,7 @@ import org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION * -> HiveOrcPartitionDiscoverySuite * -> OrcFilterSuite */ -abstract class OrcTest extends QueryTest with FileBasedDataSourceTest with BeforeAndAfterAll { +trait OrcTest extends QueryTest with FileBasedDataSourceTest with BeforeAndAfterAll { val orcImp: String = "native" From 8b08f1903739c26989fa6e8afeb6f0993fbb766f Mon Sep 17 00:00:00 2001 From: Eugene Koifman Date: Wed, 9 Mar 2022 00:30:01 +0800 Subject: [PATCH 425/513] [SPARK-37753][SQL] Fine tune logic to demote Broadcast hash join in DynamicJoinSelection ### What changes were proposed in this pull request? ### Why are the changes needed? In the current implementation of DynamicJoinSelection the logic checks if one side of the join has a high ratio of empty partitions and adds a NO_BROADCAST hint on that side, since a shuffle join can short-circuit the local joins where one side is empty. This logic doesn't make sense for all join types. For example, a Left Outer Join cannot short-circuit if the RHS is empty, so we should not inhibit BHJ. On the other hand, a LOJ executed as a shuffle join where the LHS has many empty partitions can short-circuit the local join, so we should inhibit the BHJ, because BHJ will use OptimizeShuffleWithLocalRead, which will re-assemble LHS partitions as they were before the shuffle and thus they may not have many empty ones any more. This supersedes [SPARK-37193](https://issues.apache.org/jira/browse/SPARK-37193) Also see previous discussion in https://github.com/apache/spark/pull/34464#discussion_r774630446 ### Does this PR introduce _any_ user-facing change? It may change which joins run as BHJ vs shuffle joins. ### How was this patch tested? Unit Tests Closes #35715 from ekoifman/SPARK-37753-enhance-DynamiJoinSelection.
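To make the intended behaviour easier to follow, here is a deliberately simplified, hypothetical restatement of the left-outer-join case described above. It is not the rule's actual code (the real logic lives in `DynamicJoinSelection` in the diff below); the function name and boolean inputs are illustrative assumptions only.

```scala
// Simplified sketch of the LOJ decision described above. Only the right side
// of a left outer join can be broadcast, so:
//  - many empty partitions on the right (null-filled) side should NOT demote
//    BHJ, because an empty right side cannot short-circuit a shuffle join;
//  - many empty partitions on the left side SHOULD demote BHJ, because a
//    shuffle join can skip local joins whose left partition is empty, while
//    BHJ with OptimizeShuffleWithLocalRead re-assembles the pre-shuffle
//    left-side partitions.
def demoteBroadcastForLeftOuter(
    manyEmptyOnLeft: Boolean,
    manyEmptyOnRight: Boolean): Boolean = {
  if (manyEmptyOnRight) false else manyEmptyOnLeft
}
```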
Authored-by: Eugene Koifman Signed-off-by: Wenchen Fan --- .../pandas/tests/test_ops_on_diff_frames.py | 2 +- .../adaptive/DynamicJoinSelection.scala | 78 ++++++++++++++----- .../adaptive/AdaptiveQueryExecSuite.scala | 35 +++++++++ 3 files changed, 93 insertions(+), 22 deletions(-) diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py b/python/pyspark/pandas/tests/test_ops_on_diff_frames.py index dad4476975f6b..96473769475d2 100644 --- a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames.py @@ -1361,7 +1361,7 @@ def test_update(self): pser1.update(pser2) psser1.update(psser2) - self.assert_eq(psser1, pser1) + self.assert_eq(psser1.sort_index(), pser1) def test_where(self): pdf1 = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [100, 200, 300, 400, 500]}) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DynamicJoinSelection.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DynamicJoinSelection.scala index 6106dff99b2ac..217569ae645c3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DynamicJoinSelection.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DynamicJoinSelection.scala @@ -18,8 +18,10 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.MapOutputStatistics +import org.apache.spark.sql.catalyst.optimizer.JoinSelectionHelper import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys -import org.apache.spark.sql.catalyst.plans.logical.{HintInfo, JoinStrategyHint, LogicalPlan, NO_BROADCAST_HASH, PREFER_SHUFFLE_HASH, SHUFFLE_HASH} +import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftOuter, RightOuter} +import org.apache.spark.sql.catalyst.plans.logical.{HintInfo, Join, JoinStrategyHint, LogicalPlan, NO_BROADCAST_HASH, PREFER_SHUFFLE_HASH, SHUFFLE_HASH} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf @@ -33,9 +35,9 @@ import org.apache.spark.sql.internal.SQLConf * 3. if a join satisfies both NO_BROADCAST_HASH and PREFER_SHUFFLE_HASH, * then add a SHUFFLE_HASH hint. 
*/ -object DynamicJoinSelection extends Rule[LogicalPlan] { +object DynamicJoinSelection extends Rule[LogicalPlan] with JoinSelectionHelper { - private def shouldDemoteBroadcastHashJoin(mapStats: MapOutputStatistics): Boolean = { + private def hasManyEmptyPartitions(mapStats: MapOutputStatistics): Boolean = { val partitionCnt = mapStats.bytesByPartitionId.length val nonZeroCnt = mapStats.bytesByPartitionId.count(_ > 0) partitionCnt > 0 && nonZeroCnt > 0 && @@ -50,35 +52,69 @@ object DynamicJoinSelection extends Rule[LogicalPlan] { mapStats.bytesByPartitionId.forall(_ <= maxShuffledHashJoinLocalMapThreshold) } - private def selectJoinStrategy(plan: LogicalPlan): Option[JoinStrategyHint] = plan match { - case LogicalQueryStage(_, stage: ShuffleQueryStageExec) if stage.isMaterialized - && stage.mapStats.isDefined => - val demoteBroadcastHash = shouldDemoteBroadcastHashJoin(stage.mapStats.get) - val preferShuffleHash = preferShuffledHashJoin(stage.mapStats.get) - if (demoteBroadcastHash && preferShuffleHash) { - Some(SHUFFLE_HASH) - } else if (demoteBroadcastHash) { - Some(NO_BROADCAST_HASH) - } else if (preferShuffleHash) { - Some(PREFER_SHUFFLE_HASH) - } else { - None - } + private def selectJoinStrategy( + join: Join, + isLeft: Boolean): Option[JoinStrategyHint] = { + val plan = if (isLeft) join.left else join.right + plan match { + case LogicalQueryStage(_, stage: ShuffleQueryStageExec) if stage.isMaterialized + && stage.mapStats.isDefined => + + val manyEmptyInPlan = hasManyEmptyPartitions(stage.mapStats.get) + val canBroadcastPlan = (isLeft && canBuildBroadcastLeft(join.joinType)) || + (!isLeft && canBuildBroadcastRight(join.joinType)) + val manyEmptyInOther = (if (isLeft) join.right else join.left) match { + case LogicalQueryStage(_, stage: ShuffleQueryStageExec) if stage.isMaterialized + && stage.mapStats.isDefined => hasManyEmptyPartitions(stage.mapStats.get) + case _ => false + } + + val demoteBroadcastHash = if (manyEmptyInPlan && canBroadcastPlan) { + join.joinType match { + // don't demote BHJ since you cannot short circuit local join if inner (null-filled) + // side is empty + case LeftOuter | RightOuter | LeftAnti => false + case _ => true + } + } else if (manyEmptyInOther && canBroadcastPlan) { + // for example, LOJ, !isLeft but it's the LHS that has many empty partitions if we + // proceed with shuffle. 
But if we proceed with BHJ, the OptimizeShuffleWithLocalRead + // will assemble partitions as they were before the shuffle and that may no longer have + // many empty partitions and thus cannot short-circuit local join + join.joinType match { + case LeftOuter | RightOuter | LeftAnti => true + case _ => false + } + } else { + false + } + + val preferShuffleHash = preferShuffledHashJoin(stage.mapStats.get) + if (demoteBroadcastHash && preferShuffleHash) { + Some(SHUFFLE_HASH) + } else if (demoteBroadcastHash) { + Some(NO_BROADCAST_HASH) + } else if (preferShuffleHash) { + Some(PREFER_SHUFFLE_HASH) + } else { + None + } - case _ => None + case _ => None + } } def apply(plan: LogicalPlan): LogicalPlan = plan.transformDown { - case j @ ExtractEquiJoinKeys(_, _, _, _, _, left, right, hint) => + case j @ ExtractEquiJoinKeys(_, _, _, _, _, _, _, hint) => var newHint = hint if (!hint.leftHint.exists(_.strategy.isDefined)) { - selectJoinStrategy(left).foreach { strategy => + selectJoinStrategy(j, true).foreach { strategy => newHint = newHint.copy(leftHint = Some(hint.leftHint.getOrElse(HintInfo()).copy(strategy = Some(strategy)))) } } if (!hint.rightHint.exists(_.strategy.isDefined)) { - selectJoinStrategy(right).foreach { strategy => + selectJoinStrategy(j, false).foreach { strategy => newHint = newHint.copy(rightHint = Some(hint.rightHint.getOrElse(HintInfo()).copy(strategy = Some(strategy)))) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index c69712369dbdf..76741dc4d08e0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -683,6 +683,41 @@ class AdaptiveQueryExecSuite } } } + test("SPARK-37753: Allow changing outer join to broadcast join even if too many empty" + + " partitions on broadcast side") { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN.key -> "0.5") { + // `testData` is small enough to be broadcast but has empty partition ratio over the config. + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80") { + val (plan, adaptivePlan) = runAdaptiveAndVerifyResult( + "SELECT * FROM (select * from testData where value = '1') td" + + " right outer join testData2 ON key = a") + val smj = findTopLevelSortMergeJoin(plan) + assert(smj.size == 1) + val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) + assert(bhj.size == 1) + } + } + } + + test("SPARK-37753: Inhibit broadcast in left outer join when there are many empty" + + " partitions on outer/left side") { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN.key -> "0.5") { + // `testData` is small enough to be broadcast but has empty partition ratio over the config. 
+ withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "200") { + val (plan, adaptivePlan) = runAdaptiveAndVerifyResult( + "SELECT * FROM (select * from testData where value = '1') td" + + " left outer join testData2 ON key = a") + val smj = findTopLevelSortMergeJoin(plan) + assert(smj.size == 1) + val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) + assert(bhj.isEmpty) + } + } + } test("SPARK-29906: AQE should not introduce extra shuffle for outermost limit") { var numStages = 0 From b5589a9759ed518bc4d4518e87102c79d16246d8 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 8 Mar 2022 09:53:50 -0800 Subject: [PATCH 426/513] [SPARK-38423][K8S][FOLLOWUP] PodGroup spec should not be null ### What changes were proposed in this pull request? This PR is a follow-up of #35639 to prevent `PodGroup` with `null` spec. ### Why are the changes needed? First, for `null` value, we should use `Option(null)` to make it `None`. This PR fixes the following. ``` - priorityClassName = Some(pod.pod.getSpec.getPriorityClassName) + priorityClassName = Option(pod.pod.getSpec.getPriorityClassName) ``` Second, when `queue` and `priorityClassName` are not given. `PodGroup` is created with `null` spec. This causes NullPointerException during the test case. This PR fixes it. ### Does this PR introduce _any_ user-facing change? No. This is not released yet. ### How was this patch tested? Pass the CIs. Closes #35769 from dongjoon-hyun/SPARK-38423. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala index 48303c8c2e37f..58d6c5caf9fff 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala @@ -48,6 +48,8 @@ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureCon .withName(podGroupName) .withNamespace(namespace) .endMetadata() + .editOrNewSpec() + .endSpec() queue.foreach(podGroup.editOrNewSpec().withQueue(_).endSpec()) @@ -58,7 +60,7 @@ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureCon override def configurePod(pod: SparkPod): SparkPod = { - priorityClassName = Some(pod.pod.getSpec.getPriorityClassName) + priorityClassName = Option(pod.pod.getSpec.getPriorityClassName) val k8sPodBuilder = new PodBuilder(pod.pod) .editMetadata() From 0ad76777e76f60d1aea0eed0a2a7bff20c7567d3 Mon Sep 17 00:00:00 2001 From: Rob Reeves Date: Tue, 8 Mar 2022 12:20:43 -0600 Subject: [PATCH 427/513] [SPARK-38309][CORE] Fix SHS `shuffleTotalReads` and `shuffleTotalBlocks` percentile metrics ### What changes were proposed in this pull request? #### Background In PR #26508 (SPARK-26260) the SHS stage metric percentiles were updated to only include successful tasks when using disk storage. It did this by making the values for each metric negative when the task is not in a successful state. This approach was chosen to avoid breaking changes to disk storage. See [this comment](https://github.com/apache/spark/pull/26508#issuecomment-554540314) for context. To get the percentiles, it reads the metric values, starting at 0, in ascending order. 
This filters out all tasks that are not successful because the values are less than 0. To get the percentile values it scales the percentiles to the list index of successful tasks. For example if there are 200 tasks and you want percentiles [0, 25, 50, 75, 100] the lookup indexes in the task collection are [0, 50, 100, 150, 199]. #### Issue For metrics 1) shuffle total reads and 2) shuffle total blocks, PR #26508 incorrectly makes the metric indices positive. This means tasks that are not successful are included in the percentile calculations. The percentile lookup index calculation is still based on the number of successful task so the wrong task metric is returned for a given percentile. This was not caught because the unit test only verified values for one metric, `executorRunTime`. #### Fix The index values for `SHUFFLE_TOTAL_READS` and `SHUFFLE_TOTAL_BLOCKS` should not convert back to positive metric values for tasks that are not successful. I believe this was done because the metrics values are summed from two other metrics. Using the raw values still creates the desired outcome. `negative + negative = negative` and `positive + positive = positive`. There is no case where one metric will be negative and one will be positive. I also verified that these two metrics are only used in the percentile calculations where only successful tasks are used. ### Why are the changes needed? This change is required so that the SHS stage percentile metrics for shuffle read bytes and shuffle total blocks are correct. ### Does this PR introduce _any_ user-facing change? Yes. The user will see the correct percentile values for the stage summary shuffle read bytes. ### How was this patch tested? I updated the unit test to verify the percentile values for every task metric. I also modified the unit test to have unique values for every metric. Previously the test had the same metrics for every field. This would not catch bugs like the wrong field being read by accident. I manually validated the fix in the UI. **BEFORE** ![image](https://user-images.githubusercontent.com/5604993/155433460-322078c5-1821-4f2e-8e53-8fc3902eb7fe.png) **AFTER** ![image](https://user-images.githubusercontent.com/5604993/155433491-25ce3acf-290b-4b83-a0a9-0f9b71c7af04.png) I manually validated the fix in the task summary API (`/api/v1/applications/application_123/1/stages/14/0/taskSummary\?quantiles\=0,0.25,0.5,0.75,1.0`). See `shuffleReadMetrics.readBytes` and `shuffleReadMetrics.totalBlocksFetched`. Before: ```json { "quantiles":[ 0.0, 0.25, 0.5, 0.75, 1.0 ], "shuffleReadMetrics":{ "readBytes":[ -2.0, -2.0, -2.0, -2.0, 5.63718681E8 ], "totalBlocksFetched":[ -2.0, -2.0, -2.0, -2.0, 2.0 ], ... }, ... } ``` After: ```json { "quantiles":[ 0.0, 0.25, 0.5, 0.75, 1.0 ], "shuffleReadMetrics":{ "readBytes":[ 5.62865286E8, 5.63779421E8, 5.63941681E8, 5.64327925E8, 5.7674183E8 ], "totalBlocksFetched":[ 2.0, 2.0, 2.0, 2.0, 2.0 ], ... } ... } ``` Closes #35637 from robreeves/SPARK-38309. 
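The index scaling described above can be illustrated with a small, self-contained sketch. The function below is illustrative only (the name, signature and truncation are assumptions, not the actual `AppStatusStore` code), but it reproduces the 200-task example quoted in the background section.

```scala
// Map the requested quantiles onto indexes into the metric values of the
// successful tasks only; unsuccessful tasks are stored with negative metric
// values and are skipped by the ascending scan that starts at 0.
def quantileIndices(quantiles: Seq[Double], successfulTaskCount: Int): Seq[Int] =
  quantiles.map(q => math.min((q * successfulTaskCount).toInt, successfulTaskCount - 1))

// 200 successful tasks with quantiles 0, 0.25, 0.5, 0.75, 1.0 give the lookup
// indexes 0, 50, 100, 150 and 199, as in the example above.
quantileIndices(Seq(0.0, 0.25, 0.5, 0.75, 1.0), 200) // Seq(0, 50, 100, 150, 199)
```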
Authored-by: Rob Reeves Signed-off-by: Mridul Muralidharan gmail.com> --- .../org/apache/spark/status/storeTypes.scala | 4 +- .../spark/status/AppStatusStoreSuite.scala | 108 +++++++++++++----- 2 files changed, 81 insertions(+), 31 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/status/storeTypes.scala b/core/src/main/scala/org/apache/spark/status/storeTypes.scala index 103e4bab411e5..39bf593274904 100644 --- a/core/src/main/scala/org/apache/spark/status/storeTypes.scala +++ b/core/src/main/scala/org/apache/spark/status/storeTypes.scala @@ -344,7 +344,7 @@ private[spark] class TaskDataWrapper( @JsonIgnore @KVIndex(value = TaskIndexNames.SHUFFLE_TOTAL_READS, parent = TaskIndexNames.STAGE) private def shuffleTotalReads: Long = { if (hasMetrics) { - getMetricValue(shuffleLocalBytesRead) + getMetricValue(shuffleRemoteBytesRead) + shuffleLocalBytesRead + shuffleRemoteBytesRead } else { -1L } @@ -353,7 +353,7 @@ private[spark] class TaskDataWrapper( @JsonIgnore @KVIndex(value = TaskIndexNames.SHUFFLE_TOTAL_BLOCKS, parent = TaskIndexNames.STAGE) private def shuffleTotalBlocks: Long = { if (hasMetrics) { - getMetricValue(shuffleLocalBlocksFetched) + getMetricValue(shuffleRemoteBlocksFetched) + shuffleLocalBlocksFetched + shuffleRemoteBlocksFetched } else { -1L } diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala index 798cff8d60fcd..53b01313d5d4c 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.status +import scala.util.Random + import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.executor.TaskMetrics import org.apache.spark.internal.config.History.{HYBRID_STORE_DISK_BACKEND, HybridStoreDiskBackend} @@ -137,13 +139,52 @@ class AppStatusStoreSuite extends SparkFunSuite { * Task summary will consider (1, 3, 5) only */ val summary = appStore.taskSummary(stageId, attemptId, uiQuantiles).get + val successfulTasks = Array(getTaskMetrics(1), getTaskMetrics(3), getTaskMetrics(5)) - val values = Array(1.0, 3.0, 5.0) + def assertQuantiles(metricGetter: TaskMetrics => Double, + actualQuantiles: Seq[Double]): Unit = { + val values = successfulTasks.map(metricGetter) + val expectedQuantiles = new Distribution(values, 0, values.length) + .getQuantiles(uiQuantiles.sorted) - val dist = new Distribution(values, 0, values.length).getQuantiles(uiQuantiles.sorted) - dist.zip(summary.executorRunTime).foreach { case (expected, actual) => - assert(expected === actual) + assert(actualQuantiles === expectedQuantiles) } + + assertQuantiles(_.executorDeserializeTime, summary.executorDeserializeTime) + assertQuantiles(_.executorDeserializeCpuTime, summary.executorDeserializeCpuTime) + assertQuantiles(_.executorRunTime, summary.executorRunTime) + assertQuantiles(_.executorRunTime, summary.executorRunTime) + assertQuantiles(_.executorCpuTime, summary.executorCpuTime) + assertQuantiles(_.resultSize, summary.resultSize) + assertQuantiles(_.jvmGCTime, summary.jvmGcTime) + assertQuantiles(_.resultSerializationTime, summary.resultSerializationTime) + assertQuantiles(_.memoryBytesSpilled, summary.memoryBytesSpilled) + assertQuantiles(_.diskBytesSpilled, summary.diskBytesSpilled) + assertQuantiles(_.peakExecutionMemory, summary.peakExecutionMemory) + assertQuantiles(_.inputMetrics.bytesRead, summary.inputMetrics.bytesRead) + 
assertQuantiles(_.inputMetrics.recordsRead, summary.inputMetrics.recordsRead) + assertQuantiles(_.outputMetrics.bytesWritten, summary.outputMetrics.bytesWritten) + assertQuantiles(_.outputMetrics.recordsWritten, summary.outputMetrics.recordsWritten) + assertQuantiles(_.shuffleReadMetrics.remoteBlocksFetched, + summary.shuffleReadMetrics.remoteBlocksFetched) + assertQuantiles(_.shuffleReadMetrics.localBlocksFetched, + summary.shuffleReadMetrics.localBlocksFetched) + assertQuantiles(_.shuffleReadMetrics.fetchWaitTime, summary.shuffleReadMetrics.fetchWaitTime) + assertQuantiles(_.shuffleReadMetrics.remoteBytesRead, + summary.shuffleReadMetrics.remoteBytesRead) + assertQuantiles(_.shuffleReadMetrics.remoteBytesReadToDisk, + summary.shuffleReadMetrics.remoteBytesReadToDisk) + assertQuantiles( + t => t.shuffleReadMetrics.localBytesRead + t.shuffleReadMetrics.remoteBytesRead, + summary.shuffleReadMetrics.readBytes) + assertQuantiles( + t => t.shuffleReadMetrics.localBlocksFetched + t.shuffleReadMetrics.remoteBlocksFetched, + summary.shuffleReadMetrics.totalBlocksFetched) + assertQuantiles(_.shuffleWriteMetrics.bytesWritten, summary.shuffleWriteMetrics.writeBytes) + assertQuantiles(_.shuffleWriteMetrics.writeTime, summary.shuffleWriteMetrics.writeTime) + assertQuantiles(_.shuffleWriteMetrics.recordsWritten, + summary.shuffleWriteMetrics.writeRecords) + appStore.close() } } @@ -227,32 +268,41 @@ class AppStatusStoreSuite extends SparkFunSuite { liveTask.write(store.asInstanceOf[ElementTrackingStore], 1L) } - private def getTaskMetrics(i: Int): TaskMetrics = { + /** + * Creates fake task metrics + * @param seed The random seed. The output will be reproducible for a given seed. + * @return The test metrics object with fake data + */ + private def getTaskMetrics(seed: Int): TaskMetrics = { + val random = new Random(seed) + val randomMax = 1000 + def nextInt(): Int = random.nextInt(randomMax) + val taskMetrics = new TaskMetrics() - taskMetrics.setExecutorDeserializeTime(i) - taskMetrics.setExecutorDeserializeCpuTime(i) - taskMetrics.setExecutorRunTime(i) - taskMetrics.setExecutorCpuTime(i) - taskMetrics.setResultSize(i) - taskMetrics.setJvmGCTime(i) - taskMetrics.setResultSerializationTime(i) - taskMetrics.incMemoryBytesSpilled(i) - taskMetrics.incDiskBytesSpilled(i) - taskMetrics.incPeakExecutionMemory(i) - taskMetrics.inputMetrics.incBytesRead(i) - taskMetrics.inputMetrics.incRecordsRead(i) - taskMetrics.outputMetrics.setBytesWritten(i) - taskMetrics.outputMetrics.setRecordsWritten(i) - taskMetrics.shuffleReadMetrics.incRemoteBlocksFetched(i) - taskMetrics.shuffleReadMetrics.incLocalBlocksFetched(i) - taskMetrics.shuffleReadMetrics.incFetchWaitTime(i) - taskMetrics.shuffleReadMetrics.incRemoteBytesRead(i) - taskMetrics.shuffleReadMetrics.incRemoteBytesReadToDisk(i) - taskMetrics.shuffleReadMetrics.incLocalBytesRead(i) - taskMetrics.shuffleReadMetrics.incRecordsRead(i) - taskMetrics.shuffleWriteMetrics.incBytesWritten(i) - taskMetrics.shuffleWriteMetrics.incWriteTime(i) - taskMetrics.shuffleWriteMetrics.incRecordsWritten(i) + taskMetrics.setExecutorDeserializeTime(nextInt()) + taskMetrics.setExecutorDeserializeCpuTime(nextInt()) + taskMetrics.setExecutorRunTime(nextInt()) + taskMetrics.setExecutorCpuTime(nextInt()) + taskMetrics.setResultSize(nextInt()) + taskMetrics.setJvmGCTime(nextInt()) + taskMetrics.setResultSerializationTime(nextInt()) + taskMetrics.incMemoryBytesSpilled(nextInt()) + taskMetrics.incDiskBytesSpilled(nextInt()) + taskMetrics.incPeakExecutionMemory(nextInt()) + 
taskMetrics.inputMetrics.incBytesRead(nextInt()) + taskMetrics.inputMetrics.incRecordsRead(nextInt()) + taskMetrics.outputMetrics.setBytesWritten(nextInt()) + taskMetrics.outputMetrics.setRecordsWritten(nextInt()) + taskMetrics.shuffleReadMetrics.incRemoteBlocksFetched(nextInt()) + taskMetrics.shuffleReadMetrics.incLocalBlocksFetched(nextInt()) + taskMetrics.shuffleReadMetrics.incFetchWaitTime(nextInt()) + taskMetrics.shuffleReadMetrics.incRemoteBytesRead(nextInt()) + taskMetrics.shuffleReadMetrics.incRemoteBytesReadToDisk(nextInt()) + taskMetrics.shuffleReadMetrics.incLocalBytesRead(nextInt()) + taskMetrics.shuffleReadMetrics.incRecordsRead(nextInt()) + taskMetrics.shuffleWriteMetrics.incBytesWritten(nextInt()) + taskMetrics.shuffleWriteMetrics.incWriteTime(nextInt()) + taskMetrics.shuffleWriteMetrics.incRecordsWritten(nextInt()) taskMetrics } From 8fabd5efe311347a4d1c441813dd0bd3148655df Mon Sep 17 00:00:00 2001 From: weixiuli Date: Tue, 8 Mar 2022 12:59:34 -0600 Subject: [PATCH 428/513] [SPARK-38428][SHUFFLE] Check the FetchShuffleBlocks message only once to improve iteration in external shuffle service ### What changes were proposed in this pull request? Currently, the FetchShuffleBlocks is checked in each element of a ShuffleManagedBufferIterator, which is unnecessary and it only needs to be checked once in the ShuffleManagedBufferIterator constructor. ### Why are the changes needed? To improve performance. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing unittests. Closes #35743 from weixiuli/SPARK-38428-improve-iteration. Authored-by: weixiuli Signed-off-by: Mridul Muralidharan gmail.com> --- .../spark/network/shuffle/ExternalBlockHandler.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java index 1e413f6b2f375..52bc0f9c2226d 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java @@ -512,14 +512,14 @@ private class ShuffleManagedBufferIterator implements Iterator { mapIds = msg.mapIds; reduceIds = msg.reduceIds; batchFetchEnabled = msg.batchFetchEnabled; - } - - @Override - public boolean hasNext() { // mapIds.length must equal to reduceIds.length, and the passed in FetchShuffleBlocks // must have non-empty mapIds and reduceIds, see the checking logic in // OneForOneBlockFetcher. assert(mapIds.length != 0 && mapIds.length == reduceIds.length); + } + + @Override + public boolean hasNext() { return mapIdx < mapIds.length && reduceIdx < reduceIds[mapIdx].length; } From 049d6d101f7a10b60ab1f126250327e1b4b8f271 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 8 Mar 2022 17:06:14 -0800 Subject: [PATCH 429/513] [SPARK-38443][SS][DOC] Document config STREAMING_SESSION_WINDOW_MERGE_SESSIONS_IN_LOCAL_PARTITION ### What changes were proposed in this pull request? This proposes to document the config `spark.sql.streaming.sessionWindow.merge.sessions.in.local.partition` in structured streaming guide. ### Why are the changes needed? We have use case the customer faces issue on large shuffle write in session window. Users are hardly to know what is the issue and cannot find related document. The config is hidden, although it is useful for such case. 
We should document it so end users can find it easily. ### Does this PR introduce _any_ user-facing change? Yes, documented a config in user-facing guide. ### How was this patch tested? Basically doc change. Closes #35762 from viirya/ss_doc. Authored-by: Liang-Chi Hsieh Signed-off-by: Liang-Chi Hsieh --- docs/structured-streaming-programming-guide.md | 7 +++++++ .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 54325850a0333..2db4f92842cd6 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -1225,6 +1225,13 @@ Note that there are some restrictions when you use session window in streaming q For batch query, global window (only having `session_window` in grouping key) is supported. +By default, Spark does not perform partial aggregation for session window aggregation, since it requires additional +sort in local partitions before grouping. It works better for the case there are only few number of input rows in +same group key for each local partition, but for the case there are numerous input rows having same group key in +local partition, doing partial aggregation can still increase the performance significantly despite additional sort. + +You can enable `spark.sql.streaming.sessionWindow.merge.sessions.in.local.partition` to indicate Spark to perform partial aggregation. + ##### Conditions for watermarking to clean aggregation state {:.no_toc} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index a050156518c2c..c7aec8e023b22 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1720,7 +1720,6 @@ object SQLConf { val STREAMING_SESSION_WINDOW_MERGE_SESSIONS_IN_LOCAL_PARTITION = buildConf("spark.sql.streaming.sessionWindow.merge.sessions.in.local.partition") - .internal() .doc("When true, streaming session window sorts and merge sessions in local partition " + "prior to shuffle. This is to reduce the rows to shuffle, but only beneficial when " + "there're lots of rows in a batch being assigned to same sessions.") From 59ce0a706cb52a54244a747d0a070b61f5cddd1c Mon Sep 17 00:00:00 2001 From: Karen Feng Date: Wed, 9 Mar 2022 09:34:01 +0800 Subject: [PATCH 430/513] [SPARK-37865][SQL] Fix union deduplication correctness bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Fixes a correctness bug in `Union` in the case that there are duplicate output columns. Previously, duplicate columns on one side of the union would result in a duplicate column being output on the other side of the union. To do so, we go through the union’s child’s output and find the duplicates. For each duplicate set, there is a first duplicate: this one is left alone. All following duplicates are aliased and given a tag; this tag is used to remove ambiguity during resolution. As the first duplicate is left alone, the user can still select it, avoiding a breaking change. As the later duplicates are given new expression IDs, this fixes the correctness bug. ### Why are the changes needed? 
Output of union with duplicate columns in the children was incorrect ### Does this PR introduce _any_ user-facing change? Example query: ``` SELECT a, a FROM VALUES (1, 1), (1, 2) AS t1(a, b) UNION ALL SELECT c, d FROM VALUES (2, 2), (2, 3) AS t2(c, d) ``` Result before: ``` a | a _ | _ 1 | 1 1 | 1 2 | 2 2 | 2 ``` Result after: ``` a | a _ | _ 1 | 1 1 | 1 2 | 2 2 | 3 ``` ### How was this patch tested? Unit tests Closes #35760 from karenfeng/spark-37865. Authored-by: Karen Feng Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 25 ++++++++ .../sql/catalyst/expressions/package.scala | 8 ++- .../org/apache/spark/sql/DataFrameSuite.scala | 63 +++++++++++++++++++ 3 files changed, 95 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 1cb35fdf53616..245e232cda1ee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1380,6 +1380,31 @@ class Analyzer(override val catalogManager: CatalogManager) throw QueryCompilationErrors.invalidStarUsageError("explode/json_tuple/UDTF", extractStar(g.generator.children)) + case u @ Union(children, _, _) + // if there are duplicate output columns, give them unique expr ids + if children.exists(c => c.output.map(_.exprId).distinct.length < c.output.length) => + val newChildren = children.map { c => + if (c.output.map(_.exprId).distinct.length < c.output.length) { + val existingExprIds = mutable.HashSet[ExprId]() + val projectList = c.output.map { attr => + if (existingExprIds.contains(attr.exprId)) { + // replace non-first duplicates with aliases and tag them + val newMetadata = new MetadataBuilder().withMetadata(attr.metadata) + .putNull("__is_duplicate").build() + Alias(attr, attr.name)(explicitMetadata = Some(newMetadata)) + } else { + // leave first duplicate alone + existingExprIds.add(attr.exprId) + attr + } + } + Project(projectList, c) + } else { + c + } + } + u.withNewChildren(newChildren) + // When resolve `SortOrder`s in Sort based on child, don't report errors as // we still have chance to resolve it based on its descendants case s @ Sort(ordering, global, child) if child.resolved && !s.resolved => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala index d950fef3b26a5..6a4fb099c8b78 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala @@ -335,8 +335,14 @@ package object expressions { matchWithFourOrMoreQualifierParts(nameParts, resolver) } + val prunedCandidates = if (candidates.size > 1) { + candidates.filter(c => !c.metadata.contains("__is_duplicate")) + } else { + candidates + } + def name = UnresolvedAttribute(nameParts).name - candidates match { + prunedCandidates match { case Seq(a) if nestedFields.nonEmpty => // One match, but we also need to extract the requested nested field. 
// The foldLeft adds ExtractValues for every remaining parts of the identifier, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index eb0dbb1289b1b..d4e482540161f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -3133,6 +3133,69 @@ class DataFrameSuite extends QueryTest checkAnswer(df, Row(Seq("string2")) :: Row(Seq("string5")) :: Nil) } } + + test("SPARK-37865: Do not deduplicate union output columns") { + val df1 = Seq((1, 1), (1, 2)).toDF("a", "b") + val df2 = Seq((2, 2), (2, 3)).toDF("c", "d") + + def sqlQuery(cols1: Seq[String], cols2: Seq[String], distinct: Boolean): String = { + val union = if (distinct) { + "UNION" + } else { + "UNION ALL" + } + s""" + |SELECT ${cols1.mkString(",")} FROM VALUES (1, 1), (1, 2) AS t1(a, b) + |$union SELECT ${cols2.mkString(",")} FROM VALUES (2, 2), (2, 3) AS t2(c, d) + |""".stripMargin + } + + Seq( + (Seq("a", "a"), Seq("c", "d"), Seq(Row(1, 1), Row(1, 1), Row(2, 2), Row(2, 3))), + (Seq("a", "b"), Seq("c", "d"), Seq(Row(1, 1), Row(1, 2), Row(2, 2), Row(2, 3))), + (Seq("a", "b"), Seq("c", "c"), Seq(Row(1, 1), Row(1, 2), Row(2, 2), Row(2, 2))) + ).foreach { case (cols1, cols2, rows) => + // UNION ALL (non-distinct) + val df3 = df1.selectExpr(cols1: _*).union(df2.selectExpr(cols2: _*)) + checkAnswer(df3, rows) + + val t3 = sqlQuery(cols1, cols2, false) + checkAnswer(sql(t3), rows) + + // Avoid breaking change + var correctAnswer = rows.map(r => Row(r(0))) + checkAnswer(df3.select(df1.col("a")), correctAnswer) + checkAnswer(sql(s"select a from ($t3) t3"), correctAnswer) + + // This has always been broken + intercept[AnalysisException] { + df3.select(df2.col("d")).collect() + } + intercept[AnalysisException] { + sql(s"select d from ($t3) t3") + } + + // UNION (distinct) + val df4 = df3.distinct + checkAnswer(df4, rows.distinct) + + val t4 = sqlQuery(cols1, cols2, true) + checkAnswer(sql(t4), rows.distinct) + + // Avoid breaking change + correctAnswer = rows.distinct.map(r => Row(r(0))) + checkAnswer(df4.select(df1.col("a")), correctAnswer) + checkAnswer(sql(s"select a from ($t4) t4"), correctAnswer) + + // This has always been broken + intercept[AnalysisException] { + df4.select(df2.col("d")).collect() + } + intercept[AnalysisException] { + sql(s"select d from ($t4) t4") + } + } + } } case class GroupByKey(a: Int, b: Int) From 43c7824bba40ebfb64dcd50d8d0e84b5a4d3c8c7 Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Wed, 9 Mar 2022 11:03:57 +0900 Subject: [PATCH 431/513] [SPARK-38412][SS] Fix the swapped sequence of from and to in StateSchemaCompatibilityChecker ### What changes were proposed in this pull request? This PR fixes the StateSchemaCompatibilityChecker which mistakenly swapped `from` (should be provided schema) and `to` (should be existing schema). ### Why are the changes needed? The bug mistakenly allows the case where it should not be allowed, and disallows the case where it should be allowed. That allows nullable column to be stored into non-nullable column, which should be prohibited. This is less likely making runtime problem since state schema is conceptual one and row can be stored even not respecting the state schema. The opposite case is worse, that disallows non-nullable column to be stored into nullable column, which should be allowed. Spark fails the query for this case. ### Does this PR introduce _any_ user-facing change? 
Yes, after the fix, storing non-nullable column into nullable column for state will be allowed, which should have been allowed. ### How was this patch tested? Modified UTs. Closes #35731 from HeartSaVioR/SPARK-38412. Authored-by: Jungtaek Lim Signed-off-by: Jungtaek Lim --- .../StateSchemaCompatibilityChecker.scala | 2 +- ...StateSchemaCompatibilityCheckerSuite.scala | 51 ++++++++++++++++--- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala index 20625e10f321e..0c8cabb75ed65 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala @@ -72,7 +72,7 @@ class StateSchemaCompatibilityChecker( } private def schemasCompatible(storedSchema: StructType, schema: StructType): Boolean = - DataType.equalsIgnoreNameAndCompatibleNullability(storedSchema, schema) + DataType.equalsIgnoreNameAndCompatibleNullability(schema, storedSchema) // Visible for testing private[sql] def readSchemaFile(): (StructType, StructType) = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala index a9cc90ca45ce8..1539341359337 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala @@ -63,6 +63,8 @@ class StateSchemaCompatibilityCheckerSuite extends SharedSparkSession { private val valueSchema65535Bytes = new StructType() .add(StructField("v" * (65535 - 87), IntegerType, nullable = true)) + // Checks on adding/removing (nested) field. + test("adding field to key should fail") { val fieldAddedKeySchema = keySchema.add(StructField("newKey", IntegerType)) verifyException(keySchema, valueSchema, fieldAddedKeySchema, valueSchema) @@ -107,6 +109,8 @@ class StateSchemaCompatibilityCheckerSuite extends SharedSparkSession { verifyException(keySchema, valueSchema, keySchema, newValueSchema) } + // Checks on changing type of (nested) field. + test("changing the type of field in key should fail") { val typeChangedKeySchema = StructType(keySchema.map(_.copy(dataType = TimestampType))) verifyException(keySchema, valueSchema, typeChangedKeySchema, valueSchema) @@ -129,28 +133,59 @@ class StateSchemaCompatibilityCheckerSuite extends SharedSparkSession { verifyException(keySchema, valueSchema, keySchema, newValueSchema) } - test("changing the nullability of nullable to non-nullable in key should fail") { + // Checks on changing nullability of (nested) field. + // Note that these tests have different format of the test name compared to others, since it was + // misleading to understand the assignment as the opposite way. 
+ + test("storing non-nullable column into nullable column in key should be allowed") { val nonNullChangedKeySchema = StructType(keySchema.map(_.copy(nullable = false))) - verifyException(keySchema, valueSchema, nonNullChangedKeySchema, valueSchema) + verifySuccess(keySchema, valueSchema, nonNullChangedKeySchema, valueSchema) } - test("changing the nullability of nullable to non-nullable in value should fail") { + test("storing non-nullable column into nullable column in value schema should be allowed") { val nonNullChangedValueSchema = StructType(valueSchema.map(_.copy(nullable = false))) - verifyException(keySchema, valueSchema, keySchema, nonNullChangedValueSchema) + verifySuccess(keySchema, valueSchema, keySchema, nonNullChangedValueSchema) } - test("changing the nullability of nullable to nonnullable in nested field in key should fail") { + test("storing non-nullable into nullable in nested field in key should be allowed") { val typeChangedNestedSchema = StructType(structSchema.map(_.copy(nullable = false))) val newKeySchema = applyNewSchemaToNestedFieldInKey(typeChangedNestedSchema) - verifyException(keySchema, valueSchema, newKeySchema, valueSchema) + verifySuccess(keySchema, valueSchema, newKeySchema, valueSchema) } - test("changing the nullability of nullable to nonnullable in nested field in value should fail") { + test("storing non-nullable into nullable in nested field in value should be allowed") { val typeChangedNestedSchema = StructType(structSchema.map(_.copy(nullable = false))) val newValueSchema = applyNewSchemaToNestedFieldInValue(typeChangedNestedSchema) - verifyException(keySchema, valueSchema, keySchema, newValueSchema) + verifySuccess(keySchema, valueSchema, keySchema, newValueSchema) + } + + test("storing nullable column into non-nullable column in key should fail") { + val nonNullChangedKeySchema = StructType(keySchema.map(_.copy(nullable = false))) + verifyException(nonNullChangedKeySchema, valueSchema, keySchema, valueSchema) + } + + test("storing nullable column into non-nullable column in value schema should fail") { + val nonNullChangedValueSchema = StructType(valueSchema.map(_.copy(nullable = false))) + verifyException(keySchema, nonNullChangedValueSchema, keySchema, valueSchema) + } + + test("storing nullable column into non-nullable column in nested field in key should fail") { + val typeChangedNestedSchema = StructType(structSchema.map(_.copy(nullable = false))) + val newKeySchema = applyNewSchemaToNestedFieldInKey(typeChangedNestedSchema) + verifyException(newKeySchema, valueSchema, keySchema, valueSchema) } + test("storing nullable column into non-nullable column in nested field in value should fail") { + val typeChangedNestedSchema = StructType(structSchema.map(_.copy(nullable = false))) + val newValueSchema = applyNewSchemaToNestedFieldInValue(typeChangedNestedSchema) + verifyException(keySchema, newValueSchema, keySchema, valueSchema) + } + + // Checks on changing name of (nested) field. + // Changing the name is allowed since it may be possible Spark can make relevant changes from + // operators/functions by chance. This opens a risk that end users swap two fields having same + // data type, but there is no way to address both. 
+ test("changing the name of field in key should be allowed") { val newName: StructField => StructField = f => f.copy(name = f.name + "_new") val fieldNameChangedKeySchema = StructType(keySchema.map(newName)) From f2058ebf7de2baa065030265a314db9a80f18255 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 9 Mar 2022 10:04:41 +0800 Subject: [PATCH 432/513] [SPARK-38450][SQL] Fix HiveQuerySuite//PushFoldableIntoBranchesSuite/TransposeWindowSuite under ANSI mode ### What changes were proposed in this pull request? Fix the following test suites under ANSI mode: * HiveQuerySuite * HiveTypeCoercionSuite * HivePartitionFilteringSuite * PushFoldableIntoBranchesSuite * TransposeWindowSuite ### Why are the changes needed? To set up a new GA test job with ANSI mode on ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually turn on ANSI mode and test . Also it should pass GA tests. Closes #35771 from gengliangwang/fixHiveAnsi. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../PushFoldableIntoBranchesSuite.scala | 10 +++-- .../optimizer/TransposeWindowSuite.scala | 4 +- .../client/HivePartitionFilteringSuite.scala | 36 ++++++++------- .../sql/hive/execution/HiveQuerySuite.scala | 44 +++++++++++-------- .../execution/HiveTypeCoercionSuite.scala | 17 +++++-- 5 files changed, 66 insertions(+), 45 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala index 250a62d5eeb0b..7b9041a904a60 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala @@ -342,10 +342,12 @@ class PushFoldableIntoBranchesSuite assertEquivalent( EqualTo(CaseWhen(Seq((a, Literal.create(null, IntegerType)))), Literal(2)), Literal.create(null, BooleanType)) - assertEquivalent( - EqualTo(CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), Literal("str")))).cast(IntegerType), - Literal(2)), - CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), Literal.create(null, BooleanType))))) + if (!conf.ansiEnabled) { + assertEquivalent( + EqualTo(CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), Literal("str")))).cast(IntegerType), + Literal(2)), + CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), Literal.create(null, BooleanType))))) + } } test("SPARK-33884: simplify CaseWhen clauses with (true and false) and (false and true)") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala index 4fd681d4cedc8..a53e04da19d41 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.Rand +import org.apache.spark.sql.catalyst.expressions.{Concat, Rand} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -91,7 +91,7 @@ 
class TransposeWindowSuite extends PlanTest { test("don't transpose two adjacent windows with intersection of partition and output set") { val query = testRelation - .window(Seq(('a + 'b).as('e), sum(c).as('sum_a_2)), partitionSpec3, Seq.empty) + .window(Seq(Concat(Seq('a, 'b)).as('e), sum(c).as('sum_a_2)), partitionSpec3, Seq.empty) .window(Seq(sum(c).as('sum_a_1)), Seq(a, 'e), Seq.empty) val analyzed = query.analyze diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index 5fef7d1d623ac..e9ab8edf9ad18 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -637,30 +637,32 @@ class HivePartitionFilteringSuite(version: String) } test("SPARK-35437: getPartitionsByFilter: relax cast if does not need timezone") { - // does not need time zone - Seq(("true", "20200104" :: Nil), ("false", dateStrValue)).foreach { - case (pruningFastFallbackEnabled, prunedPartition) => + if (!SQLConf.get.ansiEnabled) { + // does not need time zone + Seq(("true", "20200104" :: Nil), ("false", dateStrValue)).foreach { + case (pruningFastFallbackEnabled, prunedPartition) => + withSQLConf(pruningFastFallback -> pruningFastFallbackEnabled) { + testMetastorePartitionFiltering( + attr("datestr").cast(IntegerType) === 20200104, + dsValue, + hValue, + chunkValue, + dateValue, + prunedPartition) + } + } + + // need time zone + Seq("true", "false").foreach { pruningFastFallbackEnabled => withSQLConf(pruningFastFallback -> pruningFastFallbackEnabled) { testMetastorePartitionFiltering( - attr("datestr").cast(IntegerType) === 20200104, + attr("datestr").cast(DateType) === Date.valueOf("2020-01-01"), dsValue, hValue, chunkValue, dateValue, - prunedPartition) + dateStrValue) } - } - - // need time zone - Seq("true", "false").foreach { pruningFastFallbackEnabled => - withSQLConf(pruningFastFallback -> pruningFastFallbackEnabled) { - testMetastorePartitionFiltering( - attr("datestr").cast(DateType) === Date.valueOf("2020-01-01"), - dsValue, - hValue, - chunkValue, - dateValue, - dateStrValue) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index ee091e89379c4..e80c41401227d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -236,17 +236,19 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd createQueryTest("no from clause", "SELECT 1, +1, -1") - createQueryTest("boolean = number", - """ - |SELECT - | 1 = true, 1L = true, 1Y = true, true = 1, true = 1L, true = 1Y, - | 0 = true, 0L = true, 0Y = true, true = 0, true = 0L, true = 0Y, - | 1 = false, 1L = false, 1Y = false, false = 1, false = 1L, false = 1Y, - | 0 = false, 0L = false, 0Y = false, false = 0, false = 0L, false = 0Y, - | 2 = true, 2L = true, 2Y = true, true = 2, true = 2L, true = 2Y, - | 2 = false, 2L = false, 2Y = false, false = 2, false = 2L, false = 2Y - |FROM src LIMIT 1 + if (!conf.ansiEnabled) { + createQueryTest("boolean = number", + """ + |SELECT + | 1 = true, 1L = true, 1Y = true, true = 1, true = 1L, true = 1Y, + | 0 = true, 0L = true, 0Y = true, true = 0, true = 0L, true = 0Y, + | 
1 = false, 1L = false, 1Y = false, false = 1, false = 1L, false = 1Y, + | 0 = false, 0L = false, 0Y = false, false = 0, false = 0L, false = 0Y, + | 2 = true, 2L = true, 2Y = true, true = 2, true = 2L, true = 2Y, + | 2 = false, 2L = false, 2Y = false, false = 2, false = 2L, false = 2Y + |FROM src LIMIT 1 """.stripMargin) + } test("CREATE TABLE AS runs once") { sql("CREATE TABLE foo AS SELECT 1 FROM src LIMIT 1").collect() @@ -282,11 +284,13 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd createQueryTest("Constant Folding Optimization for AVG_SUM_COUNT", "SELECT AVG(0), SUM(0), COUNT(null), COUNT(value) FROM src GROUP BY key") - createQueryTest("Cast Timestamp to Timestamp in UDF", - """ - | SELECT DATEDIFF(CAST(value AS timestamp), CAST('2002-03-21 00:00:00' AS timestamp)) - | FROM src LIMIT 1 + if (!conf.ansiEnabled) { + createQueryTest("Cast Timestamp to Timestamp in UDF", + """ + | SELECT DATEDIFF(CAST(value AS timestamp), CAST('2002-03-21 00:00:00' AS timestamp)) + | FROM src LIMIT 1 """.stripMargin) + } createQueryTest("Date comparison test 1", """ @@ -516,8 +520,10 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd createQueryTest("Specify the udtf output", "SELECT d FROM (SELECT explode(array(1,1)) d FROM src LIMIT 1) t") - createQueryTest("SPARK-9034 Reflect field names defined in GenericUDTF #1", - "SELECT col FROM (SELECT explode(array(key,value)) FROM src LIMIT 1) t") + if (!conf.ansiEnabled) { + createQueryTest("SPARK-9034 Reflect field names defined in GenericUDTF #1", + "SELECT col FROM (SELECT explode(array(key,value)) FROM src LIMIT 1) t") + } createQueryTest("SPARK-9034 Reflect field names defined in GenericUDTF #2", "SELECT key,value FROM (SELECT explode(map(key,value)) FROM src LIMIT 1) t") @@ -768,9 +774,11 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd test("SPARK-5367: resolve star expression in udf") { assert(sql("select concat(*) from src limit 5").collect().size == 5) - assert(sql("select array(*) from src limit 5").collect().size == 5) assert(sql("select concat(key, *) from src limit 5").collect().size == 5) - assert(sql("select array(key, *) from src limit 5").collect().size == 5) + if (!conf.ansiEnabled) { + assert(sql("select array(*) from src limit 5").collect().size == 5) + assert(sql("select array(key, *) from src limit 5").collect().size == 5) + } } test("Exactly once semantics for DDL and command statements") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTypeCoercionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTypeCoercionSuite.scala index 3f75454b8d8da..9b0d7d9f674d9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTypeCoercionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTypeCoercionSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo} import org.apache.spark.sql.execution.ProjectExec import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.tags.SlowHiveTest /** @@ -27,13 +28,21 @@ import org.apache.spark.tags.SlowHiveTest */ @SlowHiveTest class HiveTypeCoercionSuite extends HiveComparisonTest { - val baseTypes = Seq( + val baseTypes = if (SQLConf.get.ansiEnabled) { + Seq( ("1", "1"), ("1.0", "CAST(1.0 AS DOUBLE)"), - ("1L", "1L"), ("1S", "1S"), - ("1Y", "1Y"), - ("'1'", "'1'")) + 
("1Y", "1Y")) + } else { + Seq( + ("1", "1"), + ("1.0", "CAST(1.0 AS DOUBLE)"), + ("1L", "1L"), + ("1S", "1S"), + ("1Y", "1Y"), + ("'1'", "'1'")) + } baseTypes.foreach { case (ni, si) => baseTypes.foreach { case (nj, sj) => From 35c0e5cdf8e646203ce0310bfeee269addacdaa7 Mon Sep 17 00:00:00 2001 From: Harutaka Kawamura Date: Tue, 8 Mar 2022 19:53:19 -0800 Subject: [PATCH 433/513] [MINOR][PYTHON] Fix `MultilayerPerceptronClassifierTest.test_raw_and_probability_prediction` ### What changes were proposed in this pull request? - Increase `rtol`. - Use `numpy.testing.assert_allclose`. ### Why are the changes needed? - To make the flaky test less likely to fail. - To get a better assertion error message. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Updated test Closes #35778 from harupy/fix-test_raw_and_probability_prediction. Authored-by: Harutaka Kawamura Signed-off-by: Dongjoon Hyun --- python/pyspark/ml/tests/test_algorithms.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py index bf74988a7c097..08da8592c043d 100644 --- a/python/pyspark/ml/tests/test_algorithms.py +++ b/python/pyspark/ml/tests/test_algorithms.py @@ -101,7 +101,15 @@ def test_raw_and_probability_prediction(self): expected_rawPrediction = [-11.6081922998, -8.15827998691, 22.17757045] self.assertTrue(result.prediction, expected_prediction) self.assertTrue(np.allclose(result.probability, expected_probability, atol=1e-4)) - self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, rtol=0.11)) + # Use `assert_allclose` to show the value of `result.rawPrediction` in the assertion error + # message + np.testing.assert_allclose( + result.rawPrediction, + expected_rawPrediction, + rtol=0.15, + # Use the same default value as `np.allclose` + atol=1e-08, + ) class OneVsRestTests(SparkSessionTestCase): From 4da04fc56c4108a4eab1e262d44f6e0822a61a23 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 8 Mar 2022 19:56:55 -0800 Subject: [PATCH 434/513] [SPARK-37600][BUILD] Upgrade to Hadoop 3.3.2 ### What changes were proposed in this pull request? This PR aims to upgrade to Hadoop 3.3.2. In addition, it also removes the LZ4 wrapper classes added in SPARK-36669, therefore fixing SPARK-36679. ### Why are the changes needed? Hadoop 3.3.2 has many bug fixes and we also can remove our internal hacked Hadoop codecs. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #34855 from sunchao/SPARK-37600. 
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- LICENSE-binary | 2 + NOTICE-binary | 3 ++ .../shaded/net/jpountz/lz4/LZ4Compressor.java | 37 ------------- .../shaded/net/jpountz/lz4/LZ4Factory.java | 49 ----------------- .../net/jpountz/lz4/LZ4SafeDecompressor.java | 36 ------------- dev/deps/spark-deps-hadoop-3-hive-2.3 | 41 +++++++------- hadoop-cloud/pom.xml | 7 +++ licenses/LICENSE-jdom.txt | 54 +++++++++++++++++++ pom.xml | 4 +- project/MimaExcludes.scala | 7 ++- .../hive/client/IsolatedClientLoader.scala | 2 +- 11 files changed, 98 insertions(+), 144 deletions(-) delete mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java delete mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java delete mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java create mode 100644 licenses/LICENSE-jdom.txt diff --git a/LICENSE-binary b/LICENSE-binary index 7e29e613f8057..8bbc913262c89 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -456,6 +456,7 @@ net.sf.py4j:py4j org.jpmml:pmml-model org.jpmml:pmml-schema org.threeten:threeten-extra +org.jdom:jdom2 python/lib/py4j-*-src.zip python/pyspark/cloudpickle.py @@ -504,6 +505,7 @@ Common Development and Distribution License (CDDL) 1.0 javax.activation:activation http://www.oracle.com/technetwork/java/javase/tech/index-jsp-138795.html javax.xml.stream:stax-api https://jcp.org/en/jsr/detail?id=173 javax.transaction:javax.transaction-api +javax.xml.bind:jaxb-api Common Development and Distribution License (CDDL) 1.1 diff --git a/NOTICE-binary b/NOTICE-binary index 4ce8bf2f86b2a..95653c6f49a07 100644 --- a/NOTICE-binary +++ b/NOTICE-binary @@ -917,6 +917,9 @@ This product includes code (JaspellTernarySearchTrie) from Java Spelling Checkin g Package (jaspell): http://jaspell.sourceforge.net/ License: The BSD License (http://www.opensource.org/licenses/bsd-license.php) +This product includes software developed by the JDOM Project (http://www.jdom.org/) +License: https://raw.githubusercontent.com/hunterhacker/jdom/master/LICENSE.txt + The snowball stemmers in analysis/common/src/java/net/sf/snowball were developed by Martin Porter and Richard Boulton. diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java deleted file mode 100644 index 092ed59c6bb14..0000000000000 --- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.shaded.net.jpountz.lz4; - -/** - * TODO(SPARK-36679): A temporary workaround for SPARK-36669. 
We should remove this after - * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries. - * This does not need implement all net.jpountz.lz4.LZ4Compressor API, just the ones used - * by Hadoop Lz4Compressor. - */ -public final class LZ4Compressor { - - private net.jpountz.lz4.LZ4Compressor lz4Compressor; - - public LZ4Compressor(net.jpountz.lz4.LZ4Compressor lz4Compressor) { - this.lz4Compressor = lz4Compressor; - } - - public void compress(java.nio.ByteBuffer src, java.nio.ByteBuffer dest) { - lz4Compressor.compress(src, dest); - } -} diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java deleted file mode 100644 index 61829b2728bce..0000000000000 --- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.shaded.net.jpountz.lz4; - -/** - * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after - * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries. - * This does not need implement all net.jpountz.lz4.LZ4Factory API, just the ones used by - * Hadoop Lz4Compressor. - */ -public final class LZ4Factory { - - private net.jpountz.lz4.LZ4Factory lz4Factory; - - public LZ4Factory(net.jpountz.lz4.LZ4Factory lz4Factory) { - this.lz4Factory = lz4Factory; - } - - public static LZ4Factory fastestInstance() { - return new LZ4Factory(net.jpountz.lz4.LZ4Factory.fastestInstance()); - } - - public LZ4Compressor highCompressor() { - return new LZ4Compressor(lz4Factory.highCompressor()); - } - - public LZ4Compressor fastCompressor() { - return new LZ4Compressor(lz4Factory.fastCompressor()); - } - - public LZ4SafeDecompressor safeDecompressor() { - return new LZ4SafeDecompressor(lz4Factory.safeDecompressor()); - } -} diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java deleted file mode 100644 index cd3dd6f060f52..0000000000000 --- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.shaded.net.jpountz.lz4; - -/** - * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after - * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries. - * This does not need implement all net.jpountz.lz4.LZ4SafeDecompressor API, just the ones - * used by Hadoop Lz4Decompressor. - */ -public final class LZ4SafeDecompressor { - private net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor; - - public LZ4SafeDecompressor(net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor) { - this.lz4Decompressor = lz4Decompressor; - } - - public void decompress(java.nio.ByteBuffer src, java.nio.ByteBuffer dest) { - lz4Decompressor.decompress(src, dest); - } -} diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 9eb89065e8718..41b1deeeca1b3 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -6,11 +6,10 @@ ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.21//aircompressor-0.21.jar algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar -aliyun-java-sdk-core/3.4.0//aliyun-java-sdk-core-3.4.0.jar -aliyun-java-sdk-ecs/4.2.0//aliyun-java-sdk-ecs-4.2.0.jar -aliyun-java-sdk-ram/3.0.0//aliyun-java-sdk-ram-3.0.0.jar -aliyun-java-sdk-sts/3.0.0//aliyun-java-sdk-sts-3.0.0.jar -aliyun-sdk-oss/3.4.1//aliyun-sdk-oss-3.4.1.jar +aliyun-java-sdk-core/4.5.10//aliyun-java-sdk-core-4.5.10.jar +aliyun-java-sdk-kms/2.11.0//aliyun-java-sdk-kms-2.11.0.jar +aliyun-java-sdk-ram/3.1.0//aliyun-java-sdk-ram-3.1.0.jar +aliyun-sdk-oss/3.13.0//aliyun-sdk-oss-3.13.0.jar annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8//antlr4-runtime-4.8.jar @@ -26,7 +25,7 @@ automaton/1.11-8//automaton-1.11-8.jar avro-ipc/1.11.0//avro-ipc-1.11.0.jar avro-mapred/1.11.0//avro-mapred-1.11.0.jar avro/1.11.0//avro-1.11.0.jar -aws-java-sdk-bundle/1.11.901//aws-java-sdk-bundle-1.11.901.jar +aws-java-sdk-bundle/1.11.1026//aws-java-sdk-bundle-1.11.1026.jar azure-data-lake-store-sdk/2.3.9//azure-data-lake-store-sdk-2.3.9.jar azure-keyvault-core/1.0.0//azure-keyvault-core-1.0.0.jar azure-storage/7.0.1//azure-storage-7.0.1.jar @@ -67,18 +66,18 @@ generex/1.0.2//generex-1.0.2.jar gmetric4j/1.0.10//gmetric4j-1.0.10.jar gson/2.2.4//gson-2.2.4.jar guava/14.0.1//guava-14.0.1.jar -hadoop-aliyun/3.3.1//hadoop-aliyun-3.3.1.jar -hadoop-annotations/3.3.1//hadoop-annotations-3.3.1.jar -hadoop-aws/3.3.1//hadoop-aws-3.3.1.jar -hadoop-azure-datalake/3.3.1//hadoop-azure-datalake-3.3.1.jar -hadoop-azure/3.3.1//hadoop-azure-3.3.1.jar -hadoop-client-api/3.3.1//hadoop-client-api-3.3.1.jar -hadoop-client-runtime/3.3.1//hadoop-client-runtime-3.3.1.jar -hadoop-cloud-storage/3.3.1//hadoop-cloud-storage-3.3.1.jar -hadoop-cos/3.3.1//hadoop-cos-3.3.1.jar -hadoop-openstack/3.3.1//hadoop-openstack-3.3.1.jar +hadoop-aliyun/3.3.2//hadoop-aliyun-3.3.2.jar +hadoop-annotations/3.3.2//hadoop-annotations-3.3.2.jar +hadoop-aws/3.3.2//hadoop-aws-3.3.2.jar +hadoop-azure-datalake/3.3.2//hadoop-azure-datalake-3.3.2.jar 
+hadoop-azure/3.3.2//hadoop-azure-3.3.2.jar +hadoop-client-api/3.3.2//hadoop-client-api-3.3.2.jar +hadoop-client-runtime/3.3.2//hadoop-client-runtime-3.3.2.jar +hadoop-cloud-storage/3.3.2//hadoop-cloud-storage-3.3.2.jar +hadoop-cos/3.3.2//hadoop-cos-3.3.2.jar +hadoop-openstack/3.3.2//hadoop-openstack-3.3.2.jar hadoop-shaded-guava/1.1.1//hadoop-shaded-guava-1.1.1.jar -hadoop-yarn-server-web-proxy/3.3.1//hadoop-yarn-server-web-proxy-3.3.1.jar +hadoop-yarn-server-web-proxy/3.3.2//hadoop-yarn-server-web-proxy-3.3.2.jar hive-beeline/2.3.9//hive-beeline-2.3.9.jar hive-cli/2.3.9//hive-cli-2.3.9.jar hive-common/2.3.9//hive-common-2.3.9.jar @@ -97,9 +96,9 @@ hive-vector-code-gen/2.3.9//hive-vector-code-gen-2.3.9.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar -htrace-core4/4.1.0-incubating//htrace-core4-4.1.0-incubating.jar httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar +ini4j/0.5.4//ini4j-0.5.4.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar jackson-annotations/2.13.1//jackson-annotations-2.13.1.jar @@ -121,10 +120,11 @@ janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javolution/5.5.1//javolution-5.5.1.jar +jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.32//jcl-over-slf4j-1.7.32.jar jdo-api/3.0.1//jdo-api-3.0.1.jar -jdom/1.1//jdom-1.1.jar +jdom2/2.0.6//jdom2-2.0.6.jar jersey-client/2.34//jersey-client-2.34.jar jersey-common/2.34//jersey-common-2.34.jar jersey-container-servlet-core/2.34//jersey-container-servlet-core-2.34.jar @@ -204,6 +204,9 @@ objenesis/3.2//objenesis-3.2.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar +opentracing-api/0.33.0//opentracing-api-0.33.0.jar +opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar +opentracing-util/0.33.0//opentracing-util-0.33.0.jar orc-core/1.7.3//orc-core-1.7.3.jar orc-mapreduce/1.7.3//orc-mapreduce-1.7.3.jar orc-shims/1.7.3//orc-shims-1.7.3.jar diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 976ce02e9ea8d..3ba96055ae05f 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -267,6 +267,13 @@ com.google.guava guava + + + org.jacoco + org.jacoco.agent + + 3.3.2 2.5.0 ${hadoop.version} 3.6.2 @@ -3427,6 +3428,7 @@ hadoop-2 + 2.7.4 2.7.1 2.4 diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index f77bc5c284ec7..b045d4615d3c4 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -48,7 +48,12 @@ object MimaExcludes { // [SPARK-37780][SQL] QueryExecutionListener support SQLConf as constructor parameter ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.util.ExecutionListenerManager.this"), // [SPARK-37786][SQL] StreamingQueryListener support use SQLConf.get to get corresponding SessionState's SQLConf - ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryManager.this") + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryManager.this"), + + // [SPARK-37600][BUILD] Upgrade to Hadoop 3.3.2 + ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4Compressor"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4Factory"), + 
ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4SafeDecompressor")
   )

   // Exclude rules for 3.2.x from 3.1.1
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index 671b80f4b8abe..15c172a6e75c2 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -69,7 +69,7 @@ private[hive] object IsolatedClientLoader extends Logging {
       // If the error message contains hadoop, it is probably because the hadoop
       // version cannot be resolved.
       val fallbackVersion = if (VersionUtils.isHadoop3) {
-        "3.3.1"
+        "3.3.2"
       } else {
         "2.7.4"
       }

From b8c03eeb15a22895d3ab55b931b468ad012a28d4 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun
Date: Tue, 8 Mar 2022 20:16:43 -0800
Subject: [PATCH 435/513] [SPARK-38455][SPARK-38187][K8S] Support driver/executor `PodGroup` templates

### What changes were proposed in this pull request?
This PR aims to support driver/executor `PodGroup` templates like the following.

```yaml
apiVersion: scheduling.volcano.sh/v1beta1
kind: PodGroup
spec:
  minMember: 1000
  minResources:
    cpu: "4"
    memory: "16Gi"
  priorityClassName: executor-priority
  queue: executor-queue
```

### Why are the changes needed?
This is a simpler, more extensible, and more robust way to support Volcano features in the future, because we don't need to add new configurations like https://github.com/apache/spark/pull/35640 for every Volcano feature.

### Does this PR introduce _any_ user-facing change?
No, because this is a new feature.

### How was this patch tested?
Pass the CIs.

Closes #35776 from dongjoon-hyun/SPARK-38455.
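As an illustration, a minimal sketch of wiring such template files into a submission via the two config keys introduced by this patch (the file paths below are hypothetical):

```scala
import org.apache.spark.SparkConf

// Hypothetical template locations; only the config keys come from this patch.
val conf = new SparkConf()
  .set("spark.kubernetes.driver.podGroupTemplateFile",
    "/opt/spark/conf/driver-podgroup-template.yml")
  .set("spark.kubernetes.executor.podGroupTemplateFile",
    "/opt/spark/conf/executor-podgroup-template.yml")
```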
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- resource-managers/kubernetes/core/pom.xml | 5 +++ .../org/apache/spark/deploy/k8s/Config.scala | 14 +++++++ .../k8s/features/VolcanoFeatureStep.scala | 31 +++++++++------ .../resources/driver-podgroup-template.yml | 25 ++++++++++++ .../resources/executor-podgroup-template.yml | 25 ++++++++++++ .../features/VolcanoFeatureStepSuite.scala | 38 +++++++++++++++++++ 6 files changed, 127 insertions(+), 11 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/test/resources/driver-podgroup-template.yml create mode 100644 resource-managers/kubernetes/core/src/test/resources/executor-podgroup-template.yml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 6eb357ef2490c..611fee66342e3 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -44,6 +44,11 @@ volcano-model-v1beta1 ${kubernetes-client.version} + + io.fabric8 + volcano-client + ${kubernetes-client.version} + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index a0270fa29a2ed..e66ecf4312bb2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -292,6 +292,20 @@ private[spark] object Config extends Logging { .stringConf .createOptional + val KUBERNETES_DRIVER_PODGROUP_TEMPLATE_FILE = + ConfigBuilder("spark.kubernetes.driver.podGroupTemplateFile") + .doc("File containing a template pod group spec for driver") + .version("3.3.0") + .stringConf + .createOptional + + val KUBERNETES_EXECUTOR_PODGROUP_TEMPLATE_FILE = + ConfigBuilder("spark.kubernetes.executor.podGroupTemplateFile") + .doc("File containing a template pod group spec for executors") + .version("3.3.0") + .stringConf + .createOptional + val KUBERNETES_JOB_QUEUE = ConfigBuilder("spark.kubernetes.job.queue") .doc("The name of the queue to which the job is submitted. 
This info " + "will be stored in configuration and passed to specific feature step.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala index 58d6c5caf9fff..5fd0fc69ea2df 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala @@ -17,7 +17,8 @@ package org.apache.spark.deploy.k8s.features import io.fabric8.kubernetes.api.model._ -import io.fabric8.volcano.scheduling.v1beta1.PodGroupBuilder +import io.fabric8.volcano.client.DefaultVolcanoClient +import io.fabric8.volcano.scheduling.v1beta1.{PodGroup, PodGroupSpec} import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverConf, KubernetesExecutorConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ @@ -43,19 +44,27 @@ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureCon } override def getAdditionalPreKubernetesResources(): Seq[HasMetadata] = { - val podGroup = new PodGroupBuilder() - .editOrNewMetadata() - .withName(podGroupName) - .withNamespace(namespace) - .endMetadata() - .editOrNewSpec() - .endSpec() + val client = new DefaultVolcanoClient - queue.foreach(podGroup.editOrNewSpec().withQueue(_).endSpec()) + val template = if (kubernetesConf.isInstanceOf[KubernetesDriverConf]) { + kubernetesConf.get(KUBERNETES_DRIVER_PODGROUP_TEMPLATE_FILE) + } else { + kubernetesConf.get(KUBERNETES_EXECUTOR_PODGROUP_TEMPLATE_FILE) + } + val pg = template.map(client.podGroups.load(_).get).getOrElse(new PodGroup()) + var metadata = pg.getMetadata + if (metadata == null) metadata = new ObjectMeta + metadata.setName(podGroupName) + metadata.setNamespace(namespace) + pg.setMetadata(metadata) - priorityClassName.foreach(podGroup.editOrNewSpec().withPriorityClassName(_).endSpec()) + var spec = pg.getSpec + if (spec == null) spec = new PodGroupSpec + queue.foreach(spec.setQueue(_)) + priorityClassName.foreach(spec.setPriorityClassName(_)) + pg.setSpec(spec) - Seq(podGroup.build()) + Seq(pg) } override def configurePod(pod: SparkPod): SparkPod = { diff --git a/resource-managers/kubernetes/core/src/test/resources/driver-podgroup-template.yml b/resource-managers/kubernetes/core/src/test/resources/driver-podgroup-template.yml new file mode 100644 index 0000000000000..085d6b84c57aa --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/resources/driver-podgroup-template.yml @@ -0,0 +1,25 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: PodGroup +spec: + minMember: 1 + minResources: + cpu: "2" + memory: "2048Mi" + priorityClassName: driver-priority + queue: driver-queue diff --git a/resource-managers/kubernetes/core/src/test/resources/executor-podgroup-template.yml b/resource-managers/kubernetes/core/src/test/resources/executor-podgroup-template.yml new file mode 100644 index 0000000000000..f0f7b35f191a1 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/resources/executor-podgroup-template.yml @@ -0,0 +1,25 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: PodGroup +spec: + minMember: 1000 + minResources: + cpu: "4" + memory: "16Gi" + priorityClassName: executor-priority + queue: executor-queue diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala index 350df77ed4b3a..e7f1e316a6d67 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.deploy.k8s.features +import java.io.File + import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder} import io.fabric8.volcano.scheduling.v1beta1.PodGroup @@ -78,6 +80,42 @@ class VolcanoFeatureStepSuite extends SparkFunSuite { verifyPriority(podWithPriority) } + test("SPARK-38455: Support driver podgroup template") { + val templatePath = new File( + getClass.getResource("/driver-podgroup-template.yml").getFile).getAbsolutePath + val sparkConf = new SparkConf() + .set(KUBERNETES_DRIVER_PODGROUP_TEMPLATE_FILE.key, templatePath) + val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf) + val step = new VolcanoFeatureStep() + step.init(kubernetesConf) + step.configurePod(SparkPod.initialPod()) + val podGroup = step.getAdditionalPreKubernetesResources().head.asInstanceOf[PodGroup] + assert(podGroup.getSpec.getMinMember == 1) + assert(podGroup.getSpec.getMinResources.get("cpu").getAmount == "2") + assert(podGroup.getSpec.getMinResources.get("memory").getAmount == "2048") + assert(podGroup.getSpec.getMinResources.get("memory").getFormat == "Mi") + assert(podGroup.getSpec.getPriorityClassName == "driver-priority") + assert(podGroup.getSpec.getQueue == "driver-queue") + } + + test("SPARK-38455: Support executor podgroup template") { + val templatePath = new File( + getClass.getResource("/executor-podgroup-template.yml").getFile).getAbsolutePath + val sparkConf = new SparkConf() + 
.set(KUBERNETES_EXECUTOR_PODGROUP_TEMPLATE_FILE.key, templatePath)
+    val kubernetesConf = KubernetesTestConf.createExecutorConf(sparkConf)
+    val step = new VolcanoFeatureStep()
+    step.init(kubernetesConf)
+    step.configurePod(SparkPod.initialPod())
+    val podGroup = step.getAdditionalPreKubernetesResources().head.asInstanceOf[PodGroup]
+    assert(podGroup.getSpec.getMinMember == 1000)
+    assert(podGroup.getSpec.getMinResources.get("cpu").getAmount == "4")
+    assert(podGroup.getSpec.getMinResources.get("memory").getAmount == "16")
+    assert(podGroup.getSpec.getMinResources.get("memory").getFormat == "Gi")
+    assert(podGroup.getSpec.getPriorityClassName == "executor-priority")
+    assert(podGroup.getSpec.getQueue == "executor-queue")
+  }
+
   private def verifyPriority(pod: SparkPod): Unit = {
     val sparkConf = new SparkConf()
     val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf)

From 587ec34daacbf9156b1612e0e8b5fb3c3c991295 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Wed, 9 Mar 2022 13:03:55 +0800
Subject: [PATCH 436/513] [SPARK-38449][SQL] Avoid call createTable when ignoreIfExists=true and table exists

### What changes were proposed in this pull request?
In the current V2 code, when the table exists and `ignoreIfExists` = true, Spark does nothing:

```
case class CreateTableExec(
    catalog: TableCatalog,
    identifier: Identifier,
    tableSchema: StructType,
    partitioning: Seq[Transform],
    tableSpec: TableSpec,
    ignoreIfExists: Boolean) extends LeafV2CommandExec {
  import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._

  val tableProperties = CatalogV2Util.convertTableProperties(tableSpec)

  override protected def run(): Seq[InternalRow] = {
    if (!catalog.tableExists(identifier)) {
      try {
        catalog.createTable(identifier, tableSchema, partitioning.toArray, tableProperties.asJava)
      } catch {
        case _: TableAlreadyExistsException if ignoreIfExists =>
          logWarning(s"Table ${identifier.quoted} was created concurrently. Ignoring.")
      }
    } else if (!ignoreIfExists) {
      throw QueryCompilationErrors.tableAlreadyExistsError(identifier)
    }

    Seq.empty
  }
```

But the current V1 code still calls `externalCatalog.createTable()`. Moreover, the current `InMemoryCatalog.createTable()` has no code at all to handle a concurrent create-table request. So we can handle this the same way as V2: when the table already exists and `ignoreIfExists` = true, simply do nothing.

### Why are the changes needed?
Avoid an unnecessary createTable call to the external catalog, especially the Hive metastore.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?

Closes #35770 from AngersZhuuuu/refactor-create-table.
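To illustrate the V1 path this touches, a small sketch (assuming an active `SparkSession` named `spark`; the table name is arbitrary): `CREATE TABLE IF NOT EXISTS` maps to `ignoreIfExists = true`, so after this change the second statement returns without contacting the external catalog.

```scala
// First statement creates the table.
spark.sql("CREATE TABLE IF NOT EXISTS t1 (id INT) USING parquet")
// Second statement: the table already exists and ignoreIfExists = true,
// so the session catalog now skips the external catalog call (e.g. Hive metastore) entirely.
spark.sql("CREATE TABLE IF NOT EXISTS t1 (id INT) USING parquet")
```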
Authored-by: Angerszhuuuu Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/catalog/SessionCatalog.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 1a3054216972a..3727bb3c101cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -365,10 +365,12 @@ class SessionCatalog( if (!ignoreIfExists) { throw new TableAlreadyExistsException(db = db, table = table) } - } else if (validateLocation) { - validateTableLocation(newTableDefinition) + } else { + if (validateLocation) { + validateTableLocation(newTableDefinition) + } + externalCatalog.createTable(newTableDefinition, ignoreIfExists) } - externalCatalog.createTable(newTableDefinition, ignoreIfExists) } def validateTableLocation(table: CatalogTable): Unit = { From 66ff4b6d1b6fe22bd025bb645e12272e2d79ad0d Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Tue, 8 Mar 2022 21:16:12 -0800 Subject: [PATCH 437/513] [SPARK-38452][K8S][TESTS] Support pyDockerfile and rDockerfile in SBT K8s IT ### What changes were proposed in this pull request? Support pyDockerfile and rDockerfile in SBT K8s IT ### Why are the changes needed? Enable users to specify `pyDockerfile` and `rDockerfile` separately. ### Does this PR introduce _any_ user-facing change? No, test only ### How was this patch tested? ``` build/sbt -Pkubernetes -Pkubernetes-integration-tests \ -Dtest.exclude.tags=minikube -Dspark.kubernetes.test.deployMode=docker-desktop \ -Dspark.kubernetes.test.pyDockerFile=/Users/yikun/code/Dockerfile.py \ -Dspark.kubernetes.test.rDockerFile=/Users/yikun/code/Dockerfile.r \ -Dspark.kubernetes.test.DockerFile=/Users/yikun/code/Dockerfile "kubernetes-integration-tests/test" ``` Closes #35772 from Yikun/SPARK-38452. 
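For reference, a condensed Scala sketch of how the new test properties are resolved (illustrative only; `sparkHome` and the bindings directory are assumed stand-ins for the values the real SparkBuild computes, and the actual change is in the diff below):

```scala
// Sketch of the property resolution: -Dspark.kubernetes.test.pyDockerFile and
// -Dspark.kubernetes.test.rDockerFile override the default bindings Dockerfiles,
// and the resolved paths are passed to docker-image-tool.sh as "-p" and "-R".
object DockerFilePropsSketch {
  val sparkHome: String = sys.env.getOrElse("SPARK_HOME", ".")
  // Assumed default location of the language-binding Dockerfiles.
  val bindingsDir: String =
    s"$sparkHome/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings"

  val pyDockerFile: String = sys.props.getOrElse("spark.kubernetes.test.pyDockerFile",
    s"$bindingsDir/python/Dockerfile")
  val rDockerFile: String = sys.props.getOrElse("spark.kubernetes.test.rDockerFile",
    s"$bindingsDir/R/Dockerfile")
}
```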
Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 8 ++++++-- .../kubernetes/integration-tests/README.md | 11 +++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 0f06e6bcb0897..b536b50532a05 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -646,6 +646,10 @@ object KubernetesIntegrationTests { val javaImageTag = sys.props.get("spark.kubernetes.test.javaImageTag") val dockerFile = sys.props.getOrElse("spark.kubernetes.test.dockerFile", s"$sparkHome/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17") + val pyDockerFile = sys.props.getOrElse("spark.kubernetes.test.pyDockerFile", + s"$bindingsDir/python/Dockerfile") + val rDockerFile = sys.props.getOrElse("spark.kubernetes.test.rDockerFile", + s"$bindingsDir/R/Dockerfile") val extraOptions = if (javaImageTag.isDefined) { Seq("-b", s"java_image_tag=$javaImageTag") } else { @@ -654,8 +658,8 @@ object KubernetesIntegrationTests { val cmd = Seq(dockerTool, "-r", imageRepo, "-t", imageTag.getOrElse("dev"), - "-p", s"$bindingsDir/python/Dockerfile", - "-R", s"$bindingsDir/R/Dockerfile") ++ + "-p", pyDockerFile, + "-R", rDockerFile) ++ (if (deployMode != Some("minikube")) Seq.empty else Seq("-m")) ++ extraOptions :+ "build" diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md index 2151b7fbb7700..9eb928dfc83ea 100644 --- a/resource-managers/kubernetes/integration-tests/README.md +++ b/resource-managers/kubernetes/integration-tests/README.md @@ -294,3 +294,14 @@ In addition, you can run a single test selectively. -Dspark.kubernetes.test.deployMode=docker-desktop \ -Dspark.kubernetes.test.imageTag=2022-03-06 \ 'kubernetes-integration-tests/testOnly -- -z "Run SparkPi with a very long application name"' + +You can also specify your specific dockerfile to build JVM/Python/R based image to test. + + build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests \ + -Dtest.exclude.tags=minikube \ + -Dspark.kubernetes.test.deployMode=docker-desktop \ + -Dspark.kubernetes.test.imageTag=2022-03-06 \ + -Dspark.kubernetes.test.dockerFile=/path/to/Dockerfile \ + -Dspark.kubernetes.test.pyDockerFile=/path/to/py/Dockerfile \ + -Dspark.kubernetes.test.rDockerFile=/path/to/r/Dockerfile \ + 'kubernetes-integration-tests/test' From 52e7602344036a649bb19ed642dbd74bc7ee9cb1 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 8 Mar 2022 22:14:11 -0800 Subject: [PATCH 438/513] [SPARK-38458][SQL] Fix always false condition in `LogDivertAppender#initLayout` ### What changes were proposed in this pull request? `initLayout` method in `LogDivertAppender` as follows: ```java private static StringLayout initLayout(OperationLog.LoggingLevel loggingMode) { ... for (Map.Entry entry : appenders.entrySet()) { Appender ap = entry.getValue(); if (ap.getClass().equals(ConsoleAppender.class)) { Layout l = ap.getLayout(); if (l.getClass().equals(StringLayout.class)) { layout = (StringLayout) l; break; } } } return getLayout(isVerbose, layout); } ``` `l.getClass().equals(StringLayout.class)` in above method is always return `false` because `StringLayout` is an interface, for example, `JsonLayout` is an implementation of `StringLayout`, but `JsonLayout.class.equals(StringLayout.class)` will return false. This pr change to use `instanceof` to fix the issue. ### Why are the changes needed? Bug fix ### Does this PR introduce _any_ user-facing change? 
No ### How was this patch tested? Pass GA Closes #35780 from LuciferYang/SPARK-38458. Authored-by: yangjie01 Signed-off-by: Liang-Chi Hsieh --- .../apache/hive/service/cli/operation/LogDivertAppender.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/LogDivertAppender.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/LogDivertAppender.java index 2fabf70c0f274..ca0fbe7eb67a9 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/LogDivertAppender.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/LogDivertAppender.java @@ -267,7 +267,7 @@ private static StringLayout initLayout(OperationLog.LoggingLevel loggingMode) { Appender ap = entry.getValue(); if (ap.getClass().equals(ConsoleAppender.class)) { Layout l = ap.getLayout(); - if (l.getClass().equals(StringLayout.class)) { + if (l instanceof StringLayout) { layout = (StringLayout) l; break; } From bd6a3b4a001d29255f36bab9e9969cd919306fc2 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 9 Mar 2022 11:36:57 +0300 Subject: [PATCH 439/513] [SPARK-38437][SQL] Lenient serialization of datetime from datasource ### What changes were proposed in this pull request? In the PR, I propose to support the lenient mode by the row serializer used by datasources to converts rows received from scans. Spark SQL will be able to accept: - `java.time.Instant` and `java.sql.Timestamp` for the `TIMESTAMP` type, and - `java.time.LocalDate` and `java.sql.Date` for the `DATE` type independently from the current value of the SQL config `spark.sql.datetime.java8API.enabled`. ### Why are the changes needed? A datasource might not aware of the Spark SQL config `spark.sql.datetime.java8API.enabled` if this datasource was developed before the config was introduced by Spark version 3.0.0. In that case, it always return "legacy" timestamps/dates of the types `java.sql.Timestamp`/`java.sql.Date` even if an user enabled Java 8 API. As Spark expects `java.time.Instant` or `java.time.LocalDate` but gets `java.time.Timestamp` or `java.sql.Date`, the user observes the exception: ```java ERROR SparkExecuteStatementOperation: Error executing query with ac61b10a-486e-463b-8726-3b61da58582e, currentState RUNNING, org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2.0 failed 4 times, most recent failure: Lost task 0.3 in stage 2.0 (TID 8) (10.157.1.194 executor 0): java.lang.RuntimeException: Error while encoding: java.lang.RuntimeException: java.sql.Timestamp is not a valid external type for schema of timestamp if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.sql.catalyst.util.DateTimeUtils$, TimestampType, instantToMicros, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 0, loan_perf_date), TimestampType), true, false) AS loan_perf_date#1125 at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Serializer.apply(ExpressionEncoder.scala:239) ``` This PR fixes the issue above. And after the changes, users can use legacy datasource connecters with new Spark versions even when they need to enable Java 8 API. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
By running the affected test suites: ``` $ build/sbt "test:testOnly *CodeGenerationSuite" $ build/sbt "test:testOnly *ObjectExpressionsSuite" ``` and new tests: ``` $ build/sbt "test:testOnly *RowEncoderSuite" $ build/sbt "test:testOnly *TableScanSuite" ``` Closes #35756 from MaxGekk/dynamic-serializer-java-ts. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../sql/catalyst/SerializerBuildHelper.scala | 18 ++++++++++ .../sql/catalyst/encoders/RowEncoder.scala | 35 ++++++++++++------- .../expressions/objects/objects.scala | 26 +++++++++++--- .../sql/catalyst/util/DateTimeUtils.scala | 22 ++++++++++++ .../catalyst/encoders/RowEncoderSuite.scala | 23 ++++++++++++ .../expressions/CodeGenerationSuite.scala | 4 ++- .../expressions/ObjectExpressionsSuite.scala | 8 +++-- .../datasources/DataSourceStrategy.scala | 2 +- .../spark/sql/sources/TableScanSuite.scala | 27 ++++++++++++++ 9 files changed, 144 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala index 3c17575860db3..8dec923649f1a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala @@ -86,6 +86,15 @@ object SerializerBuildHelper { returnNullable = false) } + def createSerializerForAnyTimestamp(inputObject: Expression): Expression = { + StaticInvoke( + DateTimeUtils.getClass, + TimestampType, + "anyToMicros", + inputObject :: Nil, + returnNullable = false) + } + def createSerializerForLocalDateTime(inputObject: Expression): Expression = { StaticInvoke( DateTimeUtils.getClass, @@ -113,6 +122,15 @@ object SerializerBuildHelper { returnNullable = false) } + def createSerializerForAnyDate(inputObject: Expression): Expression = { + StaticInvoke( + DateTimeUtils.getClass, + DateType, + "anyToDays", + inputObject :: Nil, + returnNullable = false) + } + def createSerializerForJavaDuration(inputObject: Expression): Expression = { StaticInvoke( IntervalUtils.getClass, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala index d34d9531c3f34..d7e497fafa86a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala @@ -66,23 +66,27 @@ import org.apache.spark.sql.types._ * }}} */ object RowEncoder { - def apply(schema: StructType): ExpressionEncoder[Row] = { + def apply(schema: StructType, lenient: Boolean): ExpressionEncoder[Row] = { val cls = classOf[Row] val inputObject = BoundReference(0, ObjectType(cls), nullable = true) - val serializer = serializerFor(inputObject, schema) + val serializer = serializerFor(inputObject, schema, lenient) val deserializer = deserializerFor(GetColumnByOrdinal(0, serializer.dataType), schema) new ExpressionEncoder[Row]( serializer, deserializer, ClassTag(cls)) } + def apply(schema: StructType): ExpressionEncoder[Row] = { + apply(schema, lenient = false) + } private def serializerFor( inputObject: Expression, - inputType: DataType): Expression = inputType match { + inputType: DataType, + lenient: Boolean): Expression = inputType match { case dt if ScalaReflection.isNativeType(dt) => inputObject - case p: PythonUserDefinedType => serializerFor(inputObject, p.sqlType) + 
case p: PythonUserDefinedType => serializerFor(inputObject, p.sqlType, lenient) case udt: UserDefinedType[_] => val annotation = udt.userClass.getAnnotation(classOf[SQLUserDefinedType]) @@ -100,7 +104,9 @@ object RowEncoder { Invoke(obj, "serialize", udt, inputObject :: Nil, returnNullable = false) case TimestampType => - if (SQLConf.get.datetimeJava8ApiEnabled) { + if (lenient) { + createSerializerForAnyTimestamp(inputObject) + } else if (SQLConf.get.datetimeJava8ApiEnabled) { createSerializerForJavaInstant(inputObject) } else { createSerializerForSqlTimestamp(inputObject) @@ -109,7 +115,9 @@ object RowEncoder { case TimestampNTZType => createSerializerForLocalDateTime(inputObject) case DateType => - if (SQLConf.get.datetimeJava8ApiEnabled) { + if (lenient) { + createSerializerForAnyDate(inputObject) + } else if (SQLConf.get.datetimeJava8ApiEnabled) { createSerializerForJavaLocalDate(inputObject) } else { createSerializerForSqlDate(inputObject) @@ -144,7 +152,7 @@ object RowEncoder { inputObject, ObjectType(classOf[Object]), element => { - val value = serializerFor(ValidateExternalType(element, et), et) + val value = serializerFor(ValidateExternalType(element, et, lenient), et, lenient) expressionWithNullSafety(value, containsNull, WalkedTypePath()) }) } @@ -156,7 +164,7 @@ object RowEncoder { returnNullable = false), "toSeq", ObjectType(classOf[scala.collection.Seq[_]]), returnNullable = false) - val convertedKeys = serializerFor(keys, ArrayType(kt, false)) + val convertedKeys = serializerFor(keys, ArrayType(kt, false), lenient) val values = Invoke( @@ -164,7 +172,7 @@ object RowEncoder { returnNullable = false), "toSeq", ObjectType(classOf[scala.collection.Seq[_]]), returnNullable = false) - val convertedValues = serializerFor(values, ArrayType(vt, valueNullable)) + val convertedValues = serializerFor(values, ArrayType(vt, valueNullable), lenient) val nonNullOutput = NewInstance( classOf[ArrayBasedMapData], @@ -183,8 +191,10 @@ object RowEncoder { val fieldValue = serializerFor( ValidateExternalType( GetExternalRowField(inputObject, index, field.name), - field.dataType), - field.dataType) + field.dataType, + lenient), + field.dataType, + lenient) val convertedField = if (field.nullable) { If( Invoke(inputObject, "isNullAt", BooleanType, Literal(index) :: Nil), @@ -214,12 +224,13 @@ object RowEncoder { * can be `scala.math.BigDecimal`, `java.math.BigDecimal`, or * `org.apache.spark.sql.types.Decimal`. */ - def externalDataTypeForInput(dt: DataType): DataType = dt match { + def externalDataTypeForInput(dt: DataType, lenient: Boolean): DataType = dt match { // In order to support both Decimal and java/scala BigDecimal in external row, we make this // as java.lang.Object. case _: DecimalType => ObjectType(classOf[java.lang.Object]) // In order to support both Array and Seq in external row, we make this as java.lang.Object. 
case _: ArrayType => ObjectType(classOf[java.lang.Object]) + case _: DateType | _: TimestampType if lenient => ObjectType(classOf[java.lang.Object]) case _ => externalDataTypeFor(dt) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 4599c2a2d3055..6974ada8735c3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -1875,14 +1875,14 @@ case class GetExternalRowField( * Validates the actual data type of input expression at runtime. If it doesn't match the * expectation, throw an exception. */ -case class ValidateExternalType(child: Expression, expected: DataType) +case class ValidateExternalType(child: Expression, expected: DataType, lenient: Boolean) extends UnaryExpression with NonSQLExpression with ExpectsInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(ObjectType(classOf[Object])) override def nullable: Boolean = child.nullable - override val dataType: DataType = RowEncoder.externalDataTypeForInput(expected) + override val dataType: DataType = RowEncoder.externalDataTypeForInput(expected, lenient) private lazy val errMsg = s" is not a valid external type for schema of ${expected.simpleString}" @@ -1896,6 +1896,14 @@ case class ValidateExternalType(child: Expression, expected: DataType) (value: Any) => { value.getClass.isArray || value.isInstanceOf[Seq[_]] } + case _: DateType => + (value: Any) => { + value.isInstanceOf[java.sql.Date] || value.isInstanceOf[java.time.LocalDate] + } + case _: TimestampType => + (value: Any) => { + value.isInstanceOf[java.sql.Timestamp] || value.isInstanceOf[java.time.Instant] + } case _ => val dataTypeClazz = ScalaReflection.javaBoxedType(dataType) (value: Any) => { @@ -1918,13 +1926,21 @@ case class ValidateExternalType(child: Expression, expected: DataType) val errMsgField = ctx.addReferenceObj("errMsg", errMsg) val input = child.genCode(ctx) val obj = input.value - + def genCheckTypes(classes: Seq[Class[_]]): String = { + classes.map(cls => s"$obj instanceof ${cls.getName}").mkString(" || ") + } val typeCheck = expected match { case _: DecimalType => - Seq(classOf[java.math.BigDecimal], classOf[scala.math.BigDecimal], classOf[Decimal]) - .map(cls => s"$obj instanceof ${cls.getName}").mkString(" || ") + genCheckTypes(Seq( + classOf[java.math.BigDecimal], + classOf[scala.math.BigDecimal], + classOf[Decimal])) case _: ArrayType => s"$obj.getClass().isArray() || $obj instanceof ${classOf[scala.collection.Seq[_]].getName}" + case _: DateType => + genCheckTypes(Seq(classOf[java.sql.Date], classOf[java.time.LocalDate])) + case _: TimestampType => + genCheckTypes(Seq(classOf[java.sql.Timestamp], classOf[java.time.Instant])) case _ => s"$obj instanceof ${CodeGenerator.boxedType(dataType)}" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index c2ca43630000f..7d2ead0c5f840 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -107,6 +107,17 @@ object DateTimeUtils { rebaseJulianToGregorianDays(julianDays) } + /** + * Converts an Java object to days. 
+ * + * @param obj Either an object of `java.sql.Date` or `java.time.LocalDate`. + * @return The number of days since 1970-01-01. + */ + def anyToDays(obj: Any): Int = obj match { + case d: Date => fromJavaDate(d) + case ld: LocalDate => localDateToDays(ld) + } + /** * Converts days since the epoch 1970-01-01 in Proleptic Gregorian calendar to a local date * at the default JVM time zone in the hybrid calendar (Julian + Gregorian). It rebases the given @@ -180,6 +191,17 @@ object DateTimeUtils { rebaseJulianToGregorianMicros(micros) } + /** + * Converts an Java object to microseconds. + * + * @param obj Either an object of `java.sql.Timestamp` or `java.time.Instant`. + * @return The number of micros since the epoch. + */ + def anyToMicros(obj: Any): Long = obj match { + case t: Timestamp => fromJavaTimestamp(t) + case i: Instant => instantToMicros(i) + } + /** * Returns the number of microseconds since epoch from Julian day and nanoseconds in a day. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala index 44b06d9b3471e..c6bddfa5eee1f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala @@ -435,4 +435,27 @@ class RowEncoderSuite extends CodegenInterpretedPlanTest { } } } + + test("SPARK-38437: encoding TimestampType/DateType from any supported datetime Java types") { + Seq(true, false).foreach { java8Api => + withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8Api.toString) { + val schema = new StructType() + .add("t0", TimestampType) + .add("t1", TimestampType) + .add("d0", DateType) + .add("d1", DateType) + val encoder = RowEncoder(schema, lenient = true).resolveAndBind() + val instant = java.time.Instant.parse("2019-02-26T16:56:00Z") + val ld = java.time.LocalDate.parse("2022-03-08") + val row = encoder.createSerializer().apply( + Row(instant, java.sql.Timestamp.from(instant), ld, java.sql.Date.valueOf(ld))) + val expectedMicros = DateTimeUtils.instantToMicros(instant) + assert(row.getLong(0) === expectedMicros) + assert(row.getLong(1) === expectedMicros) + val expectedDays = DateTimeUtils.localDateToDays(ld) + assert(row.getInt(2) === expectedDays) + assert(row.getInt(3) === expectedDays) + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 2b59d723ab66b..1e4499a0ee3fe 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -330,7 +330,9 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { val inputObject = BoundReference(0, ObjectType(classOf[Row]), nullable = true) GenerateUnsafeProjection.generate( ValidateExternalType( - GetExternalRowField(inputObject, index = 0, fieldName = "\"quote"), IntegerType) :: Nil) + GetExternalRowField(inputObject, index = 0, fieldName = "\"quote"), + IntegerType, + lenient = false) :: Nil) } test("SPARK-17160: field names are properly escaped by AssertTrue") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala index 8d98965f2be81..585191faf18bc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala @@ -498,13 +498,17 @@ class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { (Array(3, 2, 1), ArrayType(IntegerType)) ).foreach { case (input, dt) => val validateType = ValidateExternalType( - GetExternalRowField(inputObject, index = 0, fieldName = "c0"), dt) + GetExternalRowField(inputObject, index = 0, fieldName = "c0"), + dt, + lenient = false) checkObjectExprEvaluation(validateType, input, InternalRow.fromSeq(Seq(Row(input)))) } checkExceptionInExpression[RuntimeException]( ValidateExternalType( - GetExternalRowField(inputObject, index = 0, fieldName = "c0"), DoubleType), + GetExternalRowField(inputObject, index = 0, fieldName = "c0"), + DoubleType, + lenient = false), InternalRow.fromSeq(Seq(Row(1))), "java.lang.Integer is not a valid external type for schema of double") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index c386655c947f6..4e5014cc83e13 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -802,7 +802,7 @@ object DataSourceStrategy output: Seq[Attribute], rdd: RDD[Row]): RDD[InternalRow] = { if (relation.needConversion) { - val toRow = RowEncoder(StructType.fromAttributes(output)).createSerializer() + val toRow = RowEncoder(StructType.fromAttributes(output), lenient = true).createSerializer() rdd.mapPartitions { iterator => iterator.map(toRow) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala index 47bacde5fea29..8f263f042cf9f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala @@ -109,6 +109,19 @@ case class AllDataTypesScan( } } +class LegacyTimestampSource extends RelationProvider { + override def createRelation(ctx: SQLContext, parameters: Map[String, String]): BaseRelation = { + new BaseRelation() with TableScan { + override val sqlContext: SQLContext = ctx + override val schema: StructType = StructType(StructField("col", TimestampType) :: Nil) + override def buildScan(): RDD[Row] = { + sqlContext.sparkContext.parallelize( + Row(java.sql.Timestamp.valueOf("2022-03-08 12:13:14")) :: Nil) + } + } + } +} + class TableScanSuite extends DataSourceTest with SharedSparkSession { protected override lazy val sql = spark.sql _ @@ -420,4 +433,18 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { val comments = planned.schema.fields.map(_.getComment().getOrElse("NO_COMMENT")).mkString(",") assert(comments === "SN,SA,NO_COMMENT") } + + test("SPARK-38437: accept java.sql.Timestamp even when Java 8 API is enabled") { + val tableName = "relationProviderWithLegacyTimestamps" + withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> "true") { + withTable (tableName) { + sql(s""" + |CREATE TABLE $tableName (col TIMESTAMP) + |USING 
org.apache.spark.sql.sources.LegacyTimestampSource""".stripMargin) + checkAnswer( + spark.table(tableName), + Row(java.sql.Timestamp.valueOf("2022-03-08 12:13:14").toInstant) :: Nil) + } + } + } } From 62e4c29d18b880df84d49f0516e7ec80c4481919 Mon Sep 17 00:00:00 2001 From: dch nguyen Date: Wed, 9 Mar 2022 11:15:31 +0100 Subject: [PATCH 440/513] [SPARK-37421][PYTHON] Inline type hints for python/pyspark/mllib/evaluation.py ### What changes were proposed in this pull request? Inline type hints for evaluation.py in python/pyspark/mllib/ ### Why are the changes needed? We can take advantage of static type checking within the functions by inlining the type hints. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #34680 from dchvn/SPARK-37421. Lead-authored-by: dch nguyen Co-authored-by: dch nguyen Signed-off-by: zero323 --- python/pyspark/mllib/evaluation.py | 134 +++++++++++++++------------- python/pyspark/mllib/evaluation.pyi | 92 ------------------- 2 files changed, 72 insertions(+), 154 deletions(-) delete mode 100644 python/pyspark/mllib/evaluation.pyi diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index b09783458510c..1003ba68c5fa0 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -15,12 +15,16 @@ # limitations under the License. # +from typing import Generic, List, Optional, Tuple, TypeVar + import sys from pyspark import since +from pyspark.rdd import RDD from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc +from pyspark.mllib.linalg import Matrix from pyspark.sql import SQLContext -from pyspark.sql.types import ArrayType, StructField, StructType, DoubleType +from pyspark.sql.types import ArrayType, DoubleType, StructField, StructType __all__ = [ "BinaryClassificationMetrics", @@ -29,6 +33,8 @@ "RankingMetrics", ] +T = TypeVar("T") + class BinaryClassificationMetrics(JavaModelWrapper): """ @@ -61,7 +67,7 @@ class BinaryClassificationMetrics(JavaModelWrapper): 0.88... """ - def __init__(self, scoreAndLabels): + def __init__(self, scoreAndLabels: RDD[Tuple[float, float]]): sc = scoreAndLabels.ctx sql_ctx = SQLContext.getOrCreate(sc) numCol = len(scoreAndLabels.first()) @@ -74,29 +80,30 @@ def __init__(self, scoreAndLabels): if numCol == 3: schema.add("weight", DoubleType(), False) df = sql_ctx.createDataFrame(scoreAndLabels, schema=schema) + assert sc._jvm is not None java_class = sc._jvm.org.apache.spark.mllib.evaluation.BinaryClassificationMetrics java_model = java_class(df._jdf) super(BinaryClassificationMetrics, self).__init__(java_model) - @property + @property # type: ignore[misc] @since("1.4.0") - def areaUnderROC(self): + def areaUnderROC(self) -> float: """ Computes the area under the receiver operating characteristic (ROC) curve. """ return self.call("areaUnderROC") - @property + @property # type: ignore[misc] @since("1.4.0") - def areaUnderPR(self): + def areaUnderPR(self) -> float: """ Computes the area under the precision-recall curve. """ return self.call("areaUnderPR") @since("1.4.0") - def unpersist(self): + def unpersist(self) -> None: """ Unpersists intermediate RDDs used in the computation. """ @@ -136,7 +143,7 @@ class RegressionMetrics(JavaModelWrapper): 0.68... 
""" - def __init__(self, predictionAndObservations): + def __init__(self, predictionAndObservations: RDD[Tuple[float, float]]): sc = predictionAndObservations.ctx sql_ctx = SQLContext.getOrCreate(sc) numCol = len(predictionAndObservations.first()) @@ -149,49 +156,50 @@ def __init__(self, predictionAndObservations): if numCol == 3: schema.add("weight", DoubleType(), False) df = sql_ctx.createDataFrame(predictionAndObservations, schema=schema) + assert sc._jvm is not None java_class = sc._jvm.org.apache.spark.mllib.evaluation.RegressionMetrics java_model = java_class(df._jdf) super(RegressionMetrics, self).__init__(java_model) - @property + @property # type: ignore[misc] @since("1.4.0") - def explainedVariance(self): + def explainedVariance(self) -> float: r""" Returns the explained variance regression score. explainedVariance = :math:`1 - \frac{variance(y - \hat{y})}{variance(y)}` """ return self.call("explainedVariance") - @property + @property # type: ignore[misc] @since("1.4.0") - def meanAbsoluteError(self): + def meanAbsoluteError(self) -> float: """ Returns the mean absolute error, which is a risk function corresponding to the expected value of the absolute error loss or l1-norm loss. """ return self.call("meanAbsoluteError") - @property + @property # type: ignore[misc] @since("1.4.0") - def meanSquaredError(self): + def meanSquaredError(self) -> float: """ Returns the mean squared error, which is a risk function corresponding to the expected value of the squared error loss or quadratic loss. """ return self.call("meanSquaredError") - @property + @property # type: ignore[misc] @since("1.4.0") - def rootMeanSquaredError(self): + def rootMeanSquaredError(self) -> float: """ Returns the root mean squared error, which is defined as the square root of the mean squared error. """ return self.call("rootMeanSquaredError") - @property + @property # type: ignore[misc] @since("1.4.0") - def r2(self): + def r2(self) -> float: """ Returns R^2^, the coefficient of determination. """ @@ -274,7 +282,7 @@ class MulticlassMetrics(JavaModelWrapper): 0.9682... """ - def __init__(self, predictionAndLabels): + def __init__(self, predictionAndLabels: RDD[Tuple[float, float]]): sc = predictionAndLabels.ctx sql_ctx = SQLContext.getOrCreate(sc) numCol = len(predictionAndLabels.first()) @@ -289,12 +297,13 @@ def __init__(self, predictionAndLabels): if numCol == 4: schema.add("probability", ArrayType(DoubleType(), False), False) df = sql_ctx.createDataFrame(predictionAndLabels, schema) + assert sc._jvm is not None java_class = sc._jvm.org.apache.spark.mllib.evaluation.MulticlassMetrics java_model = java_class(df._jdf) super(MulticlassMetrics, self).__init__(java_model) @since("1.4.0") - def confusionMatrix(self): + def confusionMatrix(self) -> Matrix: """ Returns confusion matrix: predicted classes are in columns, they are ordered by class label ascending, as in "labels". @@ -302,35 +311,35 @@ def confusionMatrix(self): return self.call("confusionMatrix") @since("1.4.0") - def truePositiveRate(self, label): + def truePositiveRate(self, label: float) -> float: """ Returns true positive rate for a given label (category). """ return self.call("truePositiveRate", label) @since("1.4.0") - def falsePositiveRate(self, label): + def falsePositiveRate(self, label: float) -> float: """ Returns false positive rate for a given label (category). """ return self.call("falsePositiveRate", label) @since("1.4.0") - def precision(self, label): + def precision(self, label: float) -> float: """ Returns precision. 
""" return self.call("precision", float(label)) @since("1.4.0") - def recall(self, label): + def recall(self, label: float) -> float: """ Returns recall. """ return self.call("recall", float(label)) @since("1.4.0") - def fMeasure(self, label, beta=None): + def fMeasure(self, label: float, beta: Optional[float] = None) -> float: """ Returns f-measure. """ @@ -339,51 +348,51 @@ def fMeasure(self, label, beta=None): else: return self.call("fMeasure", label, beta) - @property + @property # type: ignore[misc] @since("2.0.0") - def accuracy(self): + def accuracy(self) -> float: """ Returns accuracy (equals to the total number of correctly classified instances out of the total number of instances). """ return self.call("accuracy") - @property + @property # type: ignore[misc] @since("1.4.0") - def weightedTruePositiveRate(self): + def weightedTruePositiveRate(self) -> float: """ Returns weighted true positive rate. (equals to precision, recall and f-measure) """ return self.call("weightedTruePositiveRate") - @property + @property # type: ignore[misc] @since("1.4.0") - def weightedFalsePositiveRate(self): + def weightedFalsePositiveRate(self) -> float: """ Returns weighted false positive rate. """ return self.call("weightedFalsePositiveRate") - @property + @property # type: ignore[misc] @since("1.4.0") - def weightedRecall(self): + def weightedRecall(self) -> float: """ Returns weighted averaged recall. (equals to precision, recall and f-measure) """ return self.call("weightedRecall") - @property + @property # type: ignore[misc] @since("1.4.0") - def weightedPrecision(self): + def weightedPrecision(self) -> float: """ Returns weighted averaged precision. """ return self.call("weightedPrecision") @since("1.4.0") - def weightedFMeasure(self, beta=None): + def weightedFMeasure(self, beta: Optional[float] = None) -> float: """ Returns weighted averaged f-measure. """ @@ -393,14 +402,14 @@ def weightedFMeasure(self, beta=None): return self.call("weightedFMeasure", beta) @since("3.0.0") - def logLoss(self, eps=1e-15): + def logLoss(self, eps: float = 1e-15) -> float: """ Returns weighted logLoss. """ return self.call("logLoss", eps) -class RankingMetrics(JavaModelWrapper): +class RankingMetrics(JavaModelWrapper, Generic[T]): """ Evaluator for ranking algorithms. @@ -442,7 +451,7 @@ class RankingMetrics(JavaModelWrapper): 0.66... """ - def __init__(self, predictionAndLabels): + def __init__(self, predictionAndLabels: RDD[Tuple[List[T], List[T]]]): sc = predictionAndLabels.ctx sql_ctx = SQLContext.getOrCreate(sc) df = sql_ctx.createDataFrame( @@ -452,7 +461,7 @@ def __init__(self, predictionAndLabels): super(RankingMetrics, self).__init__(java_model) @since("1.4.0") - def precisionAt(self, k): + def precisionAt(self, k: int) -> float: """ Compute the average precision of all the queries, truncated at ranking position k. @@ -465,9 +474,9 @@ def precisionAt(self, k): """ return self.call("precisionAt", int(k)) - @property + @property # type: ignore[misc] @since("1.4.0") - def meanAveragePrecision(self): + def meanAveragePrecision(self) -> float: """ Returns the mean average precision (MAP) of all the queries. If a query has an empty ground truth set, the average precision will be zero and @@ -476,7 +485,7 @@ def meanAveragePrecision(self): return self.call("meanAveragePrecision") @since("3.0.0") - def meanAveragePrecisionAt(self, k): + def meanAveragePrecisionAt(self, k: int) -> float: """ Returns the mean average precision (MAP) at first k ranking of all the queries. 
If a query has an empty ground truth set, the average precision will be zero and @@ -485,7 +494,7 @@ def meanAveragePrecisionAt(self, k): return self.call("meanAveragePrecisionAt", int(k)) @since("1.4.0") - def ndcgAt(self, k): + def ndcgAt(self, k: int) -> float: """ Compute the average NDCG value of all the queries, truncated at ranking position k. The discounted cumulative gain at position k is computed as: @@ -498,7 +507,7 @@ def ndcgAt(self, k): return self.call("ndcgAt", int(k)) @since("3.0.0") - def recallAt(self, k): + def recallAt(self, k: int) -> float: """ Compute the average recall of all the queries, truncated at ranking position k. @@ -556,18 +565,19 @@ class MultilabelMetrics(JavaModelWrapper): 0.54... """ - def __init__(self, predictionAndLabels): + def __init__(self, predictionAndLabels: RDD[Tuple[List[float], List[float]]]): sc = predictionAndLabels.ctx sql_ctx = SQLContext.getOrCreate(sc) df = sql_ctx.createDataFrame( predictionAndLabels, schema=sql_ctx._inferSchema(predictionAndLabels) ) + assert sc._jvm is not None java_class = sc._jvm.org.apache.spark.mllib.evaluation.MultilabelMetrics java_model = java_class(df._jdf) super(MultilabelMetrics, self).__init__(java_model) @since("1.4.0") - def precision(self, label=None): + def precision(self, label: Optional[float] = None) -> float: """ Returns precision or precision for a given label (category) if specified. """ @@ -577,7 +587,7 @@ def precision(self, label=None): return self.call("precision", float(label)) @since("1.4.0") - def recall(self, label=None): + def recall(self, label: Optional[float] = None) -> float: """ Returns recall or recall for a given label (category) if specified. """ @@ -587,7 +597,7 @@ def recall(self, label=None): return self.call("recall", float(label)) @since("1.4.0") - def f1Measure(self, label=None): + def f1Measure(self, label: Optional[float] = None) -> float: """ Returns f1Measure or f1Measure for a given label (category) if specified. """ @@ -596,60 +606,60 @@ def f1Measure(self, label=None): else: return self.call("f1Measure", float(label)) - @property + @property # type: ignore[misc] @since("1.4.0") - def microPrecision(self): + def microPrecision(self) -> float: """ Returns micro-averaged label-based precision. (equals to micro-averaged document-based precision) """ return self.call("microPrecision") - @property + @property # type: ignore[misc] @since("1.4.0") - def microRecall(self): + def microRecall(self) -> float: """ Returns micro-averaged label-based recall. (equals to micro-averaged document-based recall) """ return self.call("microRecall") - @property + @property # type: ignore[misc] @since("1.4.0") - def microF1Measure(self): + def microF1Measure(self) -> float: """ Returns micro-averaged label-based f1-measure. (equals to micro-averaged document-based f1-measure) """ return self.call("microF1Measure") - @property + @property # type: ignore[misc] @since("1.4.0") - def hammingLoss(self): + def hammingLoss(self) -> float: """ Returns Hamming-loss. """ return self.call("hammingLoss") - @property + @property # type: ignore[misc] @since("1.4.0") - def subsetAccuracy(self): + def subsetAccuracy(self) -> float: """ Returns subset accuracy. (for equal sets of labels) """ return self.call("subsetAccuracy") - @property + @property # type: ignore[misc] @since("1.4.0") - def accuracy(self): + def accuracy(self) -> float: """ Returns accuracy. 
""" return self.call("accuracy") -def _test(): +def _test() -> None: import doctest import numpy from pyspark.sql import SparkSession diff --git a/python/pyspark/mllib/evaluation.pyi b/python/pyspark/mllib/evaluation.pyi deleted file mode 100644 index bbe0eebf33594..0000000000000 --- a/python/pyspark/mllib/evaluation.pyi +++ /dev/null @@ -1,92 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List, Optional, Tuple, TypeVar -from pyspark.rdd import RDD -from pyspark.mllib.common import JavaModelWrapper -from pyspark.mllib.linalg import Matrix - -T = TypeVar("T") - -class BinaryClassificationMetrics(JavaModelWrapper): - def __init__(self, scoreAndLabels: RDD[Tuple[float, float]]) -> None: ... - @property - def areaUnderROC(self) -> float: ... - @property - def areaUnderPR(self) -> float: ... - def unpersist(self) -> None: ... - -class RegressionMetrics(JavaModelWrapper): - def __init__(self, predictionAndObservations: RDD[Tuple[float, float]]) -> None: ... - @property - def explainedVariance(self) -> float: ... - @property - def meanAbsoluteError(self) -> float: ... - @property - def meanSquaredError(self) -> float: ... - @property - def rootMeanSquaredError(self) -> float: ... - @property - def r2(self) -> float: ... - -class MulticlassMetrics(JavaModelWrapper): - def __init__(self, predictionAndLabels: RDD[Tuple[float, float]]) -> None: ... - def confusionMatrix(self) -> Matrix: ... - def truePositiveRate(self, label: float) -> float: ... - def falsePositiveRate(self, label: float) -> float: ... - def precision(self, label: float = ...) -> float: ... - def recall(self, label: float = ...) -> float: ... - def fMeasure(self, label: float = ..., beta: Optional[float] = ...) -> float: ... - @property - def accuracy(self) -> float: ... - @property - def weightedTruePositiveRate(self) -> float: ... - @property - def weightedFalsePositiveRate(self) -> float: ... - @property - def weightedRecall(self) -> float: ... - @property - def weightedPrecision(self) -> float: ... - def weightedFMeasure(self, beta: Optional[float] = ...) -> float: ... - -class RankingMetrics(JavaModelWrapper): - def __init__(self, predictionAndLabels: RDD[Tuple[List[T], List[T]]]) -> None: ... - def precisionAt(self, k: int) -> float: ... - @property - def meanAveragePrecision(self) -> float: ... - def meanAveragePrecisionAt(self, k: int) -> float: ... - def ndcgAt(self, k: int) -> float: ... - def recallAt(self, k: int) -> float: ... - -class MultilabelMetrics(JavaModelWrapper): - def __init__(self, predictionAndLabels: RDD[Tuple[List[float], List[float]]]) -> None: ... - def precision(self, label: Optional[float] = ...) -> float: ... - def recall(self, label: Optional[float] = ...) -> float: ... - def f1Measure(self, label: Optional[float] = ...) -> float: ... 
- @property - def microPrecision(self) -> float: ... - @property - def microRecall(self) -> float: ... - @property - def microF1Measure(self) -> float: ... - @property - def hammingLoss(self) -> float: ... - @property - def subsetAccuracy(self) -> float: ... - @property - def accuracy(self) -> float: ... From 93a25a45dc95931012b844f24859d3fe09b05fa7 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Wed, 9 Mar 2022 23:24:59 +0800 Subject: [PATCH 441/513] [SPARK-37947][SQL] Extract generator from GeneratorOuter expression contained by a Generate operator ### What changes were proposed in this pull request? This PR updates the ExtractGenerator rule to extract a generator from a GeneratorOuter expression contained by a Generate operator. ### Why are the changes needed? This works: ``` select * from values 1, 2 lateral view outer explode(array()) as b; ``` But this does not work: ``` select * from values 1, 2 lateral view explode_outer(array()) as b; ``` It produces the error: ``` Error in query: Column 'b' does not exist. Did you mean one of the following? [col1]; line 1 pos 26; ``` This is because the parser directly creates a Generate operator with the (as of yet unresolved) generator function. Later, the ResolveFunctions rule converts the unresolved function to a resolved generator wrapped by a GeneratorOuter expression. Although the ExtractGenerator rule will extract the generator function from a GeneratorOuter, it doesn't work if the GeneratorOuter is an expression in a Generate operator. Because the generator is not extracted, the ResolveGenerator rule fails to match on the Generate operator, and therefore fails to turn the generator's output expressions into attributes. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Existing unit tests. - New unit test. Closes #35232 from bersprockets/lateral_view_generatorouter. 
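To make the fix concrete, here is a toy Scala model of the new rule case (the case classes are stand-ins, not Catalyst's `Generate`/`GeneratorOuter`): the wrapper is unwrapped and its outer semantics are pushed onto the `Generate` node, so `LATERAL VIEW explode_outer(...)` ends up equivalent to `LATERAL VIEW OUTER explode(...)`.

```scala
// Toy model only; the real change is the single extra case in ExtractGenerator below.
object ExtractGeneratorSketch extends App {
  sealed trait Expr
  case class Explode(child: String) extends Expr
  case class GeneratorOuter(child: Expr) extends Expr
  case class Generate(generator: Expr, outer: Boolean)

  // Unwrap a GeneratorOuter nested inside a Generate and set outer = true.
  def extractOuter(g: Generate): Generate = g.generator match {
    case GeneratorOuter(gen) => g.copy(generator = gen, outer = true)
    case _ => g
  }

  assert(extractOuter(Generate(GeneratorOuter(Explode("arr")), outer = false)) ==
    Generate(Explode("arr"), outer = true))
}
```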
Authored-by: Bruce Robbins Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 3 +++ .../spark/sql/GeneratorFunctionSuite.scala | 24 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 245e232cda1ee..27e8ed2b32d23 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2837,6 +2837,9 @@ class Analyzer(override val catalogManager: CatalogManager) p } + case g @ Generate(GeneratorOuter(generator), _, _, _, _, _) => + g.copy(generator = generator, outer = true) + case g: Generate => g case p if p.expressions.exists(hasGenerator) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala index d5c2d93055ba1..e270e0a528d9f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala @@ -357,6 +357,30 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { val df = Seq(1, 2, 3).toDF("v") checkAnswer(df.select(explode(array(min($"v"), max($"v")))), Row(1) :: Row(3) :: Nil) } + + test("SPARK-37947: lateral view _outer()") { + checkAnswer( + sql("select * from values 1, 2 lateral view explode_outer(array()) a as b"), + Row(1, null) :: Row(2, null) :: Nil) + + checkAnswer( + sql("select * from values 1, 2 lateral view outer explode_outer(array()) a as b"), + Row(1, null) :: Row(2, null) :: Nil) + + withTempView("t1") { + sql( + """select * from values + |array(struct(0, 1), struct(3, 4)), + |array(struct(6, 7)), + |array(), + |null + |as tbl(arr) + """.stripMargin).createOrReplaceTempView("t1") + checkAnswer( + sql("select f1, f2 from t1 lateral view inline_outer(arr) as f1, f2"), + Row(0, 1) :: Row(3, 4) :: Row(6, 7) :: Row(null, null) :: Row(null, null) :: Nil) + } + } } case class EmptyGenerator() extends Generator with LeafLike[Expression] { From 158436655f30141bbd5afa8d95aec66282a5c4b4 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Wed, 9 Mar 2022 23:53:08 +0800 Subject: [PATCH 442/513] [SPARK-38354][SQL] Add hash probes metric for shuffled hash join ### What changes were proposed in this pull request? For hash aggregate, there's a SQL metrics to track number of hash probes per looked-up key. It would be better to add a similar metrics for shuffled hash join as well, to get some idea of hash probing performance. Also renamed the existing SQL metrics (and related methods names) in hash aggregate, from `avg hash probe bucket list iters` to `avg hash probes per key`, as the original name is quite obscured to understand. ### Why are the changes needed? To show up in Spark web UI (and allow metrics collection) for shuffled hash join probing performance. When the metrics is more closer to 1.0, the probing performance is better. ### Does this PR introduce _any_ user-facing change? Yes, the added SQL metrics. Will attach screenshot later. ### How was this patch tested? The modified unit test in `SQLMetricsSuite.scala`. Closes #35686 from c21/probe-metrics. 
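As background on what the new metric measures, here is a toy Scala sketch of probe counting in an open-addressing table (illustrative only; Spark's actual counters live in `LongToUnsafeRowMap` and `BytesToBytesMap`, as shown in the diff below):

```scala
// Counts one key lookup per contains() and one probe per slot visited, so
// avgHashProbesPerKey near 1.0 means almost no collisions. Toy structure only;
// it assumes the table is never filled to capacity.
class ProbeCountingMap(capacity: Int) {
  private val keys = new Array[Long](capacity)
  private val used = new Array[Boolean](capacity)
  private var numProbes = 0L
  private var numKeyLookups = 0L

  private def slot(key: Long, i: Int): Int =
    (((key % capacity) + capacity + i) % capacity).toInt

  def put(key: Long): Unit = {
    var i = 0
    while (used(slot(key, i)) && keys(slot(key, i)) != key) i += 1
    keys(slot(key, i)) = key
    used(slot(key, i)) = true
  }

  def contains(key: Long): Boolean = {
    numKeyLookups += 1
    numProbes += 1
    var i = 0
    while (used(slot(key, i)) && keys(slot(key, i)) != key) {
      i += 1
      numProbes += 1
    }
    used(slot(key, i)) && keys(slot(key, i)) == key
  }

  // The value reported to the "avg hash probes per key" SQL metric.
  def avgHashProbesPerKey: Double = (1.0 * numProbes) / numKeyLookups
}
```

A value well above 1.0 signals heavy collisions in the build-side map, which is exactly what the new SQL metric is meant to surface in the web UI.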
Authored-by: Cheng Su Signed-off-by: Wenchen Fan --- .../spark/unsafe/map/BytesToBytesMap.java | 2 +- .../UnsafeFixedWidthAggregationMap.java | 6 ++-- .../aggregate/HashAggregateExec.scala | 4 +-- .../TungstenAggregationIterator.scala | 2 +- .../sql/execution/joins/HashedRelation.scala | 35 +++++++++++++++++++ .../joins/ShuffledHashJoinExec.scala | 10 ++++-- .../execution/metric/SQLMetricsSuite.scala | 16 +++++---- 7 files changed, 59 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index f474c30b8b3d8..f4f4052b4faf4 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -941,7 +941,7 @@ public long getPeakMemoryUsedBytes() { /** * Returns the average number of probes per key lookup. */ - public double getAvgHashProbeBucketListIterations() { + public double getAvgHashProbesPerKey() { return (1.0 * numProbes) / numKeyLookups; } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java index 117e98f33a0ec..31e10af38a42b 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java @@ -226,10 +226,10 @@ public void free() { } /** - * Gets the average bucket list iterations per lookup in the underlying `BytesToBytesMap`. + * Gets the average number of hash probes per key lookup in the underlying `BytesToBytesMap`. */ - public double getAvgHashProbeBucketListIterations() { - return map.getAvgHashProbeBucketListIterations(); + public double getAvgHashProbesPerKey() { + return map.getAvgHashProbesPerKey(); } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index 7da9c56bb47f5..1b4f4be501cce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -65,7 +65,7 @@ case class HashAggregateExec( "spillSize" -> SQLMetrics.createSizeMetric(sparkContext, "spill size"), "aggTime" -> SQLMetrics.createTimingMetric(sparkContext, "time in aggregation build"), "avgHashProbe" -> - SQLMetrics.createAverageMetric(sparkContext, "avg hash probe bucket list iters"), + SQLMetrics.createAverageMetric(sparkContext, "avg hash probes per key"), "numTasksFallBacked" -> SQLMetrics.createMetric(sparkContext, "number of sort fallback tasks")) // This is for testing. 
We force TungstenAggregationIterator to fall back to the unsafe row hash @@ -204,7 +204,7 @@ case class HashAggregateExec( metrics.incPeakExecutionMemory(maxMemory) // Update average hashmap probe - avgHashProbe.set(hashMap.getAvgHashProbeBucketListIterations) + avgHashProbe.set(hashMap.getAvgHashProbesPerKey) if (sorter == null) { // not spilled diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala index 0a5e8838e1531..36405fe927273 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala @@ -389,7 +389,7 @@ class TungstenAggregationIterator( metrics.incPeakExecutionMemory(maxMemory) // Updating average hashmap probe - avgHashProbe.set(hashMap.getAvgHashProbeBucketListIterations) + avgHashProbe.set(hashMap.getAvgHashProbesPerKey) }) /////////////////////////////////////////////////////////////////////////// diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala index 0ea245093e3ed..698e7ed6fc57e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala @@ -110,6 +110,11 @@ private[execution] sealed trait HashedRelation extends KnownSizeEstimation { */ def keys(): Iterator[InternalRow] + /** + * Returns the average number of hash probes per key lookup. + */ + def getAvgHashProbesPerKey(): Double + /** * Returns a read-only copy of this, to be safely used in current thread. */ @@ -221,6 +226,8 @@ private[joins] class UnsafeHashedRelation( override def estimatedSize: Long = binaryMap.getTotalMemoryConsumption + override def getAvgHashProbesPerKey(): Double = binaryMap.getAvgHashProbesPerKey + // re-used in get()/getValue()/getWithKeyIndex()/getValueWithKeyIndex()/valuesWithKeyIndex() var resultRow = new UnsafeRow(numFields) @@ -566,6 +573,12 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap // The number of unique keys. private var numKeys = 0L + // The number of hash probes for keys. + private var numProbes = 0L + + // The number of keys lookups. + private var numKeyLookups = 0L + // needed by serializer def this() = { this( @@ -614,6 +627,11 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap */ def getTotalMemoryConsumption: Long = array.length * 8L + page.length * 8L + /** + * Returns the average number of hash probes per key lookup. + */ + def getAvgHashProbesPerKey: Double = (1.0 * numProbes) / numKeyLookups + /** * Returns the first slot of array that store the keys (sparse mode). */ @@ -648,7 +666,9 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap * Returns the single UnsafeRow for given key, or null if not found. 
*/ def getValue(key: Long, resultRow: UnsafeRow): UnsafeRow = { + numKeyLookups += 1 if (isDense) { + numProbes += 1 if (key >= minKey && key <= maxKey) { val value = array((key - minKey).toInt) if (value > 0) { @@ -656,12 +676,14 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap } } } else { + numProbes += 1 var pos = firstSlot(key) while (array(pos + 1) != 0) { if (array(pos) == key) { return getRow(array(pos + 1), resultRow) } pos = nextSlot(pos) + numProbes += 1 } } null @@ -688,7 +710,9 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap * Returns an iterator for all the values for the given key, or null if no value found. */ def get(key: Long, resultRow: UnsafeRow): Iterator[UnsafeRow] = { + numKeyLookups += 1 if (isDense) { + numProbes += 1 if (key >= minKey && key <= maxKey) { val value = array((key - minKey).toInt) if (value > 0) { @@ -696,12 +720,14 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap } } } else { + numProbes += 1 var pos = firstSlot(key) while (array(pos + 1) != 0) { if (array(pos) == key) { return valueIter(array(pos + 1), resultRow) } pos = nextSlot(pos) + numProbes += 1 } } null @@ -780,10 +806,13 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap * Update the address in array for given key. */ private def updateIndex(key: Long, address: Long): Unit = { + numKeyLookups += 1 + numProbes += 1 var pos = firstSlot(key) assert(numKeys < array.length / 2) while (array(pos) != key && array(pos + 1) != 0) { pos = nextSlot(pos) + numProbes += 1 } if (array(pos + 1) == 0) { // this is the first value for this key, put the address in array. @@ -986,6 +1015,8 @@ class LongHashedRelation( override def estimatedSize: Long = map.getTotalMemoryConsumption + override def getAvgHashProbesPerKey(): Double = map.getAvgHashProbesPerKey + override def get(key: InternalRow): Iterator[InternalRow] = { if (key.isNullAt(0)) { null @@ -1103,6 +1134,8 @@ case object EmptyHashedRelation extends HashedRelation { override def close(): Unit = {} override def estimatedSize: Long = 0 + + override def getAvgHashProbesPerKey(): Double = 0 } /** @@ -1129,6 +1162,8 @@ case object HashedRelationWithAllNullKeys extends HashedRelation { override def close(): Unit = {} override def estimatedSize: Long = 0 + + override def getAvgHashProbesPerKey(): Double = 0 } /** The HashedRelationBroadcastMode requires that rows are broadcasted as a HashedRelation. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoinExec.scala index cfe35d04778fb..38c9c82f77e07 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoinExec.scala @@ -49,7 +49,8 @@ case class ShuffledHashJoinExec( override lazy val metrics = Map( "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), "buildDataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size of build side"), - "buildTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to build hash map")) + "buildTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to build hash map"), + "avgHashProbe" -> SQLMetrics.createAverageMetric(sparkContext, "avg hash probes per key")) override def output: Seq[Attribute] = super[ShuffledJoin].output @@ -77,6 +78,7 @@ case class ShuffledHashJoinExec( def buildHashedRelation(iter: Iterator[InternalRow]): HashedRelation = { val buildDataSize = longMetric("buildDataSize") val buildTime = longMetric("buildTime") + val avgHashProbe = longMetric("avgHashProbe") val start = System.nanoTime() val context = TaskContext.get() val relation = HashedRelation( @@ -89,7 +91,11 @@ case class ShuffledHashJoinExec( buildTime += NANOSECONDS.toMillis(System.nanoTime() - start) buildDataSize += relation.estimatedSize // This relation is usually used until the end of task. - context.addTaskCompletionListener[Unit](_ => relation.close()) + context.addTaskCompletionListener[Unit](_ => { + // Update average hashmap probe + avgHashProbe.set(relation.getAvgHashProbesPerKey()) + relation.close() + }) relation } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index aa746370b8fd3..063f18622646c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -109,11 +109,11 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils val df = testData2.groupBy().count() // 2 partitions val expected1 = Seq( Map("number of output rows" -> 2L, - "avg hash probe bucket list iters" -> + "avg hash probes per key" -> aggregateMetricsPattern, "number of sort fallback tasks" -> 0L), Map("number of output rows" -> 1L, - "avg hash probe bucket list iters" -> + "avg hash probes per key" -> aggregateMetricsPattern, "number of sort fallback tasks" -> 0L)) val shuffleExpected1 = Map( @@ -131,11 +131,11 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils val df2 = testData2.groupBy(Symbol("a")).count() val expected2 = Seq( Map("number of output rows" -> 4L, - "avg hash probe bucket list iters" -> + "avg hash probes per key" -> aggregateMetricsPattern, "number of sort fallback tasks" -> 0L), Map("number of output rows" -> 3L, - "avg hash probe bucket list iters" -> + "avg hash probes per key" -> aggregateMetricsPattern, "number of sort fallback tasks" -> 0L)) @@ -184,7 +184,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils } val metrics = getSparkPlanMetrics(df, 1, nodeIds, enableWholeStage).get nodeIds.foreach { nodeId => - val probes = metrics(nodeId)._2("avg hash probe bucket list iters").toString + val probes 
= metrics(nodeId)._2("avg hash probes per key").toString if (!probes.contains("\n")) { // It's a single metrics value assert(probes.toDouble > 1.0) @@ -372,7 +372,8 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils val df = df1.join(df2, "key") testSparkPlanMetrics(df, 1, Map( nodeId1 -> (("ShuffledHashJoin", Map( - "number of output rows" -> 2L))), + "number of output rows" -> 2L, + "avg hash probes per key" -> aggregateMetricsPattern))), nodeId2 -> (("Exchange", Map( "shuffle records written" -> 2L, "records read" -> 2L))), @@ -401,7 +402,8 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils rightDf.hint("shuffle_hash"), $"key" === $"key2", joinType) testSparkPlanMetrics(df, 1, Map( nodeId -> (("ShuffledHashJoin", Map( - "number of output rows" -> rows)))), + "number of output rows" -> rows, + "avg hash probes per key" -> aggregateMetricsPattern)))), enableWholeStage ) } From effef8481e345c41413ab7cf5a94668bcf675c2f Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 9 Mar 2022 09:07:40 -0800 Subject: [PATCH 443/513] [SPARK-36681][CORE][TEST] Enable SnappyCodec test in FileSuite ### What changes were proposed in this pull request? This patch enables `SnappyCodec` test in `FileSuite` as snappy-java relocation issue was fixed in Hadoop 3.3.2 that Spark is using now. ### Why are the changes needed? Enabling test case. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Updated test. Closes #35784 from viirya/SPARK-36681. Authored-by: Liang-Chi Hsieh Signed-off-by: Liang-Chi Hsieh --- core/src/test/scala/org/apache/spark/FileSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala index f1f2b4fc70cdb..ac7670014eb9d 100644 --- a/core/src/test/scala/org/apache/spark/FileSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileSuite.scala @@ -28,7 +28,7 @@ import com.google.common.io.Files import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io._ -import org.apache.hadoop.io.compress.{BZip2Codec, CompressionCodec, DefaultCodec, Lz4Codec} +import org.apache.hadoop.io.compress.{BZip2Codec, CompressionCodec, DefaultCodec, Lz4Codec, SnappyCodec} import org.apache.hadoop.mapred.{FileAlreadyExistsException, FileSplit, JobConf, TextInputFormat, TextOutputFormat} import org.apache.hadoop.mapreduce.Job import org.apache.hadoop.mapreduce.lib.input.{FileSplit => NewFileSplit, TextInputFormat => NewTextInputFormat} @@ -136,8 +136,8 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { } // Hadoop "gzip" and "zstd" codecs require native library installed for sequence files - // "snappy" codec does not work due to SPARK-36681. - val codecs = Seq((new DefaultCodec(), "default"), (new BZip2Codec(), "bzip2")) ++ { + val codecs = Seq((new DefaultCodec(), "default"), (new BZip2Codec(), "bzip2"), + (new SnappyCodec(), "snappy")) ++ { if (VersionUtils.isHadoop3) Seq((new Lz4Codec(), "lz4")) else Seq() } codecs.foreach { case (codec, codecName) => From 97df0164b68448d73dd6d5a310a0db0a9ec95296 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 9 Mar 2022 10:12:07 -0800 Subject: [PATCH 444/513] [SPARK-38480][K8S] Remove `spark.kubernetes.job.queue` in favor of `spark.kubernetes.driver.podGroupTemplateFile` ### What changes were proposed in this pull request? 
This PR aims to remove `spark.kubernetes.job.queue` in favor of `spark.kubernetes.driver.podGroupTemplateFile` for Apache Spark 3.3. ### Why are the changes needed? There are several batch execution scheduler options including custom schedulers in K8s environment. We had better isolate scheduler specific settings instead of introducing a new configuration. ### Does this PR introduce _any_ user-facing change? No, the previous configuration is not released yet. ### How was this patch tested? Pass the CIs and K8s IT. ``` [info] KubernetesSuite: [info] - Run SparkPi with no resources (8 seconds, 548 milliseconds) [info] - Run SparkPi with no resources & statefulset allocation (8 seconds, 419 milliseconds) [info] - Run SparkPi with a very long application name. (8 seconds, 360 milliseconds) [info] - Use SparkLauncher.NO_RESOURCE (8 seconds, 386 milliseconds) [info] - Run SparkPi with a master URL without a scheme. (8 seconds, 589 milliseconds) [info] - Run SparkPi with an argument. (8 seconds, 361 milliseconds) [info] - Run SparkPi with custom labels, annotations, and environment variables. (8 seconds, 363 milliseconds) [info] - All pods have the same service account by default (8 seconds, 332 milliseconds) [info] - Run extraJVMOptions check on driver (4 seconds, 331 milliseconds) [info] - Run SparkRemoteFileTest using a remote data file (8 seconds, 392 milliseconds) [info] - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties (13 seconds, 915 milliseconds) [info] - Run SparkPi with env and mount secrets. (18 seconds, 172 milliseconds) [info] - Run PySpark on simple pi.py example (9 seconds, 368 milliseconds) [info] - Run PySpark to test a pyfiles example (11 seconds, 489 milliseconds) [info] - Run PySpark with memory customization (9 seconds, 378 milliseconds) [info] - Run in client mode. (6 seconds, 296 milliseconds) [info] - Start pod creation from template (8 seconds, 465 milliseconds) [info] - SPARK-38398: Schedule pod creation from template (9 seconds, 460 milliseconds) [info] - Test basic decommissioning (40 seconds, 795 milliseconds) [info] - Test basic decommissioning with shuffle cleanup (41 seconds, 16 milliseconds) [info] *** Test still running after 2 minutes, 19 seconds: suite name: KubernetesSuite, test name: Test decommissioning with dynamic allocation & shuffle cleanups. [info] - Test decommissioning with dynamic allocation & shuffle cleanups (2 minutes, 40 seconds) [info] - Test decommissioning timeouts (40 seconds, 446 milliseconds) [info] - SPARK-37576: Rolling decommissioning (1 minute, 5 seconds) [info] - Run SparkR on simple dataframe.R example (12 seconds, 562 milliseconds) [info] VolcanoSuite: [info] - Run SparkPi with no resources (10 seconds, 339 milliseconds) [info] - Run SparkPi with no resources & statefulset allocation (9 seconds, 346 milliseconds) [info] - Run SparkPi with a very long application name. (9 seconds, 306 milliseconds) [info] - Use SparkLauncher.NO_RESOURCE (9 seconds, 361 milliseconds) [info] - Run SparkPi with a master URL without a scheme. (9 seconds, 344 milliseconds) [info] - Run SparkPi with an argument. (9 seconds, 421 milliseconds) [info] - Run SparkPi with custom labels, annotations, and environment variables. 
(9 seconds, 365 milliseconds) [info] - All pods have the same service account by default (9 seconds, 337 milliseconds) [info] - Run extraJVMOptions check on driver (5 seconds, 348 milliseconds) [info] - Run SparkRemoteFileTest using a remote data file (8 seconds, 310 milliseconds) [info] - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties (15 seconds, 13 milliseconds) [info] - Run SparkPi with env and mount secrets. (18 seconds, 466 milliseconds) [info] - Run PySpark on simple pi.py example (10 seconds, 558 milliseconds) [info] - Run PySpark to test a pyfiles example (11 seconds, 445 milliseconds) [info] - Run PySpark with memory customization (10 seconds, 395 milliseconds) [info] - Run in client mode. (6 seconds, 239 milliseconds) [info] - Start pod creation from template (10 seconds, 415 milliseconds) [info] - SPARK-38398: Schedule pod creation from template (9 seconds, 440 milliseconds) [info] - Test basic decommissioning (42 seconds, 799 milliseconds) [info] - Test basic decommissioning with shuffle cleanup (42 seconds, 836 milliseconds) [info] - Test decommissioning with dynamic allocation & shuffle cleanups (2 minutes, 41 seconds) [info] - Test decommissioning timeouts (42 seconds, 375 milliseconds) [info] - SPARK-37576: Rolling decommissioning (1 minute, 7 seconds) [info] - Run SparkR on simple dataframe.R example (12 seconds, 441 milliseconds) [info] - Run SparkPi with volcano scheduler (10 seconds, 421 milliseconds) [info] - SPARK-38188: Run SparkPi jobs with 2 queues (only 1 enabled) (13 seconds, 256 milliseconds) [info] - SPARK-38188: Run SparkPi jobs with 2 queues (all enabled) (16 seconds, 216 milliseconds) [info] - SPARK-38423: Run SparkPi Jobs with priorityClassName (14 seconds, 264 milliseconds [info] - SPARK-38423: Run driver job to validate priority order (16 seconds, 325 milliseconds) [info] Run completed in 28 minutes, 9 seconds. [info] Total number of tests run: 53 [info] Suites: completed 2, aborted 0 [info] Tests: succeeded 53, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. [success] Total time: 1785 s (29:45), completed Mar 8, 2022 11:15:23 PM ``` Closes #35783 from dongjoon-hyun/SPARK-38480. 
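For context, after this change a Volcano queue is chosen through the driver pod group template file rather than through a dedicated Spark configuration key. A minimal sketch of a submission configured this way is below; it is not part of this patch, the template path is a placeholder, and `spark.kubernetes.scheduler.name` is assumed to be the string key behind the `KUBERNETES_SCHEDULER_NAME` constant used in the integration tests.

```scala
import org.apache.spark.SparkConf

// Hedged sketch (not from this patch): select a Volcano queue via the pod group
// template file introduced by SPARK-38480 instead of the removed
// spark.kubernetes.job.queue config. Path and scheduler-name key are assumptions.
val conf = new SparkConf()
  .set("spark.kubernetes.scheduler.name", "volcano")
  .set("spark.kubernetes.driver.podGroupTemplateFile",
    "/path/to/queue1-driver-podgroup-template.yml")
```

The queue itself is named only inside the referenced YAML (for example `spec.queue: queue1`), which is exactly what the new integration-test pod group templates added later in this patch demonstrate.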
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 9 --------- .../org/apache/spark/deploy/k8s/Config.scala | 7 ------- .../k8s/features/VolcanoFeatureStep.scala | 2 -- .../features/VolcanoFeatureStepSuite.scala | 10 ---------- .../queue-driver-podgroup-template.yml | 20 +++++++++++++++++++ .../queue0-driver-podgroup-template.yml | 20 +++++++++++++++++++ .../queue1-driver-podgroup-template.yml | 20 +++++++++++++++++++ .../integrationtest/VolcanoTestsSuite.scala | 7 ++++++- 8 files changed, 66 insertions(+), 29 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue-driver-podgroup-template.yml create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue0-driver-podgroup-template.yml create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue1-driver-podgroup-template.yml diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 79e01a35e2c57..a5da80a68d32d 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1356,15 +1356,6 @@ See the [configuration page](configuration.html) for information on Spark config 3.3.0 - - spark.kubernetes.job.queue - (none) - - The name of the queue to which the job is submitted. This info will be stored in configuration - and passed to specific feature step. - - 3.3.0 - spark.kubernetes.configMap.maxSize 1572864 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index e66ecf4312bb2..ff17ef51fe630 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -306,13 +306,6 @@ private[spark] object Config extends Logging { .stringConf .createOptional - val KUBERNETES_JOB_QUEUE = ConfigBuilder("spark.kubernetes.job.queue") - .doc("The name of the queue to which the job is submitted. 
This info " + - "will be stored in configuration and passed to specific feature step.") - .version("3.3.0") - .stringConf - .createOptional - val KUBERNETES_EXECUTOR_REQUEST_CORES = ConfigBuilder("spark.kubernetes.executor.request.cores") .doc("Specify the cpu request for each executor pod") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala index 5fd0fc69ea2df..393edd2871ea0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala @@ -32,7 +32,6 @@ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureCon private lazy val podGroupName = s"${kubernetesConf.appId}-podgroup" private lazy val namespace = kubernetesConf.namespace - private lazy val queue = kubernetesConf.get(KUBERNETES_JOB_QUEUE) private var priorityClassName: Option[String] = None override def init(config: KubernetesDriverConf): Unit = { @@ -60,7 +59,6 @@ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureCon var spec = pg.getSpec if (spec == null) spec = new PodGroupSpec - queue.foreach(spec.setQueue(_)) priorityClassName.foreach(spec.setPriorityClassName(_)) pg.setSpec(spec) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala index e7f1e316a6d67..9f6bedb17626d 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala @@ -41,16 +41,6 @@ class VolcanoFeatureStepSuite extends SparkFunSuite { assert(podGroup.getMetadata.getName === s"${kubernetesConf.appId}-podgroup") } - test("SPARK-38818: Support `spark.kubernetes.job.queue`") { - val sparkConf = new SparkConf() - .set(KUBERNETES_JOB_QUEUE.key, "queue1") - val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf) - val step = new VolcanoFeatureStep() - step.init(kubernetesConf) - val podGroup = step.getAdditionalPreKubernetesResources().head.asInstanceOf[PodGroup] - assert(podGroup.getSpec.getQueue === "queue1") - } - test("SPARK-36061: Executor Pod with Volcano PodGroup") { val sparkConf = new SparkConf() val kubernetesConf = KubernetesTestConf.createExecutorConf(sparkConf) diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue-driver-podgroup-template.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue-driver-podgroup-template.yml new file mode 100644 index 0000000000000..591000a0d02d3 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue-driver-podgroup-template.yml @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: PodGroup +spec: + queue: queue diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue0-driver-podgroup-template.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue0-driver-podgroup-template.yml new file mode 100644 index 0000000000000..faba21abe1ec2 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue0-driver-podgroup-template.yml @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: PodGroup +spec: + queue: queue0 diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue1-driver-podgroup-template.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue1-driver-podgroup-template.yml new file mode 100644 index 0000000000000..280656450ea06 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue1-driver-podgroup-template.yml @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: PodGroup +spec: + queue: queue1 diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala index 803a8d3f194d0..ce5f86345eb45 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala @@ -210,7 +210,12 @@ private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: Ku .set(KUBERNETES_SCHEDULER_NAME.key, "volcano") .set(KUBERNETES_DRIVER_POD_FEATURE_STEPS.key, VOLCANO_FEATURE_STEP) .set(KUBERNETES_EXECUTOR_POD_FEATURE_STEPS.key, VOLCANO_FEATURE_STEP) - queue.foreach(conf.set(KUBERNETES_JOB_QUEUE.key, _)) + queue.foreach { q => + conf.set(KUBERNETES_DRIVER_PODGROUP_TEMPLATE_FILE.key, + new File( + getClass.getResource(s"/volcano/$q-driver-podgroup-template.yml").getFile + ).getAbsolutePath) + } groupLoc.foreach { locator => conf.set(s"${KUBERNETES_DRIVER_LABEL_PREFIX}spark-group-locator", locator) conf.set(s"${KUBERNETES_EXECUTOR_LABEL_PREFIX}spark-group-locator", locator) From 01014aa99fa851411262a6719058dde97319bbb3 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 9 Mar 2022 16:22:04 -0800 Subject: [PATCH 445/513] [SPARK-38486][K8S][TESTS] Upgrade the minimum Minikube version to 1.18.0 ### What changes were proposed in this pull request? This PR aims to upgrade the minimum Minikube version to 1.18.0 from 1.7.3 at Apache Spark 3.3.0. ### Why are the changes needed? Minikube v1.18.0 was released one year ago on March 2021, and the first version supporting Apple Silicon natively. Previously, there exists some issues while running Intel arch binary on Apple Silicon. - https://github.com/kubernetes/minikube/releases/download/v1.18.0/minikube-darwin-arm64 - https://github.com/kubernetes/minikube/releases/tag/v1.18.0 ### Does this PR introduce _any_ user-facing change? No, this is a test-only PR. ### How was this patch tested? Manually. Closes #35791 from dongjoon-hyun/SPARK-38486. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- resource-managers/kubernetes/integration-tests/README.md | 4 ++-- .../k8s/integrationtest/backend/minikube/Minikube.scala | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md index 9eb928dfc83ea..ac82282526fe3 100644 --- a/resource-managers/kubernetes/integration-tests/README.md +++ b/resource-managers/kubernetes/integration-tests/README.md @@ -28,7 +28,7 @@ To run tests with Hadoop 2.x instead of Hadoop 3.x, use `--hadoop-profile`. ./dev/dev-run-integration-tests.sh --hadoop-profile hadoop-2 -The minimum tested version of Minikube is 1.7.3. The kube-dns addon must be enabled. Minikube should +The minimum tested version of Minikube is 1.18.0. The kube-dns addon must be enabled. 
Minikube should run with a minimum of 4 CPUs and 6G of memory: minikube start --cpus 4 --memory 6144 @@ -47,7 +47,7 @@ default this is set to `minikube`, the available backends are their prerequisite ### `minikube` -Uses the local `minikube` cluster, this requires that `minikube` 1.7.3 or greater be installed and that it be allocated +Uses the local `minikube` cluster, this requires that `minikube` 1.18.0 or greater be installed and that it be allocated at least 4 CPUs and 6GB memory (some users have reported success with as few as 3 CPUs and 4GB memory). The tests will check if `minikube` is started and abort early if it isn't currently running. diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala index 9f99edefaf093..755feb9aca9e6 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala @@ -48,9 +48,9 @@ private[spark] object Minikube extends Logging { versionArrayOpt match { case Some(Array(x, y, z)) => - if (Ordering.Tuple3[Int, Int, Int].lt((x, y, z), (1, 7, 3))) { + if (Ordering.Tuple3[Int, Int, Int].lt((x, y, z), (1, 18, 0))) { assert(false, s"Unsupported Minikube version is detected: $minikubeVersionString." + - "For integration testing Minikube version 1.7.3 or greater is expected.") + "For integration testing Minikube version 1.18.0 or greater is expected.") } case _ => assert(false, s"Unexpected version format detected in `$minikubeVersionString`." + From 0f4c26ae862e02ccf4e58165d51a882af5dd0383 Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Wed, 9 Mar 2022 17:01:54 -0800 Subject: [PATCH 446/513] [SPARK-38387][PYTHON] Support `na_action` and Series input correspondence in `Series.map` ### What changes were proposed in this pull request? Support `na_action` and Series input correspondence in `Series.map` ### Why are the changes needed? To reach parity to pandas API. Note that `na_action` is supported in older pandas: ```py >>> import pandas as pd >>> pd.__version__ '1.0.0' >>> pser = pd.Series(['a', 'b', None]) >>> pser.map(lambda x : x.upper(), na_action='ignore') 0 A 1 B 2 None dtype: object ``` ### Does this PR introduce _any_ user-facing change? Yes. `na_action`, Series input correspondence are supported now: ```py >>> psser = ps.Series(['cat', 'dog', None, 'rabbit']) # na_action support >>> psser.map(lambda x : x.upper(), na_action="ignore") 0 CAT 1 DOG 2 None 3 RABBIT dtype: object # Series input correspondence support >>> pser_to_apply = pd.Series(["one", "two", "four"], index=["cat", "dog", "rabbit"]) >>> psser.map(pser_to_apply) 0 one 1 two 2 None 3 four dtype: object ``` ### How was this patch tested? Unit tests. Closes #35706 from xinrong-databricks/series.map. 
Authored-by: Xinrong Meng Signed-off-by: Takuya UESHIN --- python/pyspark/pandas/series.py | 36 ++++++++++++++++++---- python/pyspark/pandas/tests/test_series.py | 25 +++++++++++++-- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py index e2df500ca3dee..853e0e2749f5d 100644 --- a/python/pyspark/pandas/series.py +++ b/python/pyspark/pandas/series.py @@ -976,9 +976,11 @@ def cov(self, other: "Series", min_periods: Optional[int] = None) -> float: else: return sdf.select(F.covar_samp(*sdf.columns)).head(1)[0][0] - # TODO: arg should support Series - # TODO: NaN and None - def map(self, arg: Union[Dict, Callable]) -> "Series": + # TODO: NaN and None when ``arg`` is an empty dict + # TODO: Support ps.Series ``arg`` + def map( + self, arg: Union[Dict, Callable[[Any], Any], pd.Series], na_action: Optional[str] = None + ) -> "Series": """ Map values of Series according to input correspondence. @@ -992,8 +994,10 @@ def map(self, arg: Union[Dict, Callable]) -> "Series": Parameters ---------- - arg : function or dict + arg : function, dict or pd.Series Mapping correspondence. + na_action : + If `ignore`, propagate NA values, without passing them to the mapping correspondence. Returns ------- @@ -1034,6 +1038,16 @@ def map(self, arg: Union[Dict, Callable]) -> "Series": 3 None dtype: object + It also accepts a pandas Series: + + >>> pser = pd.Series(['kitten', 'puppy'], index=['cat', 'dog']) + >>> s.map(pser) + 0 kitten + 1 puppy + 2 None + 3 None + dtype: object + It also accepts a function: >>> def format(x) -> str: @@ -1045,8 +1059,18 @@ def map(self, arg: Union[Dict, Callable]) -> "Series": 2 I am a None 3 I am a rabbit dtype: object + + To avoid applying the function to missing values (and keep them as NaN) + na_action='ignore' can be used: + + >>> s.map('I am a {}'.format, na_action='ignore') + 0 I am a cat + 1 I am a dog + 2 None + 3 I am a rabbit + dtype: object """ - if isinstance(arg, dict): + if isinstance(arg, (dict, pd.Series)): is_start = True # In case dictionary is empty. current = F.when(SF.lit(False), SF.lit(None).cast(self.spark.data_type)) @@ -1067,7 +1091,7 @@ def map(self, arg: Union[Dict, Callable]) -> "Series": current = current.otherwise(SF.lit(None).cast(self.spark.data_type)) return self._with_new_scol(current) else: - return self.apply(arg) + return self.pandas_on_spark.transform_batch(lambda pser: pser.map(arg, na_action)) @property def shape(self) -> Tuple[int]: diff --git a/python/pyspark/pandas/tests/test_series.py b/python/pyspark/pandas/tests/test_series.py index cec6a475eb791..76a5b78b2115f 100644 --- a/python/pyspark/pandas/tests/test_series.py +++ b/python/pyspark/pandas/tests/test_series.py @@ -1161,13 +1161,34 @@ def test_append(self): def test_map(self): pser = pd.Series(["cat", "dog", None, "rabbit"]) psser = ps.from_pandas(pser) - # Currently Koalas doesn't return NaN as pandas does. - self.assert_eq(psser.map({}), pser.map({}).replace({pd.np.nan: None})) + + # dict correspondence + # Currently pandas API on Spark doesn't return NaN as pandas does. 
+ self.assert_eq(psser.map({}), pser.map({}).replace({np.nan: None})) d = defaultdict(lambda: "abc") self.assertTrue("abc" in repr(psser.map(d))) self.assert_eq(psser.map(d), pser.map(d)) + # series correspondence + pser_to_apply = pd.Series(["one", "two", "four"], index=["cat", "dog", "rabbit"]) + self.assert_eq(psser.map(pser_to_apply), pser.map(pser_to_apply)) + self.assert_eq( + psser.map(pser_to_apply, na_action="ignore"), + pser.map(pser_to_apply, na_action="ignore"), + ) + + # function correspondence + self.assert_eq( + psser.map(lambda x: x.upper(), na_action="ignore"), + pser.map(lambda x: x.upper(), na_action="ignore"), + ) + + def to_upper(string) -> str: + return string.upper() if string else "" + + self.assert_eq(psser.map(to_upper), pser.map(to_upper)) + def tomorrow(date) -> datetime: return date + timedelta(days=1) From bd08e792a748222e6291a8ae3216cc2134efd8a6 Mon Sep 17 00:00:00 2001 From: bjornjorgensen Date: Thu, 10 Mar 2022 10:02:27 +0900 Subject: [PATCH 447/513] [SPARK-38355][PYTHON][TESTS] Use `mkstemp` instead of `mktemp` ### What changes were proposed in this pull request? To change from `mktemp` to `mkstemp`. ### Why are the changes needed? Pandas API on Spark use `mktemp` in test. `mktemp` "THIS FUNCTION IS UNSAFE AND SHOULD NOT BE USED. The file name may refer to a file that did not exist at some point, but by the time you get around to creating it, someone else may have beaten you to the punch." ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Got the green light. Closes #35775 from bjornjorgensen/Upgrade-mktemp-to-mkstemp. Authored-by: bjornjorgensen Signed-off-by: Hyukjin Kwon --- python/pyspark/testing/pandasutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/testing/pandasutils.py b/python/pyspark/testing/pandasutils.py index 6d402985f4aed..a5b913ec6727b 100644 --- a/python/pyspark/testing/pandasutils.py +++ b/python/pyspark/testing/pandasutils.py @@ -259,7 +259,7 @@ def temp_dir(self): @contextmanager def temp_file(self): with self.temp_dir() as tmp: - yield tempfile.mktemp(dir=tmp) + yield tempfile.mkstemp(dir=tmp)[1] class ComparisonTestBase(PandasOnSparkTestCase): From ecabfb1c991f332ce26788018c7f9c7b6f4a2fc8 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Wed, 9 Mar 2022 17:29:26 -0800 Subject: [PATCH 448/513] [SPARK-38187][K8S][TESTS] Add K8S IT for `volcano` minResources cpu/memory spec ### What changes were proposed in this pull request? This PR adds two tests to make sure resource reservation supported. - Run SparkPi Jobs with minCPU - Run SparkPi Jobs with minMemory ### Why are the changes needed? Test resource reservation (min Resoruce) with volcano implementations ### Does this PR introduce _any_ user-facing change? No, K8S IT only ### How was this patch tested? - integration test ``` [info] VolcanoSuite: [info] - Run SparkPi with volcano scheduler (12 seconds, 738 milliseconds) [info] - SPARK-38188: Run SparkPi jobs with 2 queues (only 1 enabled) (13 seconds, 294 milliseconds) [info] - SPARK-38188: Run SparkPi jobs with 2 queues (all enabled) (25 seconds, 659 milliseconds) [info] - SPARK-38423: Run SparkPi Jobs with priorityClassName (19 seconds, 310 milliseconds) [info] - SPARK-38423: Run driver job to validate priority order (16 seconds, 467 milliseconds) [info] - SPARK-38187: Run SparkPi Jobs with minCPU (29 seconds, 546 milliseconds) [info] - SPARK-38187: Run SparkPi Jobs with minMemory (30 seconds, 473 milliseconds) [info] Run completed in 2 minutes, 30 seconds. 
[info] Total number of tests run: 7 [info] Suites: completed 2, aborted 0 [info] Tests: succeeded 7, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. [success] Total time: 236 s (03:56), completed 2022-3-10 9:17:46 ``` Closes #35640 from Yikun/SPARK-38187-minRes. Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- .../driver-podgroup-template-cpu-2u.yml | 23 +++++++ .../driver-podgroup-template-memory-3g.yml | 23 +++++++ .../test/resources/volcano/queue-2u-3g.yml | 25 +++++++ .../integrationtest/VolcanoTestsSuite.scala | 68 ++++++++++++++++++- 4 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/driver-podgroup-template-cpu-2u.yml create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/driver-podgroup-template-memory-3g.yml create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue-2u-3g.yml diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/driver-podgroup-template-cpu-2u.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/driver-podgroup-template-cpu-2u.yml new file mode 100644 index 0000000000000..e6d53ddc8b5cd --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/driver-podgroup-template-cpu-2u.yml @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: PodGroup +spec: + queue: queue-2u-3g + minMember: 1 + minResources: + cpu: "2" diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/driver-podgroup-template-memory-3g.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/driver-podgroup-template-memory-3g.yml new file mode 100644 index 0000000000000..9aaa5cf20658b --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/driver-podgroup-template-memory-3g.yml @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: PodGroup +spec: + queue: queue-2u-3g + minMember: 1 + minResources: + memory: "3Gi" diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue-2u-3g.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue-2u-3g.yml new file mode 100644 index 0000000000000..094ec233fd041 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/queue-2u-3g.yml @@ -0,0 +1,25 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: Queue +metadata: + name: queue-2u-3g +spec: + weight: 1 + capability: + cpu: "2" + memory: "3Gi" diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala index ce5f86345eb45..85c8497dea6f7 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala @@ -135,11 +135,13 @@ private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: Ku groupLoc: Option[String] = None, queue: Option[String] = None, driverTemplate: Option[String] = None, - isDriverJob: Boolean = false): Unit = { + isDriverJob: Boolean = false, + driverPodGroupTemplate: Option[String] = None): Unit = { val appLoc = s"${appLocator}${batchSuffix}" val podName = s"${driverPodName}-${batchSuffix}" // create new configuration for every job - val conf = createVolcanoSparkConf(podName, appLoc, groupLoc, queue, driverTemplate) + val conf = createVolcanoSparkConf(podName, appLoc, groupLoc, queue, driverTemplate, + driverPodGroupTemplate) if (isDriverJob) { runSparkDriverSubmissionAndVerifyCompletion( driverPodChecker = (driverPod: Pod) => { @@ -199,7 +201,8 @@ private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: Ku appLoc: String = appLocator, groupLoc: Option[String] = None, queue: Option[String] = None, - driverTemplate: Option[String] = None): SparkAppConf = { + driverTemplate: Option[String] = None, + driverPodGroupTemplate: Option[String] = None): SparkAppConf = { val conf = kubernetesTestComponents.newSparkAppConf() .set(CONTAINER_IMAGE.key, image) .set(KUBERNETES_DRIVER_POD_NAME.key, driverPodName) @@ -216,6 +219,7 @@ private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: Ku 
getClass.getResource(s"/volcano/$q-driver-podgroup-template.yml").getFile ).getAbsolutePath) } + driverPodGroupTemplate.foreach(conf.set(KUBERNETES_DRIVER_PODGROUP_TEMPLATE_FILE.key, _)) groupLoc.foreach { locator => conf.set(s"${KUBERNETES_DRIVER_LABEL_PREFIX}spark-group-locator", locator) conf.set(s"${KUBERNETES_EXECUTOR_LABEL_PREFIX}spark-group-locator", locator) @@ -243,6 +247,55 @@ private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: Ku ) } + private def verifyJobsSucceededOneByOne(jobNum: Int, groupName: String): Unit = { + // Check Pending jobs completed one by one + (1 until jobNum).map { completedNum => + Eventually.eventually(TIMEOUT, INTERVAL) { + val pendingPods = getPods(role = "driver", groupName, statusPhase = "Pending") + assert(pendingPods.size === jobNum - completedNum) + } + } + // All jobs succeeded finally + Eventually.eventually(TIMEOUT, INTERVAL) { + val succeededPods = getPods(role = "driver", groupName, statusPhase = "Succeeded") + assert(succeededPods.size === jobNum) + } + } + + test("SPARK-38187: Run SparkPi Jobs with minCPU", k8sTestTag, volcanoTag) { + val groupName = generateGroupName("min-cpu") + // Create a queue with 2 CPU, 3G memory capacity + createOrReplaceYAMLResource(QUEUE_2U_3G_YAML) + // Submit 3 jobs with minCPU = 2 + val jobNum = 3 + (1 to jobNum).map { i => + Future { + runJobAndVerify( + i.toString, + groupLoc = Option(groupName), + driverPodGroupTemplate = Option(DRIVER_PG_TEMPLATE_CPU_2U)) + } + } + verifyJobsSucceededOneByOne(jobNum, groupName) + } + + test("SPARK-38187: Run SparkPi Jobs with minMemory", k8sTestTag, volcanoTag) { + val groupName = generateGroupName("min-mem") + // Create a queue with 2 CPU, 3G memory capacity + createOrReplaceYAMLResource(QUEUE_2U_3G_YAML) + // Submit 3 jobs with minMemory = 3g + val jobNum = 3 + (1 to jobNum).map { i => + Future { + runJobAndVerify( + i.toString, + groupLoc = Option(groupName), + driverPodGroupTemplate = Option(DRIVER_PG_TEMPLATE_MEMORY_3G)) + } + } + verifyJobsSucceededOneByOne(jobNum, groupName) + } + test("SPARK-38188: Run SparkPi jobs with 2 queues (only 1 enabled)", k8sTestTag, volcanoTag) { // Disabled queue0 and enabled queue1 createOrReplaceYAMLResource(VOLCANO_Q0_DISABLE_Q1_ENABLE_YAML) @@ -372,4 +425,13 @@ private[spark] object VolcanoTestsSuite extends SparkFunSuite { val DISABLE_QUEUE = new File( getClass.getResource("/volcano/disable-queue.yml").getFile ).getAbsolutePath + val QUEUE_2U_3G_YAML = new File( + getClass.getResource("/volcano/queue-2u-3g.yml").getFile + ).getAbsolutePath + val DRIVER_PG_TEMPLATE_CPU_2U = new File( + getClass.getResource("/volcano/driver-podgroup-template-cpu-2u.yml").getFile + ).getAbsolutePath + val DRIVER_PG_TEMPLATE_MEMORY_3G = new File( + getClass.getResource("/volcano/driver-podgroup-template-memory-3g.yml").getFile + ).getAbsolutePath } From 82b61948f9afe5323965f01e23ffd9b39587dcaf Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Thu, 10 Mar 2022 11:14:58 +0800 Subject: [PATCH 449/513] [SPARK-38385][SQL] Improve error messages of empty statement and in ParseException MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR handles case 2 and 3 mentioned in https://issues.apache.org/jira/browse/SPARK-38385. Specifically, 1. 
For empty query, output a specific error message ‘syntax error, unexpected SQL statement’ * Before ``` ParseException: mismatched input '' expecting {'(', 'CONVERT', 'COPY', 'OPTIMIZE', 'RESTORE', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) == SQL == ^^^ ``` * After ``` ParseException: syntax error, unexpected SQL statement(line 1, pos 0) == SQL == ^^^ ``` 2. For the faulty token '\'., substitute it to a readable string ‘end of input’. * Before ``` ParseException: mismatched input '' expecting {'APPLY', 'CALLED', 'CHANGES', 'CLONE', 'COLLECT', 'CONTAINS', 'CONVERT', 'COPY', 'COPY_OPTIONS', 'CREDENTIAL', 'CREDENTIALS', 'DEEP', 'DEFINER', 'DELTA', 'DETERMINISTIC', 'ENCRYPTION', 'EXPECT', 'FAIL', 'FILES',… (omit long message) 'TRIM', 'TRUE', 'TRUNCATE', 'TRY_CAST', 'TYPE', 'UNARCHIVE', 'UNBOUNDED', 'UNCACHE', 'UNION', 'UNIQUE', 'UNKNOWN', 'UNLOCK', 'UNSET', 'UPDATE', 'USE', 'USER', 'USING', 'VALUES', 'VERSION', 'VIEW', 'VIEWS', 'WHEN', 'WHERE', 'WINDOW', 'WITH', 'WITHIN', 'YEAR', 'ZONE', IDENTIFIER, BACKQUOTED_IDENTIFIER}(line 1, pos 11) == SQL == select 1 ( -----------^^^ ``` * After ``` ParseException: syntax error at or near end of input(line 1, pos 11) == SQL == select 1 ( -----------^^^ ``` #### Changes in code * error-class.json Define a new type of error `PARSE_EMPTY_STATEMENT` ```json "PARSE_EMPTY_STATEMENT" : { "message" : [ "Syntax error, unexpected empty statement" ], "sqlState" : "42000" }, ``` * ParserDriver.scala Let the `PARSE_EMPTY_STATEMENT` error class overrides the `PARSE_INPUT_MISMATCHED` when cmd (the SQL) is empty. That way, the error messages for empty queries would be "unexpected empty statement". * SparkParserErrorStrategy.scala Define a new dictionary from the input jargon words to user-facing words. In the current mismatch input error, substitute the token to user-facing language. That way, there would be no '\'. * test suites Add two more testcases testing the above cases. ### Why are the changes needed? https://issues.apache.org/jira/browse/SPARK-38384 The description states the reason for the change. TLDR, the error messages of ParseException directly coming from ANTLR are not user-friendly and we want to improve it. ### Does this PR introduce _any_ user-facing change? If the error messages change are considered as user-facing change, then yes. The changes in the error message: 1. For empty query, output a specific error message ‘syntax error, unexpected SQL statement’ 2. For the faulty token '\'., substitute it to a readable string ‘end of input’. Example cases are listed in the top of this PR description. ### How was this patch tested? Unit tests. Closes #35777 from anchovYu/improve-parse-exception-mismatched-input-empty. 
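The wording change itself comes down to a small lookup applied to the offending token before it is interpolated into the `PARSE_INPUT_MISMATCHED` message. A simplified, standalone sketch of that substitution, outside the ANTLR error-strategy plumbing shown in the diff below:

```scala
// Simplified sketch of the token substitution added in SparkParserErrorStrategy:
// jargon tokens reported by ANTLR (such as '' for end of input) are mapped to
// reader-friendly wording before being placed into the error message.
val userWordDict: Map[String, String] = Map("''" -> "end of input")

def toUserFacing(token: String): String = userWordDict.getOrElse(token, token)

assert(toUserFacing("''") == "end of input")   // yields "Syntax error at or near end of input"
assert(toUserFacing("'SELECT'") == "'SELECT'") // other tokens pass through unchanged
```

Any token without an entry in the dictionary passes through unchanged, so only the `''` token is rewritten to `end of input`.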
Authored-by: Xinyi Yu Signed-off-by: Wenchen Fan --- core/src/main/resources/error/error-classes.json | 4 ++++ .../spark/sql/catalyst/parser/ParseDriver.scala | 7 ++++++- .../parser/SparkParserErrorStrategy.scala | 7 ++++++- .../sql/catalyst/parser/ErrorParserSuite.scala | 15 +++++++++++++++ .../sql-tests/results/show-tables.sql.out | 2 +- 5 files changed, 32 insertions(+), 3 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 55e9373256f83..a0e63270b5a64 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -131,6 +131,10 @@ "message" : [ "PARTITION clause cannot contain a non-partition column name: %s" ], "sqlState" : "42000" }, + "PARSE_EMPTY_STATEMENT" : { + "message" : [ "Syntax error, unexpected empty statement" ], + "sqlState" : "42000" + }, "PARSE_INPUT_MISMATCHED" : { "message" : [ "Syntax error at or near %s" ], "sqlState" : "42000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 87825540f7427..5c9c382d08d04 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -289,7 +289,12 @@ class ParseException( } def withCommand(cmd: String): ParseException = { - new ParseException(Option(cmd), message, start, stop, errorClass, messageParameters) + // PARSE_EMPTY_STATEMENT error class overrides the PARSE_INPUT_MISMATCHED when cmd is empty + if (cmd.trim().isEmpty && errorClass.isDefined && errorClass.get == "PARSE_INPUT_MISMATCHED") { + new ParseException(Option(cmd), start, stop, "PARSE_EMPTY_STATEMENT", Array[String]()) + } else { + new ParseException(Option(cmd), message, start, stop, errorClass, messageParameters) + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala index d514a61e315dc..0ce514c4d2298 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala @@ -60,6 +60,11 @@ class SparkRecognitionException( * message framework to these exceptions. 
*/ class SparkParserErrorStrategy() extends DefaultErrorStrategy { + private val userWordDict : Map[String, String] = Map("''" -> "end of input") + private def getUserFacingLanguage(input: String) = { + userWordDict.getOrElse(input, input) + } + override def reportInputMismatch(recognizer: Parser, e: InputMismatchException): Unit = { // Keep the original error message in ANTLR val msg = "mismatched input " + @@ -70,7 +75,7 @@ class SparkParserErrorStrategy() extends DefaultErrorStrategy { val exceptionWithErrorClass = new SparkRecognitionException( e, "PARSE_INPUT_MISMATCHED", - Array(getTokenErrorDisplay(e.getOffendingToken))) + Array(getUserFacingLanguage(getTokenErrorDisplay(e.getOffendingToken)))) recognizer.notifyErrorListeners(e.getOffendingToken, msg, exceptionWithErrorClass) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala index c1c8393ce3df9..71296f0a26e4a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala @@ -98,6 +98,21 @@ class ErrorParserSuite extends AnalysisTest { "Syntax error at or near", "^^^") } + test("empty input") { + val expectedErrMsg = SparkThrowableHelper.getMessage("PARSE_EMPTY_STATEMENT", Array[String]()) + intercept("", Some("PARSE_EMPTY_STATEMENT"), expectedErrMsg) + intercept(" ", Some("PARSE_EMPTY_STATEMENT"), expectedErrMsg) + intercept(" \n", Some("PARSE_EMPTY_STATEMENT"), expectedErrMsg) + } + + test("jargon token substitute to user-facing language") { + // '' -> end of input + intercept("select count(*", "PARSE_INPUT_MISMATCHED", + 1, 14, 14, "Syntax error at or near end of input") + intercept("select 1 as a from", "PARSE_INPUT_MISMATCHED", + 1, 18, 18, "Syntax error at or near end of input") + } + test("semantic errors") { intercept("select *\nfrom r\norder by q\ncluster by q", 3, 0, 11, "Combination of ORDER BY/SORT BY/DISTRIBUTE BY/CLUSTER BY is not supported", diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index b771a54f3f8f9..70a4822ff916d 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -168,7 +168,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -Syntax error at or near ''(line 1, pos 19) +Syntax error at or near end of input(line 1, pos 19) == SQL == SHOW TABLE EXTENDED From f286416ee16e878de3c70a31cef20549b33aaa0a Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Wed, 9 Mar 2022 21:06:25 -0800 Subject: [PATCH 450/513] [SPARK-38379][K8S] Fix Kubernetes Client mode when mounting persistent volume with storage class ### What changes were proposed in this pull request? Running spark-shell in client mode on Kubernetes cluster when mounting persistent volumes with a storage class results in a big warning being thrown on startup. https://issues.apache.org/jira/browse/SPARK-38379 The issue here is there is a race condition between when spark.app.id is set in SparkContext and when its used, so change to use the KubernetesConf appId, which is what is used to set spark.app.id. ### Why are the changes needed? Throws big warning to user and I believe the label is wrong as well. ### Does this PR introduce _any_ user-facing change? 
No ### How was this patch tested? Unit test added. The test fails without the fix. Also manually tested on real k8s cluster. Closes #35792 from tgravescs/fixVolk8s. Authored-by: Thomas Graves Signed-off-by: Dongjoon Hyun --- .../features/MountVolumesFeatureStep.scala | 2 +- .../MountVolumesFeatureStepSuite.scala | 25 +++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala index 4e1647372ecdc..78dd6ec21ed34 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala @@ -85,7 +85,7 @@ private[spark] class MountVolumesFeatureStep(conf: KubernetesConf) .withApiVersion("v1") .withNewMetadata() .withName(claimName) - .addToLabels(SPARK_APP_ID_LABEL, conf.sparkConf.getAppId) + .addToLabels(SPARK_APP_ID_LABEL, conf.appId) .endMetadata() .withNewSpec() .withStorageClassName(storageClass.get) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala index 38f8fac1858f1..468d1dde9fb6d 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala @@ -89,6 +89,31 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { assert(executorPVC.getClaimName === s"pvc-spark-${KubernetesTestConf.EXECUTOR_ID}") } + test("SPARK-32713 Mounts parameterized persistentVolumeClaims in executors with storage class") { + val volumeConf = KubernetesVolumeSpec( + "testVolume", + "/tmp", + "", + true, + KubernetesPVCVolumeConf("pvc-spark-SPARK_EXECUTOR_ID", Some("fast"), Some("512mb")) + ) + val driverConf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeConf)) + val driverStep = new MountVolumesFeatureStep(driverConf) + val driverPod = driverStep.configurePod(SparkPod.initialPod()) + + assert(driverPod.pod.getSpec.getVolumes.size() === 1) + val driverPVC = driverPod.pod.getSpec.getVolumes.get(0).getPersistentVolumeClaim + assert(driverPVC.getClaimName === "pvc-spark-SPARK_EXECUTOR_ID") + + val executorConf = KubernetesTestConf.createExecutorConf(volumes = Seq(volumeConf)) + val executorStep = new MountVolumesFeatureStep(executorConf) + val executorPod = executorStep.configurePod(SparkPod.initialPod()) + + assert(executorPod.pod.getSpec.getVolumes.size() === 1) + val executorPVC = executorPod.pod.getSpec.getVolumes.get(0).getPersistentVolumeClaim + assert(executorPVC.getClaimName === s"pvc-spark-${KubernetesTestConf.EXECUTOR_ID}") + } + test("Create and mounts persistentVolumeClaims in driver") { val volumeConf = KubernetesVolumeSpec( "testVolume", From ec544ad284e49cbe0142ffd921cb3f9f07e9c097 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Thu, 10 Mar 2022 13:06:36 +0800 Subject: [PATCH 451/513] [SPARK-38148][SQL] Do not add dynamic partition pruning if there exists static partition pruning ### What changes were proposed in this pull request? 
Add check in `PartitionPruning` if the partition has static filter. So we can skip add dynamic partition pruning if there already exists. ### Why are the changes needed? Dynamic partition pruning add a filter as long as the join condition contains partition columns. But if there exists other condition which contains the static partition pruning, it's unnecessary to add an extra dynamic partition pruning. For example: ```sql CREATE TABLE t1 (c1 int) USING PARQUET PARTITIONED BY (p1 string); CREATE TABLE t2 (c2 int) USING PARQUET PARTITIONED BY (p2 string); SELECT * FROM t1 JOIN t2 ON t1.p1 = t2.p2 and t1.p1 = 'a' AND t2.c2 > 0; ``` ### Does this PR introduce _any_ user-facing change? no, only change the plan ### How was this patch tested? Add test in `DynamicPartitionPruningSuiteBase` Closes #35451 from ulysses-you/dpp. Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../spark/sql/execution/SparkOptimizer.scala | 2 + .../CleanupDynamicPruningFilters.scala | 38 ++++++++++++++-- .../approved-plans-v1_4/q13.sf100/explain.txt | 4 +- .../approved-plans-v1_4/q13/explain.txt | 4 +- .../sql/DynamicPartitionPruningSuite.scala | 45 +++++++++++++++++++ 5 files changed, 85 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index dc3ceb5c595d0..7e8fb4a157262 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -47,6 +47,8 @@ class SparkOptimizer( PushDownPredicates) :+ Batch("Cleanup filters that cannot be pushed down", Once, CleanupDynamicPruningFilters, + // cleanup the unnecessary TrueLiteral predicates + BooleanSimplification, PruneFilters)) ++ postHocOptimizationBatches :+ Batch("Extract Python UDFs", Once, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala index abf0cf63a0bb0..65621fb1860e5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.dynamicpruning import org.apache.spark.sql.catalyst.catalog.HiveTableRelation -import org.apache.spark.sql.catalyst.expressions.{DynamicPruning, PredicateHelper} +import org.apache.spark.sql.catalyst.expressions.{DynamicPruning, DynamicPruningSubquery, EqualNullSafe, EqualTo, Expression, ExpressionSet, PredicateHelper} import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan} @@ -34,6 +34,33 @@ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation */ object CleanupDynamicPruningFilters extends Rule[LogicalPlan] with PredicateHelper { + private def collectEqualityConditionExpressions(condition: Expression): Seq[Expression] = { + splitConjunctivePredicates(condition).flatMap(_.collect { + case EqualTo(l, r) if l.deterministic && r.foldable => l + case EqualTo(l, r) if r.deterministic && l.foldable => r + case EqualNullSafe(l, r) if l.deterministic && r.foldable => l + case EqualNullSafe(l, r) if r.deterministic && 
l.foldable => r + }) + } + + /** + * If a partition key already has equality conditions, then its DPP filter is useless and + * can't prune anything. So we should remove it. + */ + private def removeUnnecessaryDynamicPruningSubquery(plan: LogicalPlan): LogicalPlan = { + plan.transformWithPruning(_.containsPattern(DYNAMIC_PRUNING_SUBQUERY)) { + case f @ Filter(condition, _) => + val unnecessaryPruningKeys = ExpressionSet(collectEqualityConditionExpressions(condition)) + val newCondition = condition.transformWithPruning( + _.containsPattern(DYNAMIC_PRUNING_SUBQUERY)) { + case dynamicPruning: DynamicPruningSubquery + if unnecessaryPruningKeys.contains(dynamicPruning.pruningKey) => + TrueLiteral + } + f.copy(condition = newCondition) + } + } + override def apply(plan: LogicalPlan): LogicalPlan = { if (!conf.dynamicPartitionPruningEnabled) { return plan @@ -43,10 +70,13 @@ object CleanupDynamicPruningFilters extends Rule[LogicalPlan] with PredicateHelp // No-op for trees that do not contain dynamic pruning. _.containsAnyPattern(DYNAMIC_PRUNING_EXPRESSION, DYNAMIC_PRUNING_SUBQUERY)) { // pass through anything that is pushed down into PhysicalOperation - case p @ PhysicalOperation(_, _, LogicalRelation(_: HadoopFsRelation, _, _, _)) => p + case p @ PhysicalOperation(_, _, LogicalRelation(_: HadoopFsRelation, _, _, _)) => + removeUnnecessaryDynamicPruningSubquery(p) // pass through anything that is pushed down into PhysicalOperation - case p @ PhysicalOperation(_, _, HiveTableRelation(_, _, _, _, _)) => p - case p @ PhysicalOperation(_, _, _: DataSourceV2ScanRelation) => p + case p @ PhysicalOperation(_, _, HiveTableRelation(_, _, _, _, _)) => + removeUnnecessaryDynamicPruningSubquery(p) + case p @ PhysicalOperation(_, _, _: DataSourceV2ScanRelation) => + removeUnnecessaryDynamicPruningSubquery(p) // remove any Filters with DynamicPruning that didn't get pushed down to PhysicalOperation. 
case f @ Filter(condition, _) => val newCondition = condition.transformWithPruning( diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt index 7c4e7222a52e7..9d6b17e613ef1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt @@ -81,7 +81,7 @@ Input [13]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quant Output [2]: [hd_demo_sk#16, hd_dep_count#17] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_demo_sk), Or(Or(EqualTo(hd_dep_count,3),EqualTo(hd_dep_count,1)),EqualTo(hd_dep_count,1))] +PushedFilters: [IsNotNull(hd_demo_sk), Or(EqualTo(hd_dep_count,3),EqualTo(hd_dep_count,1))] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] @@ -89,7 +89,7 @@ Input [2]: [hd_demo_sk#16, hd_dep_count#17] (12) Filter [codegen id : 2] Input [2]: [hd_demo_sk#16, hd_dep_count#17] -Condition : (isnotnull(hd_demo_sk#16) AND (((hd_dep_count#17 = 3) OR (hd_dep_count#17 = 1)) OR (hd_dep_count#17 = 1))) +Condition : (isnotnull(hd_demo_sk#16) AND ((hd_dep_count#17 = 3) OR (hd_dep_count#17 = 1))) (13) BroadcastExchange Input [2]: [hd_demo_sk#16, hd_dep_count#17] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt index 31142b18a09fe..59e8cf7c4d063 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt @@ -151,7 +151,7 @@ Input [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ex Output [2]: [hd_demo_sk#23, hd_dep_count#24] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_demo_sk), Or(Or(EqualTo(hd_dep_count,3),EqualTo(hd_dep_count,1)),EqualTo(hd_dep_count,1))] +PushedFilters: [IsNotNull(hd_demo_sk), Or(EqualTo(hd_dep_count,3),EqualTo(hd_dep_count,1))] ReadSchema: struct (27) ColumnarToRow [codegen id : 5] @@ -159,7 +159,7 @@ Input [2]: [hd_demo_sk#23, hd_dep_count#24] (28) Filter [codegen id : 5] Input [2]: [hd_demo_sk#23, hd_dep_count#24] -Condition : (isnotnull(hd_demo_sk#23) AND (((hd_dep_count#24 = 3) OR (hd_dep_count#24 = 1)) OR (hd_dep_count#24 = 1))) +Condition : (isnotnull(hd_demo_sk#23) AND ((hd_dep_count#24 = 3) OR (hd_dep_count#24 = 1))) (29) BroadcastExchange Input [2]: [hd_demo_sk#23, hd_dep_count#24] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala index c67b05a5ca238..3569775b72628 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala @@ -1483,6 +1483,51 @@ abstract class DynamicPartitionPruningSuiteBase checkAnswer(df, Row(1150, 1) :: Row(1130, 4) :: Row(1140, 4) :: Nil) } } + + test("SPARK-38148: Do not add dynamic partition pruning if there exists static partition " + + "pruning") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") { + Seq( + "f.store_id = 1" -> false, + "1 = f.store_id" -> false, + 
"f.store_id <=> 1" -> false, + "1 <=> f.store_id" -> false, + "f.store_id > 1" -> true, + "5 > f.store_id" -> true).foreach { case (condition, hasDPP) => + // partitioned table at left side + val df1 = sql( + s""" + |SELECT /*+ broadcast(s) */ * FROM fact_sk f + |JOIN dim_store s ON f.store_id = s.store_id AND $condition + """.stripMargin) + checkPartitionPruningPredicate(df1, false, withBroadcast = hasDPP) + + val df2 = sql( + s""" + |SELECT /*+ broadcast(s) */ * FROM fact_sk f + |JOIN dim_store s ON f.store_id = s.store_id + |WHERE $condition + """.stripMargin) + checkPartitionPruningPredicate(df2, false, withBroadcast = hasDPP) + + // partitioned table at right side + val df3 = sql( + s""" + |SELECT /*+ broadcast(s) */ * FROM dim_store s + |JOIN fact_sk f ON f.store_id = s.store_id AND $condition + """.stripMargin) + checkPartitionPruningPredicate(df3, false, withBroadcast = hasDPP) + + val df4 = sql( + s""" + |SELECT /*+ broadcast(s) */ * FROM dim_store s + |JOIN fact_sk f ON f.store_id = s.store_id + |WHERE $condition + """.stripMargin) + checkPartitionPruningPredicate(df4, false, withBroadcast = hasDPP) + } + } + } } abstract class DynamicPartitionPruningDataSourceSuiteBase From e5a86a370fa43112a23e2ce95c6eb764b29b1992 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Wed, 9 Mar 2022 21:35:30 -0800 Subject: [PATCH 452/513] [SPARK-38453][K8S][DOCS] Add `volcano` section to K8s IT `README.md` ### What changes were proposed in this pull request? Add K8S IT doc for volcano test - Install Volcano according to [link](https://volcano.sh/en/docs/installation/). - A minimum of 6 CPUs and 9G of memory is required to complete all Volcano test cases. ### Why are the changes needed? Guide developers to run volcano test ### Does this PR introduce _any_ user-facing change? No, doc only ### How was this patch tested? ``` [info] VolcanoSuite: [info] - Run SparkPi with no resources (11 seconds, 363 milliseconds) [info] - Run SparkPi with no resources & statefulset allocation (12 seconds, 181 milliseconds) [info] - Run SparkPi with a very long application name. (10 seconds, 876 milliseconds) [info] - Use SparkLauncher.NO_RESOURCE (11 seconds, 932 milliseconds) [info] - Run SparkPi with a master URL without a scheme. (10 seconds, 756 milliseconds) [info] - Run SparkPi with an argument. (10 seconds, 989 milliseconds) [info] - Run SparkPi with custom labels, annotations, and environment variables. (13 seconds, 562 milliseconds) [info] - All pods have the same service account by default (10 seconds, 703 milliseconds) [info] - Run extraJVMOptions check on driver (5 seconds, 625 milliseconds) [info] - Run SparkRemoteFileTest using a remote data file (10 seconds, 795 milliseconds) [info] - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties (16 seconds, 211 milliseconds) [info] - Run SparkPi with env and mount secrets. (19 seconds, 830 milliseconds) [info] - Run PySpark on simple pi.py example (11 seconds, 677 milliseconds) [info] - Run PySpark to test a pyfiles example (16 seconds, 518 milliseconds) [info] - Run PySpark with memory customization (11 seconds, 920 milliseconds) [info] - Run in client mode. 
(10 seconds, 330 milliseconds) [info] - Start pod creation from template (13 seconds, 8 milliseconds) [info] - SPARK-38398: Schedule pod creation from template (12 seconds, 59 milliseconds) [info] - Test basic decommissioning (45 seconds, 509 milliseconds) [info] - Test basic decommissioning with shuffle cleanup (44 seconds, 664 milliseconds) [info] - Test decommissioning with dynamic allocation & shuffle cleanups (2 minutes, 43 seconds) [info] - Test decommissioning timeouts (47 seconds, 531 milliseconds) [info] - SPARK-37576: Rolling decommissioning (1 minute, 7 seconds) [info] - Run SparkPi with volcano scheduler (10 seconds, 844 milliseconds) [info] - SPARK-38187: Run SparkPi Jobs with minCPU (32 seconds, 654 milliseconds) [info] - SPARK-38187: Run SparkPi Jobs with minMemory (32 seconds, 610 milliseconds) [info] - SPARK-38188: Run SparkPi jobs with 2 queues (only 1 enabled) (14 seconds, 323 milliseconds) [info] - SPARK-38188: Run SparkPi jobs with 2 queues (all enabled) (26 seconds, 385 milliseconds) [info] - SPARK-38423: Run SparkPi Jobs with priorityClassName (20 seconds, 209 milliseconds) [info] - SPARK-38423: Run driver job to validate priority order (17 seconds, 427 milliseconds) [info] Run completed in 12 minutes, 58 seconds. [info] Total number of tests run: 30 [info] Suites: completed 1, aborted 0 [info] Tests: succeeded 30, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. $ k get node -oymal capacity: cpu: "6" memory: 9159716Ki ``` All test passed in 6U9G cluster. Closes #35773 from Yikun/SPARK-38453. Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- .../kubernetes/integration-tests/README.md | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md index ac82282526fe3..959829373e9a9 100644 --- a/resource-managers/kubernetes/integration-tests/README.md +++ b/resource-managers/kubernetes/integration-tests/README.md @@ -305,3 +305,24 @@ You can also specify your specific dockerfile to build JVM/Python/R based image -Dspark.kubernetes.test.pyDockerFile=/path/to/py/Dockerfile \ -Dspark.kubernetes.test.rDockerFile=/path/to/r/Dockerfile \ 'kubernetes-integration-tests/test' + +# Running the Volcano Integration Tests + +Prerequisites +- Install Volcano according to [link](https://volcano.sh/en/docs/installation/). +- A minimum of 6 CPUs and 9G of memory is required to complete all Volcano test cases. + +You can specify `-Pvolcano` to enable volcano module to run all Kubernetes and Volcano tests + + build/sbt -Pvolcano -Pkubernetes -Pkubernetes-integration-tests \ + -Dtest.exclude.tags=minikube \ + -Dspark.kubernetes.test.deployMode=docker-desktop \ + 'kubernetes-integration-tests/test' + +You can also specify `volcano` tag to only run Volcano test: + + build/sbt -Pvolcano -Pkubernetes -Pkubernetes-integration-tests \ + -Dtest.include.tags=volcano \ + -Dtest.exclude.tags=minikube \ + -Dspark.kubernetes.test.deployMode=docker-desktop \ + 'kubernetes-integration-tests/test' From c483e2977cbc6ae33d999c9c9d1dbacd9c53d85a Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Thu, 10 Mar 2022 15:32:48 +0900 Subject: [PATCH 453/513] [SPARK-38487][PYTHON][DOC] Fix docstrings of nlargest/nsmallest of DataFrame ### What changes were proposed in this pull request? Fix docstrings of nlargest/nsmallest of DataFrame ### Why are the changes needed? To make docstring less confusing. ### Does this PR introduce _any_ user-facing change? No. 
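For illustration, a minimal sketch of the behavior the corrected docstrings describe, using the same sample data as the docstring examples (columns "X" and "Y"); this is not part of the patch and assumes a working Spark environment where `pyspark.pandas` can be imported.

```python
# Sketch only: illustrates DataFrame.nlargest/nsmallest from the pandas API on Spark
# with the sample frame used in the docstrings.
import pyspark.pandas as ps

psdf = ps.DataFrame({"X": [1.0, 2.0, 3.0, 5.0, 6.0, 7.0, None],
                     "Y": [6, 7, 8, 9, 10, 11, 12]})

# The three rows with the largest values in column "X".
print(psdf.nlargest(n=3, columns="X"))

# The three rows with the smallest values in column "Y", then "X".
print(psdf.nsmallest(n=3, columns=["Y", "X"]))
```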
### How was this patch tested? Manual test. Closes #35793 from xinrong-databricks/frame.ntop. Authored-by: Xinrong Meng Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/frame.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index d4803eb60261b..64a64711b4e17 100644 --- a/python/pyspark/pandas/frame.py +++ b/python/pyspark/pandas/frame.py @@ -7283,7 +7283,7 @@ def _swaplevel_index(self, i: Union[int, Name], j: Union[int, Name]) -> Internal ) return internal - # TODO: add keep = First + # TODO: add keep = First def nlargest(self, n: int, columns: Union[Name, List[Name]]) -> "DataFrame": """ Return the first `n` rows ordered by `columns` in descending order. @@ -7340,7 +7340,7 @@ def nlargest(self, n: int, columns: Union[Name, List[Name]]) -> "DataFrame": 6 NaN 12 In the following example, we will use ``nlargest`` to select the three - rows having the largest values in column "population". + rows having the largest values in column "X". >>> df.nlargest(n=3, columns='X') X Y @@ -7348,12 +7348,14 @@ def nlargest(self, n: int, columns: Union[Name, List[Name]]) -> "DataFrame": 4 6.0 10 3 5.0 9 + To order by the largest values in column "Y" and then "X", we can + specify multiple columns like in the next example. + >>> df.nlargest(n=3, columns=['Y', 'X']) X Y 6 NaN 12 5 7.0 11 4 6.0 10 - """ return self.sort_values(by=columns, ascending=False).head(n=n) @@ -7403,7 +7405,7 @@ def nsmallest(self, n: int, columns: Union[Name, List[Name]]) -> "DataFrame": 6 NaN 12 In the following example, we will use ``nsmallest`` to select the - three rows having the smallest values in column "a". + three rows having the smallest values in column "X". >>> df.nsmallest(n=3, columns='X') # doctest: +NORMALIZE_WHITESPACE X Y @@ -7411,7 +7413,7 @@ def nsmallest(self, n: int, columns: Union[Name, List[Name]]) -> "DataFrame": 1 2.0 7 2 3.0 8 - To order by the largest values in column "a" and then "c", we can + To order by the smallest values in column "Y" and then "X", we can specify multiple columns like in the next example. >>> df.nsmallest(n=3, columns=['Y', 'X']) # doctest: +NORMALIZE_WHITESPACE From 3ab2455e9d1ba4a25a03ddb1e932e2980b8230d8 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 10 Mar 2022 02:38:13 -0800 Subject: [PATCH 454/513] [SPARK-38499][BUILD] Upgrade Jackson to 2.13.2 ### What changes were proposed in this pull request? This PR aims to upgrade Jackson to 2.13.2. ### Why are the changes needed? This brings the following latest bug fixes for Apache Spark 3.3.0. - https://github.com/FasterXML/jackson/wiki/Jackson-Release-2.13.2 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #35800 from dongjoon-hyun/SPARK-38499. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 14 +++++++------- dev/deps/spark-deps-hadoop-3-hive-2.3 | 14 +++++++------- pom.xml | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 7c657469a0b66..6dbababb202af 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -112,16 +112,16 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar -jackson-annotations/2.13.1//jackson-annotations-2.13.1.jar +jackson-annotations/2.13.2//jackson-annotations-2.13.2.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.13.1//jackson-core-2.13.1.jar -jackson-databind/2.13.1//jackson-databind-2.13.1.jar -jackson-dataformat-cbor/2.13.1//jackson-dataformat-cbor-2.13.1.jar -jackson-dataformat-yaml/2.13.1//jackson-dataformat-yaml-2.13.1.jar +jackson-core/2.13.2//jackson-core-2.13.2.jar +jackson-databind/2.13.2//jackson-databind-2.13.2.jar +jackson-dataformat-cbor/2.13.2//jackson-dataformat-cbor-2.13.2.jar +jackson-dataformat-yaml/2.13.2//jackson-dataformat-yaml-2.13.2.jar jackson-datatype-jsr310/2.13.1//jackson-datatype-jsr310-2.13.1.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-scala_2.12/2.13.1//jackson-module-scala_2.12-2.13.1.jar +jackson-module-scala_2.12/2.13.2//jackson-module-scala_2.12-2.13.2.jar jackson-xc/1.9.13//jackson-xc-1.9.13.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar @@ -245,7 +245,7 @@ scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar shapeless_2.12/2.3.7//shapeless_2.12-2.3.7.jar shims/0.9.25//shims-0.9.25.jar slf4j-api/1.7.32//slf4j-api-1.7.32.jar -snakeyaml/1.28//snakeyaml-1.28.jar +snakeyaml/1.30//snakeyaml-1.30.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 41b1deeeca1b3..46f1bc019b608 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -101,15 +101,15 @@ httpcore/4.4.14//httpcore-4.4.14.jar ini4j/0.5.4//ini4j-0.5.4.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar -jackson-annotations/2.13.1//jackson-annotations-2.13.1.jar +jackson-annotations/2.13.2//jackson-annotations-2.13.2.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.13.1//jackson-core-2.13.1.jar -jackson-databind/2.13.1//jackson-databind-2.13.1.jar -jackson-dataformat-cbor/2.13.1//jackson-dataformat-cbor-2.13.1.jar -jackson-dataformat-yaml/2.13.1//jackson-dataformat-yaml-2.13.1.jar +jackson-core/2.13.2//jackson-core-2.13.2.jar +jackson-databind/2.13.2//jackson-databind-2.13.2.jar +jackson-dataformat-cbor/2.13.2//jackson-dataformat-cbor-2.13.2.jar +jackson-dataformat-yaml/2.13.2//jackson-dataformat-yaml-2.13.2.jar jackson-datatype-jsr310/2.13.1//jackson-datatype-jsr310-2.13.1.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-scala_2.12/2.13.1//jackson-module-scala_2.12-2.13.1.jar +jackson-module-scala_2.12/2.13.2//jackson-module-scala_2.12-2.13.2.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar 
jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar @@ -233,7 +233,7 @@ scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar shapeless_2.12/2.3.7//shapeless_2.12-2.3.7.jar shims/0.9.25//shims-0.9.25.jar slf4j-api/1.7.32//slf4j-api-1.7.32.jar -snakeyaml/1.28//snakeyaml-1.28.jar +snakeyaml/1.30//snakeyaml-1.30.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar diff --git a/pom.xml b/pom.xml index 414a749161ce6..2c9ee3412cc43 100644 --- a/pom.xml +++ b/pom.xml @@ -175,7 +175,7 @@ true 1.9.13 - 2.13.1 + 2.13.2 1.1.8.4 1.1.2 2.2.1 From bcf7849864baf5df0601bf63ca150bc8ea1fa6ac Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 10 Mar 2022 20:11:25 +0800 Subject: [PATCH 455/513] [SPARK-38489][SQL] Aggregate.groupOnly support foldable expressions ### What changes were proposed in this pull request? This pr makes `Aggregate.groupOnly` support foldable expressions. ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #35795 from wangyum/SPARK-38489. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../plans/logical/basicLogicalOperators.scala | 2 +- .../optimizer/AggregateOptimizeSuite.scala | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 02d6a1d3cce76..5b601fbd5eed6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1003,7 +1003,7 @@ case class Aggregate( groupingExpressions.nonEmpty && aggregateExpressions.map { case Alias(child, _) => child case e => e - }.forall(a => groupingExpressions.exists(g => a.semanticEquals(g))) + }.forall(a => a.foldable || groupingExpressions.exists(g => a.semanticEquals(g))) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala index 7981dda495de4..1db04d2f5a7ce 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.analysis.AnalysisTest import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.{LeftOuter, RightOuter} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Distinct, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -122,8 +123,7 @@ class AggregateOptimizeSuite extends AnalysisTest { Optimize.execute( x.join(y, LeftOuter, Some("x.a".attr === "y.a".attr)) .groupBy("x.a".attr)("x.a".attr, Literal(1)).analyze), - x.join(y, LeftOuter, Some("x.a".attr === "y.a".attr)) - .groupBy("x.a".attr)("x.a".attr, Literal(1)).analyze) + 
x.groupBy("x.a".attr)("x.a".attr, Literal(1)).analyze) } test("SPARK-37292: Removes outer join if it only has DISTINCT on streamed side with alias") { @@ -148,4 +148,17 @@ class AggregateOptimizeSuite extends AnalysisTest { x.select("x.b".attr.as("newAlias1"), "x.b".attr.as("newAlias2")) .groupBy("newAlias1".attr, "newAlias2".attr)("newAlias1".attr, "newAlias2".attr).analyze) } + + test("SPARK-38489: Aggregate.groupOnly support foldable expressions") { + val x = testRelation.subquery('x) + val y = testRelation.subquery('y) + comparePlans( + Optimize.execute( + Distinct(x.join(y, LeftOuter, Some("x.a".attr === "y.a".attr)) + .select("x.b".attr, TrueLiteral, FalseLiteral.as("newAlias"))) + .analyze), + x.select("x.b".attr, TrueLiteral, FalseLiteral.as("newAlias")) + .groupBy("x.b".attr)("x.b".attr, TrueLiteral, FalseLiteral.as("newAlias")) + .analyze) + } } From 538c81bb7afea3db56ad7de1bc11d32c10c0688e Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 10 Mar 2022 15:24:38 +0300 Subject: [PATCH 456/513] [SPARK-38481][SQL] Substitute Java overflow exception from `TIMESTAMPADD` by Spark exception ### What changes were proposed in this pull request? In the PR, I propose to throw `SparkArithmeticException` from the datetime function: `timestampadd()` and from its aliases `date_add()`/`dateadd()` with the error class `DATETIME_OVERFLOW` in the case when internal arithmetic or datetime overflow occurs. The new error classes are added to `error-classes.json`. ### Why are the changes needed? Porting the functions to new error framework should improve user experience with Spark SQL. Before the changes: ```sql spark-sql> select timestampadd(YEAR, 1000000, timestamp'2022-03-09 01:02:03'); java.lang.ArithmeticException: long overflow at java.lang.Math.multiplyExact(Math.java:892) ~[?:1.8.0_292] ``` After: ```sql spark-sql> select timestampadd(YEAR, 1000000, timestamp'2022-03-09 01:02:03'); org.apache.spark.SparkArithmeticException: The 'timestampadd' function overflows the input '2022-03-08T22:02:03Z' timestamp by 1000000 YEAR. ``` ### Does this PR introduce _any_ user-facing change? Yes, but the datetime functions `timestampadd()` and its aliases `dateadd()`/`date_add()` haven't released yet. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *SparkThrowableSuite" ``` and new test: ``` $ build/sbt "test:testOnly *QueryExecutionErrorsSuite" ``` Closes #35787 from MaxGekk/timestamp_add_diff-overflow. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 4 ++ .../sql/catalyst/util/DateTimeUtils.scala | 51 +++++++++++-------- .../sql/errors/QueryExecutionErrors.scala | 8 ++- .../errors/QueryExecutionErrorsSuite.scala | 12 ++++- 4 files changed, 51 insertions(+), 24 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index a0e63270b5a64..3667bb2a4c7e4 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -25,6 +25,10 @@ "CONCURRENT_QUERY" : { "message" : [ "Another instance of this query was just started by a concurrent session." ] }, + "DATETIME_OVERFLOW" : { + "message" : [ "Datetime operation overflow: %s." ], + "sqlState" : "22008" + }, "DIVIDE_BY_ZERO" : { "message" : [ "divide by zero. To return NULL instead, use 'try_divide'. If necessary set %s to false (except for ANSI interval type) to bypass this error." 
], "sqlState" : "22012" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 7d2ead0c5f840..80e31f3d5346e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1196,29 +1196,36 @@ object DateTimeUtils { * @return A timestamp value, expressed in microseconds since 1970-01-01 00:00:00Z. */ def timestampAdd(unit: String, quantity: Int, micros: Long, zoneId: ZoneId): Long = { - unit.toUpperCase(Locale.ROOT) match { - case "MICROSECOND" => - timestampAddDayTime(micros, quantity, zoneId) - case "MILLISECOND" => - timestampAddDayTime(micros, quantity * MICROS_PER_MILLIS, zoneId) - case "SECOND" => - timestampAddDayTime(micros, quantity * MICROS_PER_SECOND, zoneId) - case "MINUTE" => - timestampAddDayTime(micros, quantity * MICROS_PER_MINUTE, zoneId) - case "HOUR" => - timestampAddDayTime(micros, quantity * MICROS_PER_HOUR, zoneId) - case "DAY" | "DAYOFYEAR" => - timestampAddDayTime(micros, quantity * MICROS_PER_DAY, zoneId) - case "WEEK" => - timestampAddDayTime(micros, quantity * MICROS_PER_DAY * DAYS_PER_WEEK, zoneId) - case "MONTH" => - timestampAddMonths(micros, quantity, zoneId) - case "QUARTER" => - timestampAddMonths(micros, quantity * 3, zoneId) - case "YEAR" => - timestampAddMonths(micros, quantity * MONTHS_PER_YEAR, zoneId) - case _ => + try { + unit.toUpperCase(Locale.ROOT) match { + case "MICROSECOND" => + timestampAddDayTime(micros, quantity, zoneId) + case "MILLISECOND" => + timestampAddDayTime(micros, quantity * MICROS_PER_MILLIS, zoneId) + case "SECOND" => + timestampAddDayTime(micros, quantity * MICROS_PER_SECOND, zoneId) + case "MINUTE" => + timestampAddDayTime(micros, quantity * MICROS_PER_MINUTE, zoneId) + case "HOUR" => + timestampAddDayTime(micros, quantity * MICROS_PER_HOUR, zoneId) + case "DAY" | "DAYOFYEAR" => + timestampAddDayTime(micros, quantity * MICROS_PER_DAY, zoneId) + case "WEEK" => + timestampAddDayTime(micros, quantity * MICROS_PER_DAY * DAYS_PER_WEEK, zoneId) + case "MONTH" => + timestampAddMonths(micros, quantity, zoneId) + case "QUARTER" => + timestampAddMonths(micros, quantity * 3, zoneId) + case "YEAR" => + timestampAddMonths(micros, quantity * MONTHS_PER_YEAR, zoneId) + } + } catch { + case _: scala.MatchError => throw QueryExecutionErrors.invalidUnitInTimestampAdd(unit) + case _: ArithmeticException | _: DateTimeException => + throw QueryExecutionErrors.timestampAddOverflowError(micros, quantity, unit) + case e: Throwable => + throw new IllegalStateException(s"Failure of 'timestampAdd': ${e.getMessage}") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 62b961604a25a..49861304a78ab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -48,7 +48,7 @@ import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.logical.{DomainJoin, LogicalPlan} import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.ValueInterval import org.apache.spark.sql.catalyst.trees.TreeNode -import org.apache.spark.sql.catalyst.util.{sideBySide, BadRecordException, FailFastMode} +import 
org.apache.spark.sql.catalyst.util.{sideBySide, BadRecordException, DateTimeUtils, FailFastMode} import org.apache.spark.sql.connector.catalog.{CatalogNotFoundException, Identifier, Table, TableProvider} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.expressions.Transform @@ -1997,4 +1997,10 @@ object QueryExecutionErrors { errorClass = "INVALID_PARAMETER_VALUE", messageParameters = Array("unit", "timestampdiff", unit)) } + + def timestampAddOverflowError(micros: Long, amount: Int, unit: String): ArithmeticException = { + new SparkArithmeticException( + errorClass = "DATETIME_OVERFLOW", + messageParameters = Array(s"add $amount $unit to '${DateTimeUtils.microsToInstant(micros)}'")) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index eb1b06647aaec..ea683ab176786 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.errors import java.sql.Timestamp -import org.apache.spark.{SparkException, SparkIllegalArgumentException, SparkRuntimeException, SparkUnsupportedOperationException, SparkUpgradeException} +import org.apache.spark.{SparkArithmeticException, SparkException, SparkIllegalArgumentException, SparkRuntimeException, SparkUnsupportedOperationException, SparkUpgradeException} import org.apache.spark.sql.{DataFrame, QueryTest, Row} import org.apache.spark.sql.execution.datasources.orc.OrcTest import org.apache.spark.sql.execution.datasources.parquet.ParquetTest @@ -284,4 +284,14 @@ class QueryExecutionErrorsSuite extends QueryTest } } } + + test("DATETIME_OVERFLOW: timestampadd() overflows its input timestamp") { + val e = intercept[SparkArithmeticException] { + sql("select timestampadd(YEAR, 1000000, timestamp'2022-03-09 01:02:03')").collect() + } + assert(e.getErrorClass === "DATETIME_OVERFLOW") + assert(e.getSqlState === "22008") + assert(e.getMessage === + "Datetime operation overflow: add 1000000 YEAR to '2022-03-09T09:02:03Z'.") + } } From 5cbd9b4ddaea2d7debc5d69057598510789e72c5 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 10 Mar 2022 21:58:52 +0900 Subject: [PATCH 457/513] [SPARK-38500][INFRA] Add ASF License header to all Service Provider configuration files ### What changes were proposed in this pull request? Add ASF License header to all Service Provider configuration files ### Why are the changes needed? to meet Apache Project Maturity Model - LC10 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? `dev/check-license` for license check `build/sbt -Phive "sql/testOnly *SparkSessionExtensionSuite"` for functionality Closes #35801 from yaooqinn/SPARK-38500. 
Authored-by: Kent Yao Signed-off-by: Hyukjin Kwon --- ...ache.spark.deploy.history.EventFilterBuilder | 17 +++++++++++++++++ ...spark.security.HadoopDelegationTokenProvider | 17 +++++++++++++++++ ...pache.spark.scheduler.ExternalClusterManager | 17 +++++++++++++++++ ...spark.security.HadoopDelegationTokenProvider | 17 +++++++++++++++++ dev/.rat-excludes | 1 - ...che.spark.sql.SparkSessionExtensionsProvider | 17 +++++++++++++++++ ...apache.spark.sql.jdbc.JdbcConnectionProvider | 17 +++++++++++++++++ ....apache.spark.sql.sources.DataSourceRegister | 17 +++++++++++++++++ ....apache.spark.sql.sources.DataSourceRegister | 17 +++++++++++++++++ ...spark.security.HadoopDelegationTokenProvider | 17 +++++++++++++++++ .../org.apache.spark.ml.util.MLFormatRegister | 17 +++++++++++++++++ ....apache.spark.sql.sources.DataSourceRegister | 17 +++++++++++++++++ .../org.apache.spark.ml.util.MLFormatRegister | 17 +++++++++++++++++ ...org.apache.spark.deploy.SparkSubmitOperation | 17 +++++++++++++++++ ...pache.spark.scheduler.ExternalClusterManager | 17 +++++++++++++++++ ...pache.spark.scheduler.ExternalClusterManager | 17 +++++++++++++++++ ...pache.spark.scheduler.ExternalClusterManager | 17 +++++++++++++++++ ...ache.spark.deploy.history.EventFilterBuilder | 17 +++++++++++++++++ ...apache.spark.sql.jdbc.JdbcConnectionProvider | 17 +++++++++++++++++ ....apache.spark.sql.sources.DataSourceRegister | 17 +++++++++++++++++ ...g.apache.spark.status.AppHistoryServerPlugin | 17 +++++++++++++++++ ....apache.hadoop.crypto.key.KeyProviderFactory | 4 +++- ...che.spark.sql.SparkSessionExtensionsProvider | 17 +++++++++++++++++ ...apache.spark.sql.jdbc.JdbcConnectionProvider | 17 +++++++++++++++++ ....apache.spark.sql.sources.DataSourceRegister | 17 +++++++++++++++++ ...g.apache.spark.status.AppHistoryServerPlugin | 17 +++++++++++++++++ ...spark.security.HadoopDelegationTokenProvider | 17 +++++++++++++++++ ....apache.spark.sql.sources.DataSourceRegister | 17 +++++++++++++++++ 28 files changed, 445 insertions(+), 2 deletions(-) diff --git a/core/src/main/resources/META-INF/services/org.apache.spark.deploy.history.EventFilterBuilder b/core/src/main/resources/META-INF/services/org.apache.spark.deploy.history.EventFilterBuilder index 784e58270ab42..e349eac3d0d07 100644 --- a/core/src/main/resources/META-INF/services/org.apache.spark.deploy.history.EventFilterBuilder +++ b/core/src/main/resources/META-INF/services/org.apache.spark.deploy.history.EventFilterBuilder @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + org.apache.spark.deploy.history.BasicEventFilterBuilder \ No newline at end of file diff --git a/core/src/main/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider b/core/src/main/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider index c1f2060cabcff..3c2e241793d8e 100644 --- a/core/src/main/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider +++ b/core/src/main/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider @@ -1,2 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.deploy.security.HadoopFSDelegationTokenProvider org.apache.spark.deploy.security.HBaseDelegationTokenProvider diff --git a/core/src/test/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager b/core/src/test/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager index 33b162eb274c1..3ff68027f915d 100644 --- a/core/src/test/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager +++ b/core/src/test/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager @@ -1,3 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.scheduler.DummyExternalClusterManager org.apache.spark.scheduler.MockExternalClusterManager org.apache.spark.scheduler.CSMockExternalClusterManager diff --git a/core/src/test/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider b/core/src/test/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider index f4107befc825b..ed3908e95e4cb 100644 --- a/core/src/test/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider +++ b/core/src/test/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.deploy.security.ExceptionThrowingDelegationTokenProvider diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 770b0d2651bd5..7fdc3839d8a4f 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -112,7 +112,6 @@ spark-deps-.* .*\.tsv .*\.sql .Rbuildignore -META-INF/* spark-warehouse structured-streaming/* kafka-source-initial-offset-version-2.1.0.bin diff --git a/examples/src/main/resources/META-INF/services/org.apache.spark.sql.SparkSessionExtensionsProvider b/examples/src/main/resources/META-INF/services/org.apache.spark.sql.SparkSessionExtensionsProvider index c239843a3b502..7a65f53236933 100644 --- a/examples/src/main/resources/META-INF/services/org.apache.spark.sql.SparkSessionExtensionsProvider +++ b/examples/src/main/resources/META-INF/services/org.apache.spark.sql.SparkSessionExtensionsProvider @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.examples.extensions.SessionExtensionsWithLoader diff --git a/examples/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider b/examples/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider index 776948cc04de7..ccfca6eafce86 100644 --- a/examples/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider +++ b/examples/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.examples.sql.jdbc.ExampleJdbcConnectionProvider \ No newline at end of file diff --git a/external/avro/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/external/avro/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index d89f963059642..c61270406c3f3 100644 --- a/external/avro/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/external/avro/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.sql.v2.avro.AvroDataSourceV2 diff --git a/external/kafka-0-10-sql/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/external/kafka-0-10-sql/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index 2f9e9fc0396d5..e096f120b8926 100644 --- a/external/kafka-0-10-sql/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/external/kafka-0-10-sql/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + org.apache.spark.sql.kafka010.KafkaSourceProvider diff --git a/external/kafka-0-10-token-provider/src/main/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider b/external/kafka-0-10-token-provider/src/main/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider index 34014016584de..ff1987503183f 100644 --- a/external/kafka-0-10-token-provider/src/main/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider +++ b/external/kafka-0-10-token-provider/src/main/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.kafka010.KafkaDelegationTokenProvider diff --git a/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.util.MLFormatRegister b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.util.MLFormatRegister index f14431d50feec..9fe4f4691ae24 100644 --- a/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.util.MLFormatRegister +++ b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.util.MLFormatRegister @@ -1,3 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.ml.regression.InternalLinearRegressionModelWriter org.apache.spark.ml.regression.PMMLLinearRegressionModelWriter org.apache.spark.ml.clustering.InternalKMeansModelWriter diff --git a/mllib/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/mllib/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index a7dfd2d5c1e70..c94ad5cfab07a 100644 --- a/mllib/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/mllib/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -1,2 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.ml.source.libsvm.LibSVMFileFormat org.apache.spark.ml.source.image.ImageFileFormat diff --git a/mllib/src/test/resources/META-INF/services/org.apache.spark.ml.util.MLFormatRegister b/mllib/src/test/resources/META-INF/services/org.apache.spark.ml.util.MLFormatRegister index 100ef2545418f..f458bb5bd0844 100644 --- a/mllib/src/test/resources/META-INF/services/org.apache.spark.ml.util.MLFormatRegister +++ b/mllib/src/test/resources/META-INF/services/org.apache.spark.ml.util.MLFormatRegister @@ -1,3 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.ml.util.DuplicateLinearRegressionWriter1 org.apache.spark.ml.util.DuplicateLinearRegressionWriter2 org.apache.spark.ml.util.FakeLinearRegressionWriterWithName diff --git a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.SparkSubmitOperation b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.SparkSubmitOperation index d589e6b60f847..057c234287469 100644 --- a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.SparkSubmitOperation +++ b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.SparkSubmitOperation @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + org.apache.spark.deploy.k8s.submit.K8SSparkSubmitOperation \ No newline at end of file diff --git a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager index 81d14766ffb8d..72cb48ec46478 100644 --- a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager +++ b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.scheduler.cluster.k8s.KubernetesClusterManager diff --git a/resource-managers/mesos/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager b/resource-managers/mesos/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager index 12b6d5b64d68c..f83bfa166bec8 100644 --- a/resource-managers/mesos/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager +++ b/resource-managers/mesos/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.scheduler.cluster.mesos.MesosClusterManager diff --git a/resource-managers/yarn/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager b/resource-managers/yarn/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager index 6e8a1ebfc61da..3759c3f197a9c 100644 --- a/resource-managers/yarn/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager +++ b/resource-managers/yarn/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.scheduler.cluster.YarnClusterManager diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.deploy.history.EventFilterBuilder b/sql/core/src/main/resources/META-INF/services/org.apache.spark.deploy.history.EventFilterBuilder index 5025616b752d1..03cddd94645d6 100644 --- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.deploy.history.EventFilterBuilder +++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.deploy.history.EventFilterBuilder @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.sql.execution.history.SQLEventFilterBuilder \ No newline at end of file diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider index 6e42517a6d40c..b3f30a6650017 100644 --- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider +++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider @@ -1,3 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + org.apache.spark.sql.execution.datasources.jdbc.connection.BasicConnectionProvider org.apache.spark.sql.execution.datasources.jdbc.connection.DB2ConnectionProvider org.apache.spark.sql.execution.datasources.jdbc.connection.MariaDBConnectionProvider diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index fe4554a9c50b3..1365134641758 100644 --- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -1,3 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.sql.execution.datasources.v2.csv.CSVDataSourceV2 org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider org.apache.spark.sql.execution.datasources.v2.json.JsonDataSourceV2 diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin b/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin index 6771eef525307..2fca64c565d16 100644 --- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin +++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin @@ -1,2 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + org.apache.spark.sql.execution.ui.SQLHistoryServerPlugin org.apache.spark.sql.execution.ui.StreamingQueryHistoryServerPlugin diff --git a/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory b/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory index f436622b5fb42..246058e0bed70 100644 --- a/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory +++ b/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory @@ -1,3 +1,4 @@ +# # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -5,12 +6,13 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# test.org.apache.spark.sql.execution.datasources.orc.FakeKeyProvider$Factory diff --git a/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.SparkSessionExtensionsProvider b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.SparkSessionExtensionsProvider index b5b01a09e6995..0584b8c8b4f0d 100644 --- a/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.SparkSessionExtensionsProvider +++ b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.SparkSessionExtensionsProvider @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.sql.YourExtensions diff --git a/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider index afb48e1a3511f..bf8d78edef4ce 100644 --- a/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider +++ b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.sql.execution.datasources.jdbc.connection.IntentionallyFaultyConnectionProvider \ No newline at end of file diff --git a/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index dd22970203b3c..c1fc7234d7c19 100644 --- a/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -1,3 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.sql.sources.FakeSourceOne org.apache.spark.sql.sources.FakeSourceTwo org.apache.spark.sql.sources.FakeSourceThree diff --git a/sql/hive-thriftserver/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin b/sql/hive-thriftserver/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin index 96d990372ee4c..75feb9da53a93 100644 --- a/sql/hive-thriftserver/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin +++ b/sql/hive-thriftserver/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + org.apache.spark.sql.hive.thriftserver.ui.HiveThriftServer2HistoryServerPlugin diff --git a/sql/hive/src/main/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider b/sql/hive/src/main/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider index 2b0acc0305c49..eb7862b407c61 100644 --- a/sql/hive/src/main/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider +++ b/sql/hive/src/main/resources/META-INF/services/org.apache.spark.security.HadoopDelegationTokenProvider @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.sql.hive.security.HiveDelegationTokenProvider diff --git a/sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index e7d762fbebe76..bb06156b63339 100644 --- a/sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -1,2 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + org.apache.spark.sql.hive.orc.OrcFileFormat org.apache.spark.sql.hive.execution.HiveFileFormat \ No newline at end of file From 216b97220d6a6830c8e99c385ae7a06e805a5ae8 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 10 Mar 2022 21:05:20 +0800 Subject: [PATCH 458/513] [SPARK-38360][SQL][SS][PYTHON] Introduce a `exists` function for `TreeNode` to eliminate duplicate code patterns ### What changes were proposed in this pull request? 
There are many duplicate code patterns in Spark code:

```scala
treeNode.find(condition).isDefined
treeNode.find(condition).nonEmpty
treeNode.find(condition).isEmpty
```

This PR introduces an `exists` function for `TreeNode` to simplify them. After this PR:

- `treeNode.find(condition).isDefined` and `treeNode.find(condition).nonEmpty` -> `treeNode.exists(condition)`
- `treeNode.find(condition).isEmpty` -> `!treeNode.exists(condition)`

### Why are the changes needed?

Code simplification.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass GA and add a new UT for the new function.

Closes #35694 from LuciferYang/treenode-exists.

Authored-by: yangjie01
Signed-off-by: Wenchen Fan
---
 .../kafka010/KafkaMicroBatchSourceSuite.scala | 4 +-
 .../sql/catalyst/analysis/Analyzer.scala | 30 +++----
 .../catalyst/analysis/CTESubstitution.scala | 4 +-
 .../sql/catalyst/analysis/CheckAnalysis.scala | 4 +-
 .../expressions/EquivalentExpressions.scala | 2 +-
 .../expressions/aggregate/interfaces.scala | 2 +-
 .../sql/catalyst/expressions/subquery.scala | 20 ++---
 .../optimizer/DecorrelateInnerQuery.scala | 4 +-
 .../sql/catalyst/optimizer/InlineCTE.scala | 2 +-
 .../optimizer/NestedColumnAliasing.scala | 6 +-
 .../sql/catalyst/optimizer/Optimizer.scala | 7 +-
 .../optimizer/RemoveRedundantAggregates.scala | 4 +-
 .../optimizer/ReplaceExceptWithFilter.scala | 4 +-
 .../spark/sql/catalyst/optimizer/joins.scala | 6 +-
 .../sql/catalyst/optimizer/subquery.scala | 2 +-
 .../plans/logical/basicLogicalOperators.scala | 2 +-
 .../spark/sql/catalyst/trees/TreeNode.scala | 10 +++
 .../analysis/AnsiTypeCoercionSuite.scala | 2 +-
 .../catalyst/analysis/TypeCoercionSuite.scala | 2 +-
 .../DecorrelateInnerQuerySuite.scala | 2 +-
 .../plans/logical/AnalysisHelperSuite.scala | 2 +-
 .../sql/catalyst/trees/TreeNodeSuite.scala | 38 +++++++++
 .../scala/org/apache/spark/sql/Dataset.scala | 4 +-
 .../spark/sql/execution/CacheManager.scala | 8 +-
 .../sql/execution/DataSourceScanExec.scala | 2 +-
 .../sql/execution/WholeStageCodegenExec.scala | 2 +-
 .../sql/execution/adaptive/AQEOptimizer.scala | 2 +-
 .../adaptive/AdaptiveSparkPlanExec.scala | 4 +-
 .../adaptive/InsertAdaptiveSparkPlan.scala | 8 +-
 .../analysis/DetectAmbiguousSelfJoin.scala | 2 +-
 .../DisableUnnecessaryBucketedScan.scala | 4 +-
 .../dynamicpruning/PartitionPruning.scala | 4 +-
 .../PlanDynamicPruningFilters.scala | 4 +-
 .../python/AggregateInPandasExec.scala | 2 +-
 .../sql/execution/python/EvalPythonExec.scala | 2 +-
 .../execution/python/ExtractPythonUDFs.scala | 10 +--
 .../execution/python/WindowInPandasExec.scala | 2 +-
 .../apache/spark/sql/execution/subquery.scala | 4 +-
 .../org/apache/spark/sql/CTEInlineSuite.scala | 8 +-
 .../sql/DataFrameWindowFunctionsSuite.scala | 8 +-
 .../apache/spark/sql/DatasetCacheSuite.scala | 2 +-
 .../sql/DynamicPartitionPruningSuite.scala | 18 ++--
 .../org/apache/spark/sql/JoinSuite.scala | 4 +-
 ...ullWithFalseInPredicateEndToEndSuite.scala | 8 +-
 .../FileDataSourceV2FallBackSuite.scala | 2 +-
 .../DeprecatedWholeStageCodegenSuite.scala | 4 +-
 .../spark/sql/execution/PlannerSuite.scala | 6 +-
 .../WholeStageCodegenSparkSubmitSuite.scala | 2 +-
 .../execution/WholeStageCodegenSuite.scala | 82 +++++++++----------
 .../execution/benchmark/JoinBenchmark.scala | 23 +++---
 .../datasources/json/JsonSuite.scala | 4 +-
 .../datasources/orc/OrcQuerySuite.scala | 12 +--
 .../sources/ForeachBatchSinkSuite.scala | 4 +-
 .../spark/sql/sources/BucketedReadSuite.scala | 12 +--
 54 files changed, 234 insertions(+), 188 deletions(-)

diff --git
a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 5037af16c28e4..db71f0fd9184a 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -1416,10 +1416,10 @@ class KafkaMicroBatchV2SourceSuite extends KafkaMicroBatchSourceSuiteBase { testStream(kafka)( makeSureGetOffsetCalled, AssertOnQuery { query => - query.logicalPlan.find { + query.logicalPlan.exists { case r: StreamingDataSourceV2Relation => r.stream.isInstanceOf[KafkaMicroBatchStream] case _ => false - }.isDefined + } } ) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 27e8ed2b32d23..21454bef142fc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -509,7 +509,7 @@ class Analyzer(override val catalogManager: CatalogManager) } private def hasUnresolvedAlias(exprs: Seq[NamedExpression]) = - exprs.exists(_.find(_.isInstanceOf[UnresolvedAlias]).isDefined) + exprs.exists(_.exists(_.isInstanceOf[UnresolvedAlias])) def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUpWithPruning( _.containsPattern(UNRESOLVED_ALIAS), ruleId) { @@ -616,7 +616,7 @@ class Analyzer(override val catalogManager: CatalogManager) val aggsBuffer = ArrayBuffer[Expression]() // Returns whether the expression belongs to any expressions in `aggsBuffer` or not. def isPartOfAggregation(e: Expression): Boolean = { - aggsBuffer.exists(a => a.find(_ eq e).isDefined) + aggsBuffer.exists(a => a.exists(_ eq e)) } replaceGroupingFunc(agg, groupByExprs, gid).transformDown { // AggregateExpression should be computed on the unmodified value of its argument @@ -966,14 +966,14 @@ class Analyzer(override val catalogManager: CatalogManager) } private def hasMetadataCol(plan: LogicalPlan): Boolean = { - plan.expressions.exists(_.find { + plan.expressions.exists(_.exists { case a: Attribute => // If an attribute is resolved before being labeled as metadata // (i.e. 
from the originating Dataset), we check with expression ID a.isMetadataCol || plan.children.exists(c => c.metadataOutput.exists(_.exprId == a.exprId)) case _ => false - }.isDefined) + }) } private def addMetadataCol(plan: LogicalPlan): LogicalPlan = plan match { @@ -1674,7 +1674,7 @@ class Analyzer(override val catalogManager: CatalogManager) } private def containsDeserializer(exprs: Seq[Expression]): Boolean = { - exprs.exists(_.find(_.isInstanceOf[UnresolvedDeserializer]).isDefined) + exprs.exists(_.exists(_.isInstanceOf[UnresolvedDeserializer])) } // support CURRENT_DATE, CURRENT_TIMESTAMP, and grouping__id @@ -1869,7 +1869,7 @@ class Analyzer(override val catalogManager: CatalogManager) withPosition(ordinal) { if (index > 0 && index <= aggs.size) { val ordinalExpr = aggs(index - 1) - if (ordinalExpr.find(_.isInstanceOf[AggregateExpression]).nonEmpty) { + if (ordinalExpr.exists(_.isInstanceOf[AggregateExpression])) { throw QueryCompilationErrors.groupByPositionRefersToAggregateFunctionError( index, ordinalExpr) } else { @@ -2687,7 +2687,7 @@ class Analyzer(override val catalogManager: CatalogManager) */ object ExtractGenerator extends Rule[LogicalPlan] { private def hasGenerator(expr: Expression): Boolean = { - expr.find(_.isInstanceOf[Generator]).isDefined + expr.exists(_.isInstanceOf[Generator]) } private def hasNestedGenerator(expr: NamedExpression): Boolean = { @@ -2697,10 +2697,10 @@ class Analyzer(override val catalogManager: CatalogManager) case go: GeneratorOuter => hasInnerGenerator(go.child) case _ => - g.children.exists { _.find { + g.children.exists { _.exists { case _: Generator => true case _ => false - }.isDefined } + } } } trimNonTopLevelAliases(expr) match { case UnresolvedAlias(g: Generator, _) => hasInnerGenerator(g) @@ -2711,12 +2711,12 @@ class Analyzer(override val catalogManager: CatalogManager) } private def hasAggFunctionInGenerator(ne: Seq[NamedExpression]): Boolean = { - ne.exists(_.find { + ne.exists(_.exists { case g: Generator => - g.children.exists(_.find(_.isInstanceOf[AggregateFunction]).isDefined) + g.children.exists(_.exists(_.isInstanceOf[AggregateFunction])) case _ => false - }.nonEmpty) + }) } private def trimAlias(expr: NamedExpression): Expression = expr match { @@ -2917,10 +2917,10 @@ class Analyzer(override val catalogManager: CatalogManager) exprs.exists(hasWindowFunction) private def hasWindowFunction(expr: Expression): Boolean = { - expr.find { + expr.exists { case window: WindowExpression => true case _ => false - }.isDefined + } } /** @@ -3756,7 +3756,7 @@ class Analyzer(override val catalogManager: CatalogManager) } private def hasUnresolvedFieldName(a: AlterTableCommand): Boolean = { - a.expressions.exists(_.find(_.isInstanceOf[UnresolvedFieldName]).isDefined) + a.expressions.exists(_.exists(_.isInstanceOf[UnresolvedFieldName])) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala index 2397527133f13..c0ba3598e4ba1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala @@ -51,10 +51,10 @@ object CTESubstitution extends Rule[LogicalPlan] { if (!plan.containsPattern(UNRESOLVED_WITH)) { return plan } - val isCommand = plan.find { + val isCommand = plan.exists { case _: Command | _: ParsedStatement | _: InsertIntoDir => true case _ => false - }.isDefined + } val 
cteDefs = mutable.ArrayBuffer.empty[CTERelationDef] val (substituted, lastSubstituted) = LegacyBehaviorPolicy.withName(conf.getConf(LEGACY_CTE_PRECEDENCE_POLICY)) match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 731924813b694..c05b9326d2304 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -334,7 +334,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { } def checkValidGroupingExprs(expr: Expression): Unit = { - if (expr.find(_.isInstanceOf[AggregateExpression]).isDefined) { + if (expr.exists(_.isInstanceOf[AggregateExpression])) { failAnalysis( "aggregate functions are not allowed in GROUP BY, but found " + expr.sql) } @@ -718,7 +718,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { // Check whether the given expressions contains the subquery expression. def containsExpr(expressions: Seq[Expression]): Boolean = { - expressions.exists(_.find(_.semanticEquals(expr)).isDefined) + expressions.exists(_.exists(_.semanticEquals(expr))) } // Validate the subquery plan. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala index 59e2be4a6f5aa..903a6fd7bd014 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala @@ -194,7 +194,7 @@ class EquivalentExpressions { expr.isInstanceOf[LeafExpression] || // `LambdaVariable` is usually used as a loop variable, which can't be evaluated ahead of the // loop. So we can't evaluate sub-expressions containing `LambdaVariable` at the beginning. - expr.find(_.isInstanceOf[LambdaVariable]).isDefined || + expr.exists(_.isInstanceOf[LambdaVariable]) || // `PlanExpression` wraps query plan. To compare query plans of `PlanExpression` on executor, // can cause error like NPE. (expr.isInstanceOf[PlanExpression[_]] && Utils.isInRunningSparkTask) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala index 3ba90659748e5..f97293dc9b464 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala @@ -83,7 +83,7 @@ object AggregateExpression { } def containsAggregate(expr: Expression): Boolean = { - expr.find(isAggregate).isDefined + expr.exists(isAggregate) } def isAggregate(expr: Expression): Boolean = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala index d7112a291f661..71b36fa8ef9ba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala @@ -88,11 +88,11 @@ object SubqueryExpression { * and false otherwise. 
*/ def hasInOrCorrelatedExistsSubquery(e: Expression): Boolean = { - e.find { + e.exists { case _: ListQuery => true case ex: Exists => ex.isCorrelated case _ => false - }.isDefined + } } /** @@ -101,20 +101,20 @@ object SubqueryExpression { * [[org.apache.spark.sql.catalyst.analysis.Analyzer.ResolveSubquery]] */ def hasCorrelatedSubquery(e: Expression): Boolean = { - e.find { + e.exists { case s: SubqueryExpression => s.isCorrelated case _ => false - }.isDefined + } } /** * Returns true when an expression contains a subquery */ def hasSubquery(e: Expression): Boolean = { - e.find { + e.exists { case _: SubqueryExpression => true case _ => false - }.isDefined + } } } @@ -124,7 +124,7 @@ object SubExprUtils extends PredicateHelper { * returns false otherwise. */ def containsOuter(e: Expression): Boolean = { - e.find(_.isInstanceOf[OuterReference]).isDefined + e.exists(_.isInstanceOf[OuterReference]) } /** @@ -161,7 +161,7 @@ object SubExprUtils extends PredicateHelper { * Given a logical plan, returns TRUE if it has an outer reference and false otherwise. */ def hasOuterReferences(plan: LogicalPlan): Boolean = { - plan.find(_.expressions.exists(containsOuter)).isDefined + plan.exists(_.expressions.exists(containsOuter)) } /** @@ -282,10 +282,10 @@ case class ScalarSubquery( object ScalarSubquery { def hasCorrelatedScalarSubquery(e: Expression): Boolean = { - e.find { + e.exists { case s: ScalarSubquery => s.isCorrelated case _ => false - }.isDefined + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala index 5ad70f0e30e41..9a4d1a33e30bb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala @@ -87,7 +87,7 @@ object DecorrelateInnerQuery extends PredicateHelper { * leaf node and will not be found here. */ private def containsAttribute(expression: Expression): Boolean = { - expression.find(_.isInstanceOf[Attribute]).isDefined + expression.exists(_.isInstanceOf[Attribute]) } /** @@ -268,7 +268,7 @@ object DecorrelateInnerQuery extends PredicateHelper { // The decorrelation framework adds domain inner joins by traversing down the plan tree // recursively until it reaches a node that is not correlated with the outer query. // So the child node of a domain inner join shouldn't contain another domain join. - assert(child.find(_.isInstanceOf[DomainJoin]).isEmpty, + assert(!child.exists(_.isInstanceOf[DomainJoin]), s"Child of a domain inner join shouldn't contain another domain join.\n$child") child case o => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala index 1de300ef9c09d..61577b1d21ea4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala @@ -54,7 +54,7 @@ object InlineCTE extends Rule[LogicalPlan] { // 2) Any `CTERelationRef` that contains `OuterReference` would have been inlined first. 
refCount == 1 || cteDef.deterministic || - cteDef.child.find(_.expressions.exists(_.isInstanceOf[OuterReference])).isDefined + cteDef.child.exists(_.expressions.exists(_.isInstanceOf[OuterReference])) } private def buildCTEMap( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala index a2ee950dae9ee..4c7130e51e0b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala @@ -247,7 +247,7 @@ object NestedColumnAliasing { exprList.foreach { e => collectRootReferenceAndExtractValue(e).foreach { // we can not alias the attr from lambda variable whose expr id is not available - case ev: ExtractValue if ev.find(_.isInstanceOf[NamedLambdaVariable]).isEmpty => + case ev: ExtractValue if !ev.exists(_.isInstanceOf[NamedLambdaVariable]) => if (ev.references.size == 1) { nestedFieldReferences.append(ev) } @@ -267,7 +267,7 @@ object NestedColumnAliasing { // that do should not have an alias generated as it can lead to pushing the aggregate down // into a projection. def containsAggregateFunction(ev: ExtractValue): Boolean = - ev.find(_.isInstanceOf[AggregateFunction]).isDefined + ev.exists(_.isInstanceOf[AggregateFunction]) // Remove redundant [[ExtractValue]]s if they share the same parent nest field. // For example, when `a.b` and `a.b.c` are in project list, we only need to alias `a.b`. @@ -277,7 +277,7 @@ object NestedColumnAliasing { // [[GetStructField]] case e @ (_: GetStructField | _: GetArrayStructFields) => val child = e.children.head - nestedFields.forall(f => child.find(_.semanticEquals(f)).isEmpty) + nestedFields.forall(f => !child.exists(_.semanticEquals(f))) case _ => true } .distinct diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index f7ff566b14a95..e245d14854472 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -52,7 +52,7 @@ abstract class Optimizer(catalogManager: CatalogManager) previousPlan: LogicalPlan, currentPlan: LogicalPlan): Boolean = { !Utils.isTesting || (currentPlan.resolved && - currentPlan.find(PlanHelper.specialExpressionsInUnsupportedOperator(_).nonEmpty).isEmpty && + !currentPlan.exists(PlanHelper.specialExpressionsInUnsupportedOperator(_).nonEmpty) && LogicalPlanIntegrity.checkIfExprIdsAreGloballyUnique(currentPlan) && DataType.equalsIgnoreNullability(previousPlan.schema, currentPlan.schema)) } @@ -1690,11 +1690,10 @@ object PushPredicateThroughNonJoin extends Rule[LogicalPlan] with PredicateHelpe */ private def canPushThroughCondition(plan: LogicalPlan, condition: Expression): Boolean = { val attributes = plan.outputSet - val matched = condition.find { + !condition.exists { case s: SubqueryExpression => s.plan.outputSet.intersect(attributes).nonEmpty case _ => false } - matched.isEmpty } } @@ -1956,7 +1955,7 @@ object ConvertToLocalRelation extends Rule[LogicalPlan] { } private def hasUnevaluableExpr(expr: Expression): Boolean = { - expr.find(e => e.isInstanceOf[Unevaluable] && !e.isInstanceOf[AttributeReference]).isDefined + expr.exists(e => e.isInstanceOf[Unevaluable] && 
!e.isInstanceOf[AttributeReference]) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAggregates.scala index bf17791fdd0a0..beec90da2e56f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAggregates.scala @@ -52,10 +52,10 @@ object RemoveRedundantAggregates extends Rule[LogicalPlan] with AliasHelper { private def isLowerRedundant(upper: Aggregate, lower: Aggregate): Boolean = { val upperHasNoDuplicateSensitiveAgg = upper .aggregateExpressions - .forall(expr => expr.find { + .forall(expr => !expr.exists { case ae: AggregateExpression => isDuplicateSensitive(ae) case e => AggregateExpression.isAggregate(e) - }.isEmpty) + }) lazy val upperRefsOnlyDeterministicNonAgg = upper.references.subsetOf(AttributeSet( lower diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceExceptWithFilter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceExceptWithFilter.scala index 8218051c584b3..f66128dcbc3fb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceExceptWithFilter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceExceptWithFilter.scala @@ -87,8 +87,8 @@ object ReplaceExceptWithFilter extends Rule[LogicalPlan] { val rightProjectList = projectList(right) left.output.size == left.output.map(_.name).distinct.size && - left.find(_.expressions.exists(SubqueryExpression.hasSubquery)).isEmpty && - right.find(_.expressions.exists(SubqueryExpression.hasSubquery)).isEmpty && + !left.exists(_.expressions.exists(SubqueryExpression.hasSubquery)) && + !right.exists(_.expressions.exists(SubqueryExpression.hasSubquery)) && Project(leftProjectList, nonFilterChild(skipProject(left))).sameResult( Project(rightProjectList, nonFilterChild(skipProject(right)))) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala index e03360d3d44d6..6d683a7a11384 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala @@ -143,7 +143,7 @@ object EliminateOuterJoin extends Rule[LogicalPlan] with PredicateHelper { val attributes = e.references.toSeq val emptyRow = new GenericInternalRow(attributes.length) val boundE = BindReferences.bindReference(e, attributes) - if (boundE.find(_.isInstanceOf[Unevaluable]).isDefined) return false + if (boundE.exists(_.isInstanceOf[Unevaluable])) return false val v = boundE.eval(emptyRow) v == null || v == false } @@ -195,9 +195,9 @@ object EliminateOuterJoin extends Rule[LogicalPlan] with PredicateHelper { object ExtractPythonUDFFromJoinCondition extends Rule[LogicalPlan] with PredicateHelper { private def hasUnevaluablePythonUDF(expr: Expression, j: Join): Boolean = { - expr.find { e => + expr.exists { e => PythonUDF.isScalarPythonUDF(e) && !canEvaluate(e, j.left) && !canEvaluate(e, j.right) - }.isDefined + } } override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUpWithPruning( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index 6d6b8b7d8aca8..7ef5ef55fabda 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -728,7 +728,7 @@ object OptimizeOneRowRelationSubquery extends Rule[LogicalPlan] { } private def hasCorrelatedSubquery(plan: LogicalPlan): Boolean = { - plan.find(_.expressions.exists(SubqueryExpression.hasCorrelatedSubquery)).isDefined + plan.exists(_.expressions.exists(SubqueryExpression.hasCorrelatedSubquery)) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 5b601fbd5eed6..8a6598f6f0841 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -989,7 +989,7 @@ case class Aggregate( final override val nodePatterns : Seq[TreePattern] = Seq(AGGREGATE) override lazy val validConstraints: ExpressionSet = { - val nonAgg = aggregateExpressions.filter(_.find(_.isInstanceOf[AggregateExpression]).isEmpty) + val nonAgg = aggregateExpressions.filter(!_.exists(_.isInstanceOf[AggregateExpression])) getAllValidConstraints(nonAgg) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 9e50be36a9f23..ac60e18b2c1bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -246,6 +246,16 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product with Tre children.foldLeft(Option.empty[BaseType]) { (l, r) => l.orElse(r.find(f)) } } + /** + * Test whether there is [[TreeNode]] satisfies the conditions specified in `f`. + * The condition is recursively applied to this node and all of its children (pre-order). + */ + def exists(f: BaseType => Boolean): Boolean = if (f(this)) { + true + } else { + children.exists(_.exists(f)) + } + /** * Runs the given function on this node and then recursively on [[children]]. * @param f the function to be applied to each node in the tree. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala index 19ee6d855043f..1f23aeb61e1f4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala @@ -890,7 +890,7 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { val wp1 = widenSetOperationTypes(union.select(p1.output.head, $"p2.v")) assert(wp1.isInstanceOf[Project]) // The attribute `p1.output.head` should be replaced in the root `Project`. 
- assert(wp1.expressions.forall(_.find(_ == p1.output.head).isEmpty)) + assert(wp1.expressions.forall(!_.exists(_ == p1.output.head))) val wp2 = widenSetOperationTypes(Aggregate(Nil, sum(p1.output.head).as("v") :: Nil, union)) assert(wp2.isInstanceOf[Aggregate]) assert(wp2.missingInput.isEmpty) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 63ad84e8a0947..782f3e41f42c7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -1490,7 +1490,7 @@ class TypeCoercionSuite extends TypeCoercionSuiteBase { val wp1 = widenSetOperationTypes(union.select(p1.output.head, $"p2.v")) assert(wp1.isInstanceOf[Project]) // The attribute `p1.output.head` should be replaced in the root `Project`. - assert(wp1.expressions.forall(_.find(_ == p1.output.head).isEmpty)) + assert(wp1.expressions.forall(!_.exists(_ == p1.output.head))) val wp2 = widenSetOperationTypes(Aggregate(Nil, sum(p1.output.head).as("v") :: Nil, union)) assert(wp2.isInstanceOf[Aggregate]) assert(wp2.missingInput.isEmpty) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuerySuite.scala index dc50039da200b..c74eeea349b2c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuerySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuerySuite.scala @@ -37,7 +37,7 @@ class DecorrelateInnerQuerySuite extends PlanTest { val testRelation2 = LocalRelation(x, y, z) private def hasOuterReferences(plan: LogicalPlan): Boolean = { - plan.find(_.expressions.exists(SubExprUtils.containsOuter)).isDefined + plan.exists(_.expressions.exists(SubExprUtils.containsOuter)) } private def check( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelperSuite.scala index 0a3f86ebf6808..4a426458e5bfe 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelperSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelperSuite.scala @@ -41,7 +41,7 @@ class AnalysisHelperSuite extends SparkFunSuite { test("setAnalyze is recursive") { val plan = Project(Nil, LocalRelation()) plan.setAnalyzed() - assert(plan.find(!_.analyzed).isEmpty) + assert(!plan.exists(!_.analyzed)) } test("resolveOperator runs on operators recursively") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index b52ecb56ad995..b6087c54e664b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -248,6 +248,44 @@ class TreeNodeSuite extends SparkFunSuite with SQLHelper { assert(expected === actual) } + test("exists") { + val expression = Add(Literal(1), Multiply(Literal(2), Subtract(Literal(3), Literal(4)))) + // Check the top node. 
+ var exists = expression.exists { + case _: Add => true + case _ => false + } + assert(exists) + + // Check the first children. + exists = expression.exists { + case Literal(1, IntegerType) => true + case _ => false + } + assert(exists) + + // Check an internal node (Subtract). + exists = expression.exists { + case _: Subtract => true + case _ => false + } + assert(exists) + + // Check a leaf node. + exists = expression.exists { + case Literal(3, IntegerType) => true + case _ => false + } + assert(exists) + + // Check not exists. + exists = expression.exists { + case Literal(100, IntegerType) => true + case _ => false + } + assert(!exists) + } + test("collectFirst") { val expression = Add(Literal(1), Multiply(Literal(2), Subtract(Literal(3), Literal(4)))) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 4a921b48fafb1..62dea96614a5f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1503,10 +1503,10 @@ class Dataset[T] private[sql]( case typedCol: TypedColumn[_, _] => // Checks if a `TypedColumn` has been inserted with // specific input type and schema by `withInputType`. - val needInputType = typedCol.expr.find { + val needInputType = typedCol.expr.exists { case ta: TypedAggregateExpression if ta.inputDeserializer.isEmpty => true case _ => false - }.isDefined + } if (!needInputType) { typedCol diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 426b2337f76a3..27d6bedad47c9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -161,7 +161,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { blocking: Boolean = false): Unit = { val shouldRemove: LogicalPlan => Boolean = if (cascade) { - _.find(_.sameResult(plan)).isDefined + _.exists(_.sameResult(plan)) } else { _.sameResult(plan) } @@ -187,7 +187,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { // will keep it as it is. It means the physical plan has been re-compiled already in the // other thread. val cacheAlreadyLoaded = cd.cachedRepresentation.cacheBuilder.isCachedColumnBuffersLoaded - cd.plan.find(_.sameResult(plan)).isDefined && !cacheAlreadyLoaded + cd.plan.exists(_.sameResult(plan)) && !cacheAlreadyLoaded }) } } @@ -207,7 +207,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { * Tries to re-cache all the cache entries that refer to the given plan. 
*/ def recacheByPlan(spark: SparkSession, plan: LogicalPlan): Unit = { - recacheByCondition(spark, _.plan.find(_.sameResult(plan)).isDefined) + recacheByCondition(spark, _.plan.exists(_.sameResult(plan))) } /** @@ -288,7 +288,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { */ def recacheByPath(spark: SparkSession, resourcePath: Path, fs: FileSystem): Unit = { val qualifiedPath = fs.makeQualified(resourcePath) - recacheByCondition(spark, _.plan.find(lookupAndRefresh(_, fs, qualifiedPath)).isDefined) + recacheByCondition(spark, _.plan.exists(lookupAndRefresh(_, fs, qualifiedPath))) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index db86b382235f7..1e2fa41ef0f49 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -239,7 +239,7 @@ case class FileSourceScanExec( } private def isDynamicPruningFilter(e: Expression): Boolean = - e.find(_.isInstanceOf[PlanExpression[_]]).isDefined + e.exists(_.isInstanceOf[PlanExpression[_]]) @transient lazy val selectedPartitions: Array[PartitionDirectory] = { val optimizerMetadataTimeNs = relation.location.metadataOpsTimeNs.getOrElse(0L) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index dde976c951718..7d36fd5d412a4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -892,7 +892,7 @@ case class CollapseCodegenStages( private def supportCodegen(plan: SparkPlan): Boolean = plan match { case plan: CodegenSupport if plan.supportCodegen => - val willFallback = plan.expressions.exists(_.find(e => !supportCodegen(e)).isDefined) + val willFallback = plan.expressions.exists(_.exists(e => !supportCodegen(e))) // the generated code will be huge if there are too many columns val hasTooManyOutputFields = WholeStageCodegenExec.isTooManyFields(conf, plan.schema) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala index 06e9c180584a9..5533bb1cd7916 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala @@ -71,7 +71,7 @@ class AQEOptimizer(conf: SQLConf) extends RuleExecutor[LogicalPlan] { previousPlan: LogicalPlan, currentPlan: LogicalPlan): Boolean = { !Utils.isTesting || (currentPlan.resolved && - currentPlan.find(PlanHelper.specialExpressionsInUnsupportedOperator(_).nonEmpty).isEmpty && + !currentPlan.exists(PlanHelper.specialExpressionsInUnsupportedOperator(_).nonEmpty) && LogicalPlanIntegrity.checkIfExprIdsAreGloballyUnique(currentPlan) && DataType.equalsIgnoreNullability(previousPlan.schema, currentPlan.schema)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 3ec5aadabfaf4..14b45256a059a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -332,7 +332,7 @@ case class AdaptiveSparkPlanExec( // Subqueries that don't belong to any query stage of the main query will execute after the // last UI update in `getFinalPhysicalPlan`, so we need to update UI here again to make sure // the newly generated nodes of those subqueries are updated. - if (!isSubquery && currentPhysicalPlan.find(_.subqueries.nonEmpty).isDefined) { + if (!isSubquery && currentPhysicalPlan.exists(_.subqueries.nonEmpty)) { getExecutionId.foreach(onUpdatePlan(_, Seq.empty)) } logOnLevel(s"Final plan: $currentPhysicalPlan") @@ -611,7 +611,7 @@ case class AdaptiveSparkPlanExec( stagesToReplace: Seq[QueryStageExec]): LogicalPlan = { var logicalPlan = plan stagesToReplace.foreach { - case stage if currentPhysicalPlan.find(_.eq(stage)).isDefined => + case stage if currentPhysicalPlan.exists(_.eq(stage)) => val logicalNodeOpt = stage.getTagValue(TEMP_LOGICAL_PLAN_TAG).orElse(stage.logicalLink) assert(logicalNodeOpt.isDefined) val logicalNode = logicalNodeOpt.get diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala index 5c208457004cb..4410f7fea81af 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala @@ -88,14 +88,14 @@ case class InsertAdaptiveSparkPlan( // - The query contains sub-query. private def shouldApplyAQE(plan: SparkPlan, isSubquery: Boolean): Boolean = { conf.getConf(SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY) || isSubquery || { - plan.find { + plan.exists { case _: Exchange => true case p if !p.requiredChildDistribution.forall(_ == UnspecifiedDistribution) => true - case p => p.expressions.exists(_.find { + case p => p.expressions.exists(_.exists { case _: SubqueryExpression => true case _ => false - }.isDefined) - }.isDefined + }) + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala index 17ea93e5ffede..7e9628c385130 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala @@ -78,7 +78,7 @@ object DetectAmbiguousSelfJoin extends Rule[LogicalPlan] { // We always remove the special metadata from `AttributeReference` at the end of this rule, so // Dataset column reference only exists in the root node via Dataset transformations like // `Dataset#select`. 
- if (plan.find(_.isInstanceOf[Join]).isEmpty) return stripColumnReferenceMetadataInPlan(plan) + if (!plan.exists(_.isInstanceOf[Join])) return stripColumnReferenceMetadataInPlan(plan) val colRefAttrs = plan.expressions.flatMap(_.collect { case a: AttributeReference if isColumnReference(a) => a diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala index 479bc21e5e6c8..1eb1082402972 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala @@ -141,10 +141,10 @@ object DisableUnnecessaryBucketedScan extends Rule[SparkPlan] { } def apply(plan: SparkPlan): SparkPlan = { - lazy val hasBucketedScan = plan.find { + lazy val hasBucketedScan = plan.exists { case scan: FileSourceScanExec => scan.bucketedScan case _ => false - }.isDefined + } if (!conf.bucketingEnabled || !conf.autoBucketedScanEnabled || !hasBucketedScan) { plan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala index 48d31d83c17b0..3b5fc4aea5d8b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala @@ -214,10 +214,10 @@ object PartitionPruning extends Rule[LogicalPlan] with PredicateHelper { * Search a filtering predicate in a given logical plan */ private def hasSelectivePredicate(plan: LogicalPlan): Boolean = { - plan.find { + plan.exists { case f: Filter => isLikelySelective(f.condition) case _ => false - }.isDefined + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala index 9a05e396d4a70..252565fd9077b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala @@ -58,13 +58,13 @@ case class PlanDynamicPruningFilters(sparkSession: SparkSession) // Using `sparkPlan` is a little hacky as it is based on the assumption that this rule is // the first to be applied (apart from `InsertAdaptiveSparkPlan`). 
val canReuseExchange = conf.exchangeReuseEnabled && buildKeys.nonEmpty && - plan.find { + plan.exists { case BroadcastHashJoinExec(_, _, _, BuildLeft, _, left, _, _) => left.sameResult(sparkPlan) case BroadcastHashJoinExec(_, _, _, BuildRight, _, _, right, _) => right.sameResult(sparkPlan) case _ => false - }.isDefined + } if (canReuseExchange) { val executedPlan = QueryExecution.prepareExecutedPlan(sparkSession, sparkPlan) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AggregateInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AggregateInPandasExec.scala index 69802b143c113..a7f63aafc9f1d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AggregateInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AggregateInPandasExec.scala @@ -88,7 +88,7 @@ case class AggregateInPandasExec( (ChainedPythonFunctions(chained.funcs ++ Seq(udf.func)), children) case children => // There should not be any other UDFs, or the children can't be evaluated directly. - assert(children.forall(_.find(_.isInstanceOf[PythonUDF]).isEmpty)) + assert(children.forall(!_.exists(_.isInstanceOf[PythonUDF]))) (ChainedPythonFunctions(Seq(udf.func)), udf.children) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala index fca43e454bff5..c567a70e1d3cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala @@ -72,7 +72,7 @@ trait EvalPythonExec extends UnaryExecNode { (ChainedPythonFunctions(chained.funcs ++ Seq(udf.func)), children) case children => // There should not be any other UDFs, or the children can't be evaluated directly. 
- assert(children.forall(_.find(_.isInstanceOf[PythonUDF]).isEmpty)) + assert(children.forall(!_.exists(_.isInstanceOf[PythonUDF]))) (ChainedPythonFunctions(Seq(udf.func)), udf.children) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala index 407c498c81759..a809ea07d0ec6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala @@ -45,10 +45,10 @@ object ExtractPythonUDFFromAggregate extends Rule[LogicalPlan] { } private def hasPythonUdfOverAggregate(expr: Expression, agg: Aggregate): Boolean = { - expr.find { + expr.exists { e => PythonUDF.isScalarPythonUDF(e) && - (e.references.isEmpty || e.find(belongAggregate(_, agg)).isDefined) - }.isDefined + (e.references.isEmpty || e.exists(belongAggregate(_, agg))) + } } private def extract(agg: Aggregate): LogicalPlan = { @@ -90,7 +90,7 @@ object ExtractPythonUDFFromAggregate extends Rule[LogicalPlan] { */ object ExtractGroupingPythonUDFFromAggregate extends Rule[LogicalPlan] { private def hasScalarPythonUDF(e: Expression): Boolean = { - e.find(PythonUDF.isScalarPythonUDF).isDefined + e.exists(PythonUDF.isScalarPythonUDF) } private def extract(agg: Aggregate): LogicalPlan = { @@ -164,7 +164,7 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { private type EvalTypeChecker = EvalType => Boolean private def hasScalarPythonUDF(e: Expression): Boolean = { - e.find(PythonUDF.isScalarPythonUDF).isDefined + e.exists(PythonUDF.isScalarPythonUDF) } @scala.annotation.tailrec diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala index 87102ccac34a8..e73da99786ceb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala @@ -113,7 +113,7 @@ case class WindowInPandasExec( (ChainedPythonFunctions(chained.funcs ++ Seq(udf.func)), children) case children => // There should not be any other UDFs, or the children can't be evaluated directly. 
- assert(children.forall(_.find(_.isInstanceOf[PythonUDF]).isEmpty)) + assert(children.forall(!_.exists(_.isInstanceOf[PythonUDF]))) (ChainedPythonFunctions(Seq(udf.func)), udf.children) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 887867766ea92..afd0aba00680e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -46,10 +46,10 @@ object ExecSubqueryExpression { * Returns true when an expression contains a subquery */ def hasSubquery(e: Expression): Boolean = { - e.find { + e.exists { case _: ExecSubqueryExpression => true case _ => false - }.isDefined + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala index 7ee533ac26d2b..dd30ff68da417 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala @@ -42,7 +42,7 @@ abstract class CTEInlineSuiteBase """.stripMargin) checkAnswer(df, Nil) assert( - df.queryExecution.optimizedPlan.find(_.isInstanceOf[WithCTE]).nonEmpty, + df.queryExecution.optimizedPlan.exists(_.isInstanceOf[WithCTE]), "Non-deterministic With-CTE with multiple references should be not inlined.") } } @@ -59,7 +59,7 @@ abstract class CTEInlineSuiteBase """.stripMargin) checkAnswer(df, Nil) assert( - df.queryExecution.optimizedPlan.find(_.isInstanceOf[WithCTE]).nonEmpty, + df.queryExecution.optimizedPlan.exists(_.isInstanceOf[WithCTE]), "Non-deterministic With-CTE with multiple references should be not inlined.") } } @@ -76,10 +76,10 @@ abstract class CTEInlineSuiteBase """.stripMargin) checkAnswer(df, Row(0, 1) :: Row(1, 2) :: Nil) assert( - df.queryExecution.analyzed.find(_.isInstanceOf[WithCTE]).nonEmpty, + df.queryExecution.analyzed.exists(_.isInstanceOf[WithCTE]), "With-CTE should not be inlined in analyzed plan.") assert( - df.queryExecution.optimizedPlan.find(_.isInstanceOf[WithCTE]).isEmpty, + !df.queryExecution.optimizedPlan.exists(_.isInstanceOf[WithCTE]), "With-CTE with one reference should be inlined in optimized plan.") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index f33de7402a71e..11b2309ee38eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -1110,16 +1110,16 @@ class DataFrameWindowFunctionsSuite extends QueryTest checkAnswer(windowed, Seq(Row("b", 4), Row(null, null), Row(null, null), Row(null, null))) - val shuffleByRequirement = windowed.queryExecution.executedPlan.find { + val shuffleByRequirement = windowed.queryExecution.executedPlan.exists { case w: WindowExec => - w.child.find { + w.child.exists { case s: ShuffleExchangeExec => isShuffleExecByRequirement(s, Seq("key1", "key2")) case _ => false - }.nonEmpty + } case _ => false } - assert(shuffleByRequirement.nonEmpty, "Can't find desired shuffle node from the query plan") + assert(shuffleByRequirement, "Can't find desired shuffle node from the query plan") } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala index 
009ccb9a45354..2f4098d7cc7eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala @@ -250,7 +250,7 @@ class DatasetCacheSuite extends QueryTest case i: InMemoryRelation => i.cacheBuilder.cachedPlan } assert(df2LimitInnerPlan.isDefined && - df2LimitInnerPlan.get.find(_.isInstanceOf[InMemoryTableScanExec]).isEmpty) + !df2LimitInnerPlan.get.exists(_.isInstanceOf[InMemoryTableScanExec])) } test("SPARK-27739 Save stats from optimized plan") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala index 3569775b72628..61885169ece4c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala @@ -207,10 +207,10 @@ abstract class DynamicPartitionPruningSuiteBase case _: ReusedExchangeExec => // reuse check ok. case BroadcastQueryStageExec(_, _: ReusedExchangeExec, _) => // reuse check ok. case b: BroadcastExchangeLike => - val hasReuse = plan.find { + val hasReuse = plan.exists { case ReusedExchangeExec(_, e) => e eq b case _ => false - }.isDefined + } assert(hasReuse, s"$s\nshould have been reused in\n$plan") case a: AdaptiveSparkPlanExec => val broadcastQueryStage = collectFirst(a) { @@ -234,7 +234,7 @@ abstract class DynamicPartitionPruningSuiteBase case r: ReusedSubqueryExec => r.child case o => o } - assert(subquery.find(_.isInstanceOf[AdaptiveSparkPlanExec]).isDefined == isMainQueryAdaptive) + assert(subquery.exists(_.isInstanceOf[AdaptiveSparkPlanExec]) == isMainQueryAdaptive) } } @@ -344,12 +344,12 @@ abstract class DynamicPartitionPruningSuiteBase | ) """.stripMargin) - val found = df.queryExecution.executedPlan.find { + val found = df.queryExecution.executedPlan.exists { case BroadcastHashJoinExec(_, _, p: ExistenceJoin, _, _, _, _, _) => true case _ => false } - assert(found.isEmpty) + assert(!found) } } @@ -1560,14 +1560,14 @@ abstract class DynamicPartitionPruningDataSourceSuiteBase } // search dynamic pruning predicates on the executed plan val plan = query.asInstanceOf[StreamingQueryWrapper].streamingQuery.lastExecution.executedPlan - val ret = plan.find { + val ret = plan.exists { case s: FileSourceScanExec => s.partitionFilters.exists { case _: DynamicPruningExpression => true case _ => false } case _ => false } - assert(ret.isDefined == false) + assert(!ret) } } } @@ -1607,10 +1607,10 @@ abstract class DynamicPartitionPruningV1Suite extends DynamicPartitionPruningDat val scanOption = find(plan) { case s: FileSourceScanExec => - s.output.exists(_.find(_.argString(maxFields = 100).contains("fid")).isDefined) + s.output.exists(_.exists(_.argString(maxFields = 100).contains("fid"))) case s: BatchScanExec => // we use f1 col for v2 tables due to schema pruning - s.output.exists(_.find(_.argString(maxFields = 100).contains("f1")).isDefined) + s.output.exists(_.exists(_.argString(maxFields = 100).contains("f1"))) case _ => false } assert(scanOption.isDefined) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index ec6c863b8183f..4a8421a221194 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -1074,8 +1074,8 @@ class JoinSuite extends QueryTest with SharedSparkSession with 
AdaptiveSparkPlan val df = left.crossJoin(right).where(pythonTestUDF(left("a")) === right.col("c")) // Before optimization, there is a logical Filter operator. - val filterInAnalysis = df.queryExecution.analyzed.find(_.isInstanceOf[Filter]) - assert(filterInAnalysis.isDefined) + val filterInAnalysis = df.queryExecution.analyzed.exists(_.isInstanceOf[Filter]) + assert(filterInAnalysis) // Filter predicate was pushdown as join condition. So there is no Filter exec operator. val filterExec = find(df.queryExecution.executedPlan)(_.isInstanceOf[FilterExec]) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala index 739b4052ee90d..8883e9be1937e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala @@ -59,13 +59,13 @@ class ReplaceNullWithFalseInPredicateEndToEndSuite extends QueryTest with Shared val q5 = df1.selectExpr("IF(l > 1 AND null, 5, 1) AS out") checkAnswer(q5, Row(1) :: Row(1) :: Nil) q5.queryExecution.executedPlan.foreach { p => - assert(p.expressions.forall(e => e.find(_.isInstanceOf[If]).isEmpty)) + assert(p.expressions.forall(e => !e.exists(_.isInstanceOf[If]))) } val q6 = df1.selectExpr("CASE WHEN (l > 2 AND null) THEN 3 ELSE 2 END") checkAnswer(q6, Row(2) :: Row(2) :: Nil) q6.queryExecution.executedPlan.foreach { p => - assert(p.expressions.forall(e => e.find(_.isInstanceOf[CaseWhen]).isEmpty)) + assert(p.expressions.forall(e => !e.exists(_.isInstanceOf[CaseWhen]))) } checkAnswer(df1.where("IF(l > 10, false, b OR null)"), Row(1, true)) @@ -75,10 +75,10 @@ class ReplaceNullWithFalseInPredicateEndToEndSuite extends QueryTest with Shared test("SPARK-26107: Replace Literal(null, _) with FalseLiteral in higher-order functions") { def assertNoLiteralNullInPlan(df: DataFrame): Unit = { df.queryExecution.executedPlan.foreach { p => - assert(p.expressions.forall(_.find { + assert(p.expressions.forall(!_.exists { case Literal(null, BooleanType) => true case _ => false - }.isEmpty)) + })) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala index 5156bd40bee69..cfc8b2cc84524 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala @@ -184,7 +184,7 @@ class FileDataSourceV2FallBackSuite extends QueryTest with SharedSparkSession { val df = spark.read.format(format).load(path.getCanonicalPath) checkAnswer(df, inputData.toDF()) assert( - df.queryExecution.executedPlan.find(_.isInstanceOf[FileSourceScanExec]).isDefined) + df.queryExecution.executedPlan.exists(_.isInstanceOf[FileSourceScanExec])) } } finally { spark.listenerManager.unregister(listener) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/DeprecatedWholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/DeprecatedWholeStageCodegenSuite.scala index b27a940c364a4..635c794338065 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/DeprecatedWholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/DeprecatedWholeStageCodegenSuite.scala @@ -36,9 +36,9 
@@ class DeprecatedWholeStageCodegenSuite extends QueryTest .groupByKey(_._1).agg(typed.sum(_._2)) val plan = ds.queryExecution.executedPlan - assert(plan.find(p => + assert(plan.exists(p => p.isInstanceOf[WholeStageCodegenExec] && - p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[HashAggregateExec]).isDefined) + p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[HashAggregateExec])) assert(ds.collect() === Array(("a", 10.0), ("b", 3.0), ("c", 1.0))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index dfc1b70cf4a5d..c3c8959d6e1ca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -221,7 +221,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val query = testData.select(Symbol("key"), Symbol("value")) .sort(Symbol("key")).limit(2).filter('key === 3) val planned = query.queryExecution.executedPlan - assert(planned.find(_.isInstanceOf[TakeOrderedAndProjectExec]).isDefined) + assert(planned.exists(_.isInstanceOf[TakeOrderedAndProjectExec])) } test("CollectLimit can appear in the middle of a plan when caching is used") { @@ -234,11 +234,11 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { withSQLConf(SQLConf.TOP_K_SORT_FALLBACK_THRESHOLD.key -> "1000") { val query0 = testData.select(Symbol("value")).orderBy(Symbol("key")).limit(100) val planned0 = query0.queryExecution.executedPlan - assert(planned0.find(_.isInstanceOf[TakeOrderedAndProjectExec]).isDefined) + assert(planned0.exists(_.isInstanceOf[TakeOrderedAndProjectExec])) val query1 = testData.select(Symbol("value")).orderBy(Symbol("key")).limit(2000) val planned1 = query1.queryExecution.executedPlan - assert(planned1.find(_.isInstanceOf[TakeOrderedAndProjectExec]).isEmpty) + assert(!planned1.exists(_.isInstanceOf[TakeOrderedAndProjectExec])) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala index 5e0318d97ff94..73c4e4c3e1eb8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala @@ -84,7 +84,7 @@ object WholeStageCodegenSparkSubmitSuite extends Assertions with Logging { val df = spark.range(71773).select((col("id") % lit(10)).cast(IntegerType) as "v") .groupBy(array(col("v"))).agg(count(col("*"))) val plan = df.queryExecution.executedPlan - assert(plan.find(_.isInstanceOf[WholeStageCodegenExec]).isDefined) + assert(plan.exists(_.isInstanceOf[WholeStageCodegenExec])) val expectedAnswer = Row(Array(0), 7178) :: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index b5b67287447c8..f0533f89b63e6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -37,16 +37,16 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession test("range/filter should be combined") { val df = spark.range(10).filter("id = 1").selectExpr("id + 1") val plan = 
df.queryExecution.executedPlan - assert(plan.find(_.isInstanceOf[WholeStageCodegenExec]).isDefined) + assert(plan.exists(_.isInstanceOf[WholeStageCodegenExec])) assert(df.collect() === Array(Row(2))) } test("HashAggregate should be included in WholeStageCodegen") { val df = spark.range(10).groupBy().agg(max(col("id")), avg(col("id"))) val plan = df.queryExecution.executedPlan - assert(plan.find(p => + assert(plan.exists(p => p.isInstanceOf[WholeStageCodegenExec] && - p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[HashAggregateExec]).isDefined) + p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[HashAggregateExec])) assert(df.collect() === Array(Row(9, 4.5))) } @@ -54,9 +54,9 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession val df = spark.range(10).agg(max(col("id")), avg(col("id"))) withSQLConf("spark.sql.test.forceApplySortAggregate" -> "true") { val plan = df.queryExecution.executedPlan - assert(plan.find(p => + assert(plan.exists(p => p.isInstanceOf[WholeStageCodegenExec] && - p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[SortAggregateExec]).isDefined) + p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[SortAggregateExec])) assert(df.collect() === Array(Row(9, 4.5))) } } @@ -70,22 +70,22 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession // Array - explode var expDF = df.select($"name", explode($"knownLanguages"), $"properties") var plan = expDF.queryExecution.executedPlan - assert(plan.find { + assert(plan.exists { case stage: WholeStageCodegenExec => - stage.find(_.isInstanceOf[GenerateExec]).isDefined + stage.exists(_.isInstanceOf[GenerateExec]) case _ => !codegenEnabled.toBoolean - }.isDefined) + }) checkAnswer(expDF, Array(Row("James", "Java", Map("hair" -> "black", "eye" -> "brown")), Row("James", "Scala", Map("hair" -> "black", "eye" -> "brown")))) // Map - explode expDF = df.select($"name", $"knownLanguages", explode($"properties")) plan = expDF.queryExecution.executedPlan - assert(plan.find { + assert(plan.exists { case stage: WholeStageCodegenExec => - stage.find(_.isInstanceOf[GenerateExec]).isDefined + stage.exists(_.isInstanceOf[GenerateExec]) case _ => !codegenEnabled.toBoolean - }.isDefined) + }) checkAnswer(expDF, Array(Row("James", List("Java", "Scala"), "hair", "black"), Row("James", List("Java", "Scala"), "eye", "brown"))) @@ -93,33 +93,33 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession // Array - posexplode expDF = df.select($"name", posexplode($"knownLanguages")) plan = expDF.queryExecution.executedPlan - assert(plan.find { + assert(plan.exists { case stage: WholeStageCodegenExec => - stage.find(_.isInstanceOf[GenerateExec]).isDefined + stage.exists(_.isInstanceOf[GenerateExec]) case _ => !codegenEnabled.toBoolean - }.isDefined) + }) checkAnswer(expDF, Array(Row("James", 0, "Java"), Row("James", 1, "Scala"))) // Map - posexplode expDF = df.select($"name", posexplode($"properties")) plan = expDF.queryExecution.executedPlan - assert(plan.find { + assert(plan.exists { case stage: WholeStageCodegenExec => - stage.find(_.isInstanceOf[GenerateExec]).isDefined + stage.exists(_.isInstanceOf[GenerateExec]) case _ => !codegenEnabled.toBoolean - }.isDefined) + }) checkAnswer(expDF, Array(Row("James", 0, "hair", "black"), Row("James", 1, "eye", "brown"))) // Array - explode , selecting all columns expDF = df.select($"*", explode($"knownLanguages")) plan = expDF.queryExecution.executedPlan - assert(plan.find { + assert(plan.exists { case stage: WholeStageCodegenExec => - 
stage.find(_.isInstanceOf[GenerateExec]).isDefined + stage.exists(_.isInstanceOf[GenerateExec]) case _ => !codegenEnabled.toBoolean - }.isDefined) + }) checkAnswer(expDF, Array(Row("James", Seq("Java", "Scala"), Map("hair" -> "black", "eye" -> "brown"), "Java"), Row("James", Seq("Java", "Scala"), Map("hair" -> "black", "eye" -> "brown"), "Scala"))) @@ -127,11 +127,11 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession // Map - explode, selecting all columns expDF = df.select($"*", explode($"properties")) plan = expDF.queryExecution.executedPlan - assert(plan.find { + assert(plan.exists { case stage: WholeStageCodegenExec => - stage.find(_.isInstanceOf[GenerateExec]).isDefined + stage.exists(_.isInstanceOf[GenerateExec]) case _ => !codegenEnabled.toBoolean - }.isDefined) + }) checkAnswer(expDF, Array( Row("James", List("Java", "Scala"), @@ -143,9 +143,9 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession test("HashAggregate with grouping keys should be included in WholeStageCodegen") { val df = spark.range(3).groupBy(col("id") * 2).count().orderBy(col("id") * 2) val plan = df.queryExecution.executedPlan - assert(plan.find(p => + assert(plan.exists(p => p.isInstanceOf[WholeStageCodegenExec] && - p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[HashAggregateExec]).isDefined) + p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[HashAggregateExec])) assert(df.collect() === Array(Row(0, 1), Row(2, 1), Row(4, 1))) } @@ -154,9 +154,9 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession val schema = new StructType().add("k", IntegerType).add("v", StringType) val smallDF = spark.createDataFrame(rdd, schema) val df = spark.range(10).join(broadcast(smallDF), col("k") === col("id")) - assert(df.queryExecution.executedPlan.find(p => + assert(df.queryExecution.executedPlan.exists(p => p.isInstanceOf[WholeStageCodegenExec] && - p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[BroadcastHashJoinExec]).isDefined) + p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[BroadcastHashJoinExec])) assert(df.collect() === Array(Row(1, 1, "1"), Row(1, 1, "1"), Row(2, 2, "2"))) } @@ -434,9 +434,9 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession test("Sort should be included in WholeStageCodegen") { val df = spark.range(3, 0, -1).toDF().sort(col("id")) val plan = df.queryExecution.executedPlan - assert(plan.find(p => + assert(plan.exists(p => p.isInstanceOf[WholeStageCodegenExec] && - p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[SortExec]).isDefined) + p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[SortExec])) assert(df.collect() === Array(Row(1), Row(2), Row(3))) } @@ -445,27 +445,27 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession val ds = spark.range(10).map(_.toString) val plan = ds.queryExecution.executedPlan - assert(plan.find(p => + assert(plan.exists(p => p.isInstanceOf[WholeStageCodegenExec] && - p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[SerializeFromObjectExec]).isDefined) + p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[SerializeFromObjectExec])) assert(ds.collect() === 0.until(10).map(_.toString).toArray) } test("typed filter should be included in WholeStageCodegen") { val ds = spark.range(10).filter(_ % 2 == 0) val plan = ds.queryExecution.executedPlan - assert(plan.find(p => + assert(plan.exists(p => p.isInstanceOf[WholeStageCodegenExec] && - 
p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[FilterExec]).isDefined) + p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[FilterExec])) assert(ds.collect() === Array(0, 2, 4, 6, 8)) } test("back-to-back typed filter should be included in WholeStageCodegen") { val ds = spark.range(10).filter(_ % 2 == 0).filter(_ % 3 == 0) val plan = ds.queryExecution.executedPlan - assert(plan.find(p => + assert(plan.exists(p => p.isInstanceOf[WholeStageCodegenExec] && - p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[FilterExec]).isDefined) + p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[FilterExec])) assert(ds.collect() === Array(0, 6)) } @@ -517,10 +517,10 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession .select("int") val plan = df.queryExecution.executedPlan - assert(plan.find(p => + assert(!plan.exists(p => p.isInstanceOf[WholeStageCodegenExec] && p.asInstanceOf[WholeStageCodegenExec].child.children(0) - .isInstanceOf[SortMergeJoinExec]).isEmpty) + .isInstanceOf[SortMergeJoinExec])) assert(df.collect() === Array(Row(1), Row(2))) } } @@ -639,9 +639,9 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession val df = spark.range(100) val join = df.join(df, "id") val plan = join.queryExecution.executedPlan - assert(plan.find(p => + assert(!plan.exists(p => p.isInstanceOf[WholeStageCodegenExec] && - p.asInstanceOf[WholeStageCodegenExec].codegenStageId == 0).isEmpty, + p.asInstanceOf[WholeStageCodegenExec].codegenStageId == 0), "codegen stage IDs should be preserved through ReuseExchange") checkAnswer(join, df.toDF) } @@ -740,11 +740,11 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession // HashAggregateExec supports WholeStageCodegen and it's the parent of // LocalTableScanExec so LocalTableScanExec should be within a WholeStageCodegen domain. 
assert( - executedPlan.find { + executedPlan.exists { case WholeStageCodegenExec( HashAggregateExec(_, _, _, _, _, _, _: LocalTableScanExec)) => true case _ => false - }.isDefined, + }, "LocalTableScanExec should be within a WholeStageCodegen domain.") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala index 849c41307245e..787fdc7b59d67 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala @@ -44,7 +44,7 @@ object JoinBenchmark extends SqlBasedBenchmark { val dim = broadcast(spark.range(M).selectExpr("id as k", "cast(id as string) as v")) codegenBenchmark("Join w long", N) { val df = spark.range(N).join(dim, (col("id") % M) === col("k")) - assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) + assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[BroadcastHashJoinExec])) df.noop() } } @@ -55,7 +55,7 @@ object JoinBenchmark extends SqlBasedBenchmark { val dim = broadcast(spark.range(M).selectExpr("cast(id/10 as long) as k")) codegenBenchmark("Join w long duplicated", N) { val df = spark.range(N).join(dim, (col("id") % M) === col("k")) - assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) + assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[BroadcastHashJoinExec])) df.noop() } } @@ -70,7 +70,7 @@ object JoinBenchmark extends SqlBasedBenchmark { val df = spark.range(N).join(dim2, (col("id") % M).cast(IntegerType) === col("k1") && (col("id") % M).cast(IntegerType) === col("k2")) - assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) + assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[BroadcastHashJoinExec])) df.noop() } } @@ -84,7 +84,7 @@ object JoinBenchmark extends SqlBasedBenchmark { codegenBenchmark("Join w 2 longs", N) { val df = spark.range(N).join(dim3, (col("id") % M) === col("k1") && (col("id") % M) === col("k2")) - assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) + assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[BroadcastHashJoinExec])) df.noop() } } @@ -98,7 +98,7 @@ object JoinBenchmark extends SqlBasedBenchmark { codegenBenchmark("Join w 2 longs duplicated", N) { val df = spark.range(N).join(dim4, (col("id") bitwiseAND M) === col("k1") && (col("id") bitwiseAND M) === col("k2")) - assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) + assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[BroadcastHashJoinExec])) df.noop() } } @@ -109,7 +109,7 @@ object JoinBenchmark extends SqlBasedBenchmark { val dim = broadcast(spark.range(M).selectExpr("id as k", "cast(id as string) as v")) codegenBenchmark("outer join w long", N) { val df = spark.range(N).join(dim, (col("id") % M) === col("k"), "left") - assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) + assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[BroadcastHashJoinExec])) df.noop() } } @@ -120,7 +120,7 @@ object JoinBenchmark extends SqlBasedBenchmark { val dim = broadcast(spark.range(M).selectExpr("id as k", "cast(id as string) as v")) codegenBenchmark("semi join w long", N) { val df = spark.range(N).join(dim, (col("id") % M) === col("k"), "leftsemi") - 
assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) + assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[BroadcastHashJoinExec])) df.noop() } } @@ -131,7 +131,7 @@ object JoinBenchmark extends SqlBasedBenchmark { val df1 = spark.range(N).selectExpr(s"id * 2 as k1") val df2 = spark.range(N).selectExpr(s"id * 3 as k2") val df = df1.join(df2, col("k1") === col("k2")) - assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[SortMergeJoinExec]).isDefined) + assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[SortMergeJoinExec])) df.noop() } } @@ -144,7 +144,7 @@ object JoinBenchmark extends SqlBasedBenchmark { val df2 = spark.range(N) .selectExpr(s"(id * 15485867) % ${N*10} as k2") val df = df1.join(df2, col("k1") === col("k2")) - assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[SortMergeJoinExec]).isDefined) + assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[SortMergeJoinExec])) df.noop() } } @@ -159,7 +159,7 @@ object JoinBenchmark extends SqlBasedBenchmark { val df1 = spark.range(N).selectExpr(s"id as k1") val df2 = spark.range(N / 3).selectExpr(s"id * 3 as k2") val df = df1.join(df2, col("k1") === col("k2")) - assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[ShuffledHashJoinExec]).isDefined) + assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[ShuffledHashJoinExec])) df.noop() } } @@ -172,8 +172,7 @@ object JoinBenchmark extends SqlBasedBenchmark { val dim = broadcast(spark.range(M).selectExpr("id as k", "cast(id as string) as v")) codegenBenchmark("broadcast nested loop join", N) { val df = spark.range(N).join(dim) - assert(df.queryExecution.sparkPlan.find( - _.isInstanceOf[BroadcastNestedLoopJoinExec]).isDefined) + assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[BroadcastNestedLoopJoinExec])) df.noop() } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 132b8f9be9dcc..0897ad2ff3009 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -1353,12 +1353,12 @@ abstract class JsonSuite } test("Dataset toJSON doesn't construct rdd") { - val containsRDD = spark.emptyDataFrame.toJSON.queryExecution.logical.find { + val containsRDDExists = spark.emptyDataFrame.toJSON.queryExecution.logical.exists { case ExternalRDD(_, _) => true case _ => false } - assert(containsRDD.isEmpty, "Expected logical plan of toJSON to not contain an RDD") + assert(!containsRDDExists, "Expected logical plan of toJSON to not contain an RDD") } test("JSONRelation equality test") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index 280a88091089b..7f3809dd044f4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -734,10 +734,10 @@ abstract class OrcQuerySuite extends OrcQueryTest with SharedSparkSession { withSQLConf(SQLConf.ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true") { val readDf = spark.read.orc(path) - val vectorizationEnabled = readDf.queryExecution.executedPlan.find { + val vectorizationEnabled = readDf.queryExecution.executedPlan.exists { 
case scan @ (_: FileSourceScanExec | _: BatchScanExec) => scan.supportsColumnar case _ => false - }.isDefined + } assert(vectorizationEnabled) checkAnswer(readDf, df) } @@ -756,10 +756,10 @@ abstract class OrcQuerySuite extends OrcQueryTest with SharedSparkSession { withSQLConf(SQLConf.ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true") { val readDf = spark.read.orc(path) - val vectorizationEnabled = readDf.queryExecution.executedPlan.find { + val vectorizationEnabled = readDf.queryExecution.executedPlan.exists { case scan @ (_: FileSourceScanExec | _: BatchScanExec) => scan.supportsColumnar case _ => false - }.isDefined + } assert(vectorizationEnabled) checkAnswer(readDf, df) } @@ -783,10 +783,10 @@ abstract class OrcQuerySuite extends OrcQueryTest with SharedSparkSession { withSQLConf(SQLConf.ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true", SQLConf.WHOLESTAGE_MAX_NUM_FIELDS.key -> maxNumFields) { val scanPlan = spark.read.orc(path).queryExecution.executedPlan - assert(scanPlan.find { + assert(scanPlan.exists { case scan @ (_: FileSourceScanExec | _: BatchScanExec) => scan.supportsColumnar case _ => false - }.isDefined == vectorizedEnabled) + } == vectorizedEnabled) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSinkSuite.scala index a0bd0fb582ca2..ce98e2e6a5bb6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSinkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSinkSuite.scala @@ -160,9 +160,9 @@ class ForeachBatchSinkSuite extends StreamTest { var planAsserted = false val writer: (Dataset[T], Long) => Unit = { case (df, _) => - assert(df.queryExecution.executedPlan.find { p => + assert(!df.queryExecution.executedPlan.exists { p => p.isInstanceOf[SerializeFromObjectExec] - }.isEmpty, "Untyped Dataset should not introduce serialization on object!") + }, "Untyped Dataset should not introduce serialization on object!") planAsserted = true } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index f4de713d04fc0..18039db2ca744 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -463,18 +463,18 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti // check existence of shuffle assert( - joinOperator.left.find(_.isInstanceOf[ShuffleExchangeExec]).isDefined == shuffleLeft, + joinOperator.left.exists(_.isInstanceOf[ShuffleExchangeExec]) == shuffleLeft, s"expected shuffle in plan to be $shuffleLeft but found\n${joinOperator.left}") assert( - joinOperator.right.find(_.isInstanceOf[ShuffleExchangeExec]).isDefined == shuffleRight, + joinOperator.right.exists(_.isInstanceOf[ShuffleExchangeExec]) == shuffleRight, s"expected shuffle in plan to be $shuffleRight but found\n${joinOperator.right}") // check existence of sort assert( - joinOperator.left.find(_.isInstanceOf[SortExec]).isDefined == sortLeft, + joinOperator.left.exists(_.isInstanceOf[SortExec]) == sortLeft, s"expected sort in the left child to be $sortLeft but found\n${joinOperator.left}") assert( - joinOperator.right.find(_.isInstanceOf[SortExec]).isDefined == sortRight, + 
joinOperator.right.exists(_.isInstanceOf[SortExec]) == sortRight, s"expected sort in the right child to be $sortRight but found\n${joinOperator.right}") // check the output partitioning @@ -678,7 +678,7 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti df1.groupBy("i", "j").agg(max("k")).sort("i", "j")) assert( - aggregated.queryExecution.executedPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) + !aggregated.queryExecution.executedPlan.exists(_.isInstanceOf[ShuffleExchangeExec])) } } @@ -719,7 +719,7 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti df1.groupBy("i", "j").agg(max("k")).sort("i", "j")) assert( - aggregated.queryExecution.executedPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) + !aggregated.queryExecution.executedPlan.exists(_.isInstanceOf[ShuffleExchangeExec])) } } From 0a4a12dfcb6996859e18115193e0c59bb22f9c97 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 10 Mar 2022 21:58:09 +0800 Subject: [PATCH 459/513] [SPARK-38490][SQL][INFRA] Add Github action test job for ANSI SQL mode ### What changes were proposed in this pull request? Add Github action test job for ANSI SQL mode. It will be triggered after each commit push on the master branch of apache/spark. image To make the implementation easy, ANSI SQL mode becomes enabled if ENV variable `SPARK_ANSI_SQL_MODE` is set as `true` ### Why are the changes needed? Testing the ANSI SQL mode on each commit push, so that we can find issues in time. ### Does this PR introduce _any_ user-facing change? Yes, ANSI SQL mode becomes enabled if ENV variable `SPARK_ANSI_SQL_MODE` is set as `true`. This won't be in documentation since it is for testing purpose. ### How was this patch tested? Manually try on my repo: https://github.com/gengliangwang/spark/actions/runs/1961176426 Closes #35797 from gengliangwang/ansiGAJob. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .github/workflows/ansi_sql_mode_test.yml | 34 +++++++++++++++++++ .github/workflows/build_and_test.yml | 11 +++++- .../apache/spark/sql/internal/SQLConf.scala | 2 +- 3 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/ansi_sql_mode_test.yml diff --git a/.github/workflows/ansi_sql_mode_test.yml b/.github/workflows/ansi_sql_mode_test.yml new file mode 100644 index 0000000000000..e68b04b5420f0 --- /dev/null +++ b/.github/workflows/ansi_sql_mode_test.yml @@ -0,0 +1,34 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +name: ANSI SQL mode test + +on: + push: + branches: + - master + +jobs: + ansi_sql_test: + uses: ./.github/workflows/build_and_test.yml + if: github.repository == 'apache/spark' + with: + ansi_enabled: true + + diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 67f57218d316b..fbd5db251334d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -37,6 +37,12 @@ on: - cron: '0 13 * * *' # Java 17 - cron: '0 16 * * *' + workflow_call: + inputs: + ansi_enabled: + required: false + type: boolean + default: false jobs: configure-jobs: @@ -92,7 +98,7 @@ jobs: echo '::set-output name=java::8' echo '::set-output name=branch::master' # Default branch to run on. CHANGE here when a branch is cut out. echo '::set-output name=type::regular' - echo '::set-output name=envs::{}' + echo '::set-output name=envs::{"SPARK_ANSI_SQL_MODE": "${{ inputs.ansi_enabled }}"}' echo '::set-output name=hadoop::hadoop3' fi @@ -311,6 +317,7 @@ jobs: SKIP_UNIDOC: true SKIP_MIMA: true METASPACE_SIZE: 1g + SPARK_ANSI_SQL_MODE: ${{ inputs.ansi_enabled }} steps: - name: Checkout Spark repository uses: actions/checkout@v2 @@ -398,6 +405,7 @@ jobs: GITHUB_PREV_SHA: ${{ github.event.before }} SPARK_LOCAL_IP: localhost SKIP_MIMA: true + SPARK_ANSI_SQL_MODE: ${{ inputs.ansi_enabled }} steps: - name: Checkout Spark repository uses: actions/checkout@v2 @@ -671,6 +679,7 @@ jobs: runs-on: ubuntu-20.04 env: SPARK_LOCAL_IP: localhost + SPARK_ANSI_SQL_MODE: ${{ inputs.ansi_enabled }} steps: - name: Checkout Spark repository uses: actions/checkout@v2 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index c7aec8e023b22..edb388e219877 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2706,7 +2706,7 @@ object SQLConf { "standard directly, but their behaviors align with ANSI SQL's style") .version("3.0.0") .booleanConf - .createWithDefault(false) + .createWithDefault(sys.env.get("SPARK_ANSI_SQL_MODE").contains("true")) val ENFORCE_RESERVED_KEYWORDS = buildConf("spark.sql.ansi.enforceReservedKeywords") .doc(s"When true and '${ANSI_ENABLED.key}' is true, the Spark SQL parser enforces the ANSI " + From a26c01d85035b487e2048f1c106b14b89455e2d9 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 11 Mar 2022 00:02:49 +0800 Subject: [PATCH 460/513] [SPARK-38451][R][TESTS] Fix `make_date` test case to pass with ANSI mode ### What changes were proposed in this pull request? This PR proposes to fix `expose make_date expression in R` test case to pass with ANSI mode enabled by excluding data that throws an exception when ANSI mode is enabled. ### Why are the changes needed? To make the tests independent from configurations, and to improve test coverage (ANSI mode will be tested with GitHub Actions). ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? CI in this PR should test it out. Closes #35798 from HyukjinKwon/SPARK-38451. 
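For context on the data change described above: with ANSI mode off, `make_date` returns NULL for out-of-range inputs such as month 13 or 2021-02-29, while with ANSI mode on the same inputs raise a runtime error, so those rows can only be asserted when ANSI mode is disabled. A rough Scala equivalent of the behavior the R test exercises (a local SparkSession sketch, not part of the patch):

```scala
import org.apache.spark.sql.SparkSession

object MakeDateAnsiSketch extends App {
  val spark = SparkSession.builder().master("local[1]").appName("sketch").getOrCreate()

  spark.conf.set("spark.sql.ansi.enabled", "false")
  spark.sql("SELECT make_date(2021, 13, 1)").show()  // a single NULL row under non-ANSI mode

  spark.conf.set("spark.sql.ansi.enabled", "true")
  // Under ANSI mode the same expression fails at runtime instead of returning NULL,
  // which is why the R fixture only includes such rows when ANSI mode is disabled.
  // spark.sql("SELECT make_date(2021, 13, 1)").show()

  spark.stop()
}
```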
Lead-authored-by: Hyukjin Kwon Co-authored-by: Hyukjin Kwon Signed-off-by: Gengliang Wang --- R/pkg/tests/fulltests/test_sparkSQL.R | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 73b9dcc0a5728..df1094bacef64 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2051,13 +2051,19 @@ test_that("date functions on a DataFrame", { }) test_that("SPARK-37108: expose make_date expression in R", { + ansiEnabled <- sparkR.conf("spark.sql.ansi.enabled")[[1]] == "true" df <- createDataFrame( - list(list(2021, 10, 22), list(2021, 13, 1), - list(2021, 2, 29), list(2020, 2, 29)), + c( + list(list(2021, 10, 22), list(2020, 2, 29)), + if (ansiEnabled) list() else list(list(2021, 13, 1), list(2021, 2, 29)) + ), list("year", "month", "day") ) expect <- createDataFrame( - list(list(as.Date("2021-10-22")), NA, NA, list(as.Date("2020-02-29"))), + c( + list(list(as.Date("2021-10-22")), list(as.Date("2020-02-29"))), + if (ansiEnabled) list() else list(NA, NA) + ), list("make_date(year, month, day)") ) actual <- select(df, make_date(df$year, df$month, df$day)) From 024d03efd7ce17c22bf6f333614f0da516237ae3 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 11 Mar 2022 01:27:14 +0800 Subject: [PATCH 461/513] [SPARK-38501][SQL] Fix thriftserver test failures under ANSI mode ### What changes were proposed in this pull request? Fix thriftserver test failures under ANSI mode: - CliSuite - SparkThriftServerProtocolVersionsSuite - ThriftServerWithSparkContextSuite ### Why are the changes needed? To set up a new GA test job with ANSI mode on ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually turn on ANSI mode and test . Also it should pass GA tests. Closes #35802 from gengliangwang/fixHiveTH. 
Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../sql/hive/thriftserver/CliSuite.scala | 16 +++++++-------- ...arkThriftServerProtocolVersionsSuite.scala | 9 ++++++--- .../ThriftServerWithSparkContextSuite.scala | 20 +++++++------------ 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 74288ca0bc170..2f0fd858ba206 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -555,22 +555,22 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { ) } - test("AnalysisException with root cause will be printStacktrace") { + test("SparkException with root cause will be printStacktrace") { // If it is not in silent mode, will print the stacktrace runCliWithin( 1.minute, extraArgs = Seq("--hiveconf", "hive.session.silent=false", - "-e", "select date_sub(date'2011-11-11', '1.2');"), - errorResponses = Seq("NumberFormatException"))( - ("", "Error in query: The second argument of 'date_sub' function needs to be an integer."), - ("", "NumberFormatException: invalid input syntax for type numeric: 1.2")) + "-e", "select from_json('a', 'a INT', map('mode', 'FAILFAST'));"), + errorResponses = Seq("JsonParseException"))( + ("", "SparkException: Malformed records are detected in record parsing"), + ("", "JsonParseException: Unrecognized token 'a'")) // If it is in silent mode, will print the error message only runCliWithin( 1.minute, extraArgs = Seq("--conf", "spark.hive.session.silent=true", - "-e", "select date_sub(date'2011-11-11', '1.2');"), - errorResponses = Seq("AnalysisException"))( - ("", "Error in query: The second argument of 'date_sub' function needs to be an integer.")) + "-e", "select from_json('a', 'a INT', map('mode', 'FAILFAST'));"), + errorResponses = Seq("SparkException"))( + ("", "SparkException: Malformed records are detected in record parsing")) } test("SPARK-30808: use Java 8 time API in Thrift SQL CLI by default") { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index 851b8e48684de..daf410556f5b8 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -30,6 +30,7 @@ import org.apache.thrift.protocol.TBinaryProtocol import org.apache.thrift.transport.TSocket import org.apache.spark.sql.catalyst.util.NumberConverter +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.unsafe.types.UTF8String class SparkThriftServerProtocolVersionsSuite extends HiveThriftServer2TestBase { @@ -298,9 +299,11 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftServer2TestBase { assert(metaData.getPrecision(1) === Int.MaxValue) assert(metaData.getScale(1) === 0) } - testExecuteStatementWithProtocolVersion(version, "SELECT cast(49960 as binary)") { rs => - assert(rs.next()) - assert(rs.getString(1) === UTF8String.fromBytes(NumberConverter.toBinary(49960)).toString) + if 
(!SQLConf.get.ansiEnabled) { + testExecuteStatementWithProtocolVersion(version, "SELECT cast(49960 as binary)") { rs => + assert(rs.next()) + assert(rs.getString(1) === UTF8String.fromBytes(NumberConverter.toBinary(49960)).toString) + } } testExecuteStatementWithProtocolVersion(version, "SELECT cast(null as binary)") { rs => assert(rs.next()) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index ad527a2571898..b5cfa04bab581 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -56,7 +56,7 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { } test("Full stack traces as error message for jdbc or thrift client") { - val sql = "select date_sub(date'2011-11-11', '1.2')" + val sql = "select from_json('a', 'a INT', map('mode', 'FAILFAST'))" withCLIServiceClient() { client => val sessionHandle = client.openSession(user, "") @@ -67,24 +67,18 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { sql, confOverlay) } - - assert(e.getMessage - .contains("The second argument of 'date_sub' function needs to be an integer.")) - assert(!e.getMessage.contains("" + - "java.lang.NumberFormatException: invalid input syntax for type numeric: 1.2")) - assert(e.getSQLState == "22023") + assert(e.getMessage.contains("JsonParseException: Unrecognized token 'a'")) + assert(!e.getMessage.contains( + "SparkException: Malformed records are detected in record parsing")) } withJdbcStatement { statement => val e = intercept[SQLException] { statement.executeQuery(sql) } - assert(e.getMessage - .contains("The second argument of 'date_sub' function needs to be an integer.")) - assert(e.getMessage.contains("[SECOND_FUNCTION_ARGUMENT_NOT_INTEGER]")) - assert(e.getMessage.contains("" + - "java.lang.NumberFormatException: invalid input syntax for type numeric: 1.2")) - assert(e.getSQLState == "22023") + assert(e.getMessage.contains("JsonParseException: Unrecognized token 'a'")) + assert(e.getMessage.contains( + "SparkException: Malformed records are detected in record parsing")) } } From f8521005b94aea7bec77c6679cc25d9c3df00f72 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 10 Mar 2022 15:21:36 -0800 Subject: [PATCH 462/513] [SPARK-38513][K8S] Move custom scheduler-specific configs to under `spark.kubernetes.scheduler.NAME` prefix ### What changes were proposed in this pull request? This PR aims to move custom scheduler configs to under its name prefix, `spark.kubernetes.scheduler.NAME`. Since this PR moved `spark.kubernetes.executor.podGroupTemplateFile` to `spark.kubernetes.scheduler.volcano.executor.podGroupTemplateFile`. We can delete it because there is no plan for `volcano` scheduler to use executor pod group template at Apache Spark 3.3.x. For the other custom schedulers, they still can use `spark.kubernetes.scheduler.XXX.executor.podGroupTemplateFile`. ### Why are the changes needed? To support multiple customer schedulers in an isolated manner, we need to isolate configurations from Spark's configuration. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs and K8s IT. Closes #35809 from dongjoon-hyun/SPARK-38513. 
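A hedged usage sketch for the relocated key (the key string matches `VolcanoFeatureStep.POD_GROUP_TEMPLATE_FILE_KEY` in the diff below; the template path and the programmatic `SparkConf` usage are illustrative assumptions, not part of the patch):

```scala
import org.apache.spark.SparkConf

object VolcanoSchedulerConfSketch extends App {
  // Custom-scheduler settings now live under the spark.kubernetes.scheduler.<name> prefix;
  // the file path below is a placeholder for illustration only.
  val conf = new SparkConf()
    .set("spark.kubernetes.scheduler.volcano.podGroupTemplateFile",
      "/opt/spark/conf/driver-podgroup-template.yml")

  println(conf.get("spark.kubernetes.scheduler.volcano.podGroupTemplateFile"))
}
```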
Lead-authored-by: Dongjoon Hyun Co-authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/deploy/k8s/Config.scala | 14 ------------- .../k8s/features/VolcanoFeatureStep.scala | 16 +++++++------- .../features/VolcanoFeatureStepSuite.scala | 21 +------------------ .../integrationtest/VolcanoTestsSuite.scala | 4 ++-- 4 files changed, 10 insertions(+), 45 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index ff17ef51fe630..7930cd0ce1563 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -292,20 +292,6 @@ private[spark] object Config extends Logging { .stringConf .createOptional - val KUBERNETES_DRIVER_PODGROUP_TEMPLATE_FILE = - ConfigBuilder("spark.kubernetes.driver.podGroupTemplateFile") - .doc("File containing a template pod group spec for driver") - .version("3.3.0") - .stringConf - .createOptional - - val KUBERNETES_EXECUTOR_PODGROUP_TEMPLATE_FILE = - ConfigBuilder("spark.kubernetes.executor.podGroupTemplateFile") - .doc("File containing a template pod group spec for executors") - .version("3.3.0") - .stringConf - .createOptional - val KUBERNETES_EXECUTOR_REQUEST_CORES = ConfigBuilder("spark.kubernetes.executor.request.cores") .doc("Specify the cpu request for each executor pod") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala index 393edd2871ea0..9b37789d6c370 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala @@ -21,15 +21,13 @@ import io.fabric8.volcano.client.DefaultVolcanoClient import io.fabric8.volcano.scheduling.v1beta1.{PodGroup, PodGroupSpec} import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverConf, KubernetesExecutorConf, SparkPod} -import org.apache.spark.deploy.k8s.Config._ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureConfigStep with KubernetesExecutorCustomFeatureConfigStep { + import VolcanoFeatureStep._ private var kubernetesConf: KubernetesConf = _ - private val POD_GROUP_ANNOTATION = "scheduling.k8s.io/group-name" - private lazy val podGroupName = s"${kubernetesConf.appId}-podgroup" private lazy val namespace = kubernetesConf.namespace private var priorityClassName: Option[String] = None @@ -44,12 +42,7 @@ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureCon override def getAdditionalPreKubernetesResources(): Seq[HasMetadata] = { val client = new DefaultVolcanoClient - - val template = if (kubernetesConf.isInstanceOf[KubernetesDriverConf]) { - kubernetesConf.get(KUBERNETES_DRIVER_PODGROUP_TEMPLATE_FILE) - } else { - kubernetesConf.get(KUBERNETES_EXECUTOR_PODGROUP_TEMPLATE_FILE) - } + val template = kubernetesConf.getOption(POD_GROUP_TEMPLATE_FILE_KEY) val pg = template.map(client.podGroups.load(_).get).getOrElse(new PodGroup()) var metadata = pg.getMetadata if (metadata == null) metadata = new ObjectMeta @@ -77,3 +70,8 @@ private[spark] class VolcanoFeatureStep extends 
KubernetesDriverCustomFeatureCon SparkPod(k8sPod, pod.container) } } + +private[spark] object VolcanoFeatureStep { + val POD_GROUP_ANNOTATION = "scheduling.k8s.io/group-name" + val POD_GROUP_TEMPLATE_FILE_KEY = "spark.kubernetes.scheduler.volcano.podGroupTemplateFile" +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala index 9f6bedb17626d..1b24e867e21e9 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala @@ -23,7 +23,6 @@ import io.fabric8.volcano.scheduling.v1beta1.PodGroup import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ -import org.apache.spark.deploy.k8s.Config._ class VolcanoFeatureStepSuite extends SparkFunSuite { @@ -74,7 +73,7 @@ class VolcanoFeatureStepSuite extends SparkFunSuite { val templatePath = new File( getClass.getResource("/driver-podgroup-template.yml").getFile).getAbsolutePath val sparkConf = new SparkConf() - .set(KUBERNETES_DRIVER_PODGROUP_TEMPLATE_FILE.key, templatePath) + .set(VolcanoFeatureStep.POD_GROUP_TEMPLATE_FILE_KEY, templatePath) val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf) val step = new VolcanoFeatureStep() step.init(kubernetesConf) @@ -88,24 +87,6 @@ class VolcanoFeatureStepSuite extends SparkFunSuite { assert(podGroup.getSpec.getQueue == "driver-queue") } - test("SPARK-38455: Support executor podgroup template") { - val templatePath = new File( - getClass.getResource("/executor-podgroup-template.yml").getFile).getAbsolutePath - val sparkConf = new SparkConf() - .set(KUBERNETES_EXECUTOR_PODGROUP_TEMPLATE_FILE.key, templatePath) - val kubernetesConf = KubernetesTestConf.createExecutorConf(sparkConf) - val step = new VolcanoFeatureStep() - step.init(kubernetesConf) - step.configurePod(SparkPod.initialPod()) - val podGroup = step.getAdditionalPreKubernetesResources().head.asInstanceOf[PodGroup] - assert(podGroup.getSpec.getMinMember == 1000) - assert(podGroup.getSpec.getMinResources.get("cpu").getAmount == "4") - assert(podGroup.getSpec.getMinResources.get("memory").getAmount == "16") - assert(podGroup.getSpec.getMinResources.get("memory").getFormat == "Gi") - assert(podGroup.getSpec.getPriorityClassName == "executor-priority") - assert(podGroup.getSpec.getQueue == "executor-queue") - } - private def verifyPriority(pod: SparkPod): Unit = { val sparkConf = new SparkConf() val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala index 85c8497dea6f7..eab7993e252b3 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala @@ -214,12 +214,12 @@ private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: Ku .set(KUBERNETES_DRIVER_POD_FEATURE_STEPS.key, VOLCANO_FEATURE_STEP) 
.set(KUBERNETES_EXECUTOR_POD_FEATURE_STEPS.key, VOLCANO_FEATURE_STEP) queue.foreach { q => - conf.set(KUBERNETES_DRIVER_PODGROUP_TEMPLATE_FILE.key, + conf.set(VolcanoFeatureStep.POD_GROUP_TEMPLATE_FILE_KEY, new File( getClass.getResource(s"/volcano/$q-driver-podgroup-template.yml").getFile ).getAbsolutePath) } - driverPodGroupTemplate.foreach(conf.set(KUBERNETES_DRIVER_PODGROUP_TEMPLATE_FILE.key, _)) + driverPodGroupTemplate.foreach(conf.set(VolcanoFeatureStep.POD_GROUP_TEMPLATE_FILE_KEY, _)) groupLoc.foreach { locator => conf.set(s"${KUBERNETES_DRIVER_LABEL_PREFIX}spark-group-locator", locator) conf.set(s"${KUBERNETES_EXECUTOR_LABEL_PREFIX}spark-group-locator", locator) From 2239e9d173e7f93d2d66e36f3b274ef1fc6f6d58 Mon Sep 17 00:00:00 2001 From: Brian Fallik Date: Fri, 11 Mar 2022 09:30:10 +0900 Subject: [PATCH 463/513] [MINOR][DOCS] Fix minor typos at nulls_option in Window Functions ### What changes were proposed in this pull request? Fix a typo: `RESECT` -> `RESPECT`. ### Why are the changes needed? `RESECT` isn't a valid term here ### Does this PR introduce _any_ user-facing change? Yes, the typos in the docs are corrected. ### How was this patch tested? inspection via the Github UI Closes #35774 from bfallik/patch-1. Authored-by: Brian Fallik Signed-off-by: Hyukjin Kwon --- docs/sql-ref-syntax-qry-select-window.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-ref-syntax-qry-select-window.md b/docs/sql-ref-syntax-qry-select-window.md index 9fbebcf407933..3615252895592 100644 --- a/docs/sql-ref-syntax-qry-select-window.md +++ b/docs/sql-ref-syntax-qry-select-window.md @@ -52,7 +52,7 @@ window_function [ nulls_option ] OVER * **nulls_option** - Specifies whether or not to skip null values when evaluating the window function. `RESECT NULLS` means not skipping null values, while `IGNORE NULLS` means skipping. If not specified, the default is `RESECT NULLS`. + Specifies whether or not to skip null values when evaluating the window function. `RESPECT NULLS` means not skipping null values, while `IGNORE NULLS` means skipping. If not specified, the default is `RESPECT NULLS`. **Syntax:** From 54abb85aa303b9ccca9a127b45f53e2acc5de438 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 11 Mar 2022 11:34:42 +0900 Subject: [PATCH 464/513] [SPARK-38517][INFRA] Fix PySpark documentation generation (missing ipython_genutils) ### What changes were proposed in this pull request? Currently, Python documentation build fails as below: ``` Extension error: Could not import extension nbsphinx (exception: No module named 'ipython_genutils') make: *** [Makefile:35: html] Error 2 ------------------------------------------------ Jekyll 4.2.1 Please append `--trace` to the `build` command for any additional information or backtrace. 
------------------------------------------------ /__w/spark/spark/docs/_plugins/copy_api_dirs.rb:130:in `': Python doc generation failed (RuntimeError) from /__w/spark/spark/docs/.local_ruby_bundle/ruby/2.7.0/gems/jekyll-4.2.1/lib/jekyll/external.rb:60:in `require' from /__w/spark/spark/docs/.local_ruby_bundle/ruby/2.7.0/gems/jekyll-4.2.1/lib/jekyll/external.rb:60:in `block in require_with_graceful_fail' from /__w/spark/spark/docs/.local_ruby_bundle/ruby/2.7.0/gems/jekyll-4.2.1/lib/jekyll/external.rb:57:in `each' from /__w/spark/spark/docs/.local_ruby_bundle/ruby/2.7.0/gems/jekyll-4.2.1/lib/jekyll/external.rb:57:in `require_with_graceful_fail' from /__w/spark/spark/docs/.local_ruby_bundle/ruby/2.7.0/gems/jekyll-4.2.1/lib/jekyll/plugin_manager.rb:89:in `block in require_plugin_files' from /__w/spark/spark/docs/.local_ruby_bundle/ruby/2.7.0/gems/jekyll-4.2.1/lib/jekyll/plugin_manager.rb:87:in `each' from /__w/spark/spark/docs/.local_ruby_bundle/ruby/2.7.0/gems/jekyll-4.2.1/lib/jekyll/plugin_manager.rb:87:in `require_plugin_files' from /__w/spark/spark/docs/.local_ruby_bundle/ruby/2.7.0/gems/jekyll-4.2.1/lib/jekyll/plugin_manager.rb:21:in `conscientious_require' from /__w/spark/spark/docs/.local_ruby_bundle/ruby/2.7.0/gems/jekyll-4.2.1/lib/jekyll/site.rb:131:in `setup' ``` https://github.com/apache/spark/runs/5504729423?check_suite_focus=true This PR proposes to simply install `ipython_genutils` to fix up the build for now. ### Why are the changes needed? To fix the broken build. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? CI in this PR should test it out. Closes #35812 from HyukjinKwon/SPARK-38517. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .github/workflows/build_and_test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index fbd5db251334d..ebe17b5963f20 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -540,6 +540,7 @@ jobs: # Pin the MarkupSafe to 2.0.1 to resolve the CI error. # See also https://issues.apache.org/jira/browse/SPARK-38279. python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' + python3.9 -m pip install ipython_genutils # See SPARK-38517 python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' apt-get update -y apt-get install -y ruby ruby-dev From aec70e802e0ff530c63f2f62eb19b29ec2c4ed35 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 10 Mar 2022 19:01:09 -0800 Subject: [PATCH 465/513] [SPARK-38511][K8S] Remove `priorityClassName` propagation in favor of explicit settings ### What changes were proposed in this pull request? This PR aims to remove implicit `priorityClassName` propagation from the driver pod to the podgroup in favor of explicit settings via Driver Pod templates and PodGroup templates. ### Why are the changes needed? Currently, Apache Spark supports different `priorityClassName` values for the driver and executors. The most complex case is when users set three priorityClassNames: one each for (1) the driver pod, (2) the executor pods, and (3) the podgroup. This PR allows users to define and use different settings for pod priority and pod group priority, as discussed in https://github.com/apache/spark/pull/35786#discussion_r823745191. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI and K8s ITs.
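For context, here is a minimal Scala sketch of how an application would now opt in explicitly; the PodGroup's priorityClassName and queue are carried by the template file itself rather than copied from the driver pod. The `spark.kubernetes.scheduler.volcano.podGroupTemplateFile` key is the one introduced earlier in this series, while the feature-step setting and the template path below are illustrative assumptions, not taken from this patch:
```
// Sketch only: the feature-step key value and the template path are assumptions for illustration.
import org.apache.spark.SparkConf

val conf = new SparkConf()
  // Register the Volcano feature step for the driver pod.
  .set("spark.kubernetes.driver.pod.featureSteps",
    "org.apache.spark.deploy.k8s.features.VolcanoFeatureStep")
  // After this change, priorityClassName and queue come only from the PodGroup template file.
  .set("spark.kubernetes.scheduler.volcano.podGroupTemplateFile",
    "/opt/spark/conf/driver-podgroup-template.yml")
```
With this split, a deployment can choose one priority for the driver pod (via a pod template) and a different priority for the PodGroup without any implicit coupling between the two.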
``` [info] KubernetesSuite: [info] VolcanoSuite: [info] - Run SparkPi with volcano scheduler (9 seconds, 975 milliseconds) [info] - SPARK-38187: Run SparkPi Jobs with minCPU (28 seconds, 508 milliseconds) [info] - SPARK-38187: Run SparkPi Jobs with minMemory (27 seconds, 459 milliseconds) [info] - SPARK-38188: Run SparkPi jobs with 2 queues (only 1 enabled) (13 seconds, 285 milliseconds) [info] - SPARK-38188: Run SparkPi jobs with 2 queues (all enabled) (21 seconds, 344 milliseconds) [info] - SPARK-38423: Run driver job to validate priority order (15 seconds, 382 milliseconds) [info] Run completed in 3 minutes, 8 seconds. [info] Total number of tests run: 6 [info] Suites: completed 2, aborted 0 [info] Tests: succeeded 6, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. ``` Closes #35807 from dongjoon-hyun/SPARK-38511. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../k8s/features/VolcanoFeatureStep.scala | 5 ---- .../features/VolcanoFeatureStepSuite.scala | 20 -------------- ...high-priority-driver-podgroup-template.yml | 21 +++++++++++++++ .../low-priority-driver-podgroup-template.yml | 21 +++++++++++++++ ...dium-priority-driver-podgroup-template.yml | 21 +++++++++++++++ .../integrationtest/VolcanoTestsSuite.scala | 27 +++---------------- 6 files changed, 67 insertions(+), 48 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/high-priority-driver-podgroup-template.yml create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/low-priority-driver-podgroup-template.yml create mode 100644 resource-managers/kubernetes/integration-tests/src/test/resources/volcano/medium-priority-driver-podgroup-template.yml diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala index 9b37789d6c370..091923a78efe5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala @@ -30,7 +30,6 @@ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureCon private lazy val podGroupName = s"${kubernetesConf.appId}-podgroup" private lazy val namespace = kubernetesConf.namespace - private var priorityClassName: Option[String] = None override def init(config: KubernetesDriverConf): Unit = { kubernetesConf = config @@ -52,16 +51,12 @@ private[spark] class VolcanoFeatureStep extends KubernetesDriverCustomFeatureCon var spec = pg.getSpec if (spec == null) spec = new PodGroupSpec - priorityClassName.foreach(spec.setPriorityClassName(_)) pg.setSpec(spec) Seq(pg) } override def configurePod(pod: SparkPod): SparkPod = { - - priorityClassName = Option(pod.pod.getSpec.getPriorityClassName) - val k8sPodBuilder = new PodBuilder(pod.pod) .editMetadata() .addToAnnotations(POD_GROUP_ANNOTATION, podGroupName) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala index 1b24e867e21e9..12daffc8961f5 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala +++ 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.deploy.k8s.features import java.io.File -import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder} import io.fabric8.volcano.scheduling.v1beta1.PodGroup import org.apache.spark.{SparkConf, SparkFunSuite} @@ -50,25 +49,6 @@ class VolcanoFeatureStepSuite extends SparkFunSuite { assert(annotations.get("scheduling.k8s.io/group-name") === s"${kubernetesConf.appId}-podgroup") } - test("SPARK-38423: Support priorityClassName") { - // test null priority - val podWithNullPriority = SparkPod.initialPod() - assert(podWithNullPriority.pod.getSpec.getPriorityClassName === null) - verifyPriority(SparkPod.initialPod()) - // test normal priority - val podWithPriority = SparkPod( - new PodBuilder() - .withNewMetadata() - .endMetadata() - .withNewSpec() - .withPriorityClassName("priority") - .endSpec() - .build(), - new ContainerBuilder().build()) - assert(podWithPriority.pod.getSpec.getPriorityClassName === "priority") - verifyPriority(podWithPriority) - } - test("SPARK-38455: Support driver podgroup template") { val templatePath = new File( getClass.getResource("/driver-podgroup-template.yml").getFile).getAbsolutePath diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/high-priority-driver-podgroup-template.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/high-priority-driver-podgroup-template.yml new file mode 100644 index 0000000000000..a64431d69daa5 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/high-priority-driver-podgroup-template.yml @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: PodGroup +spec: + priorityClassName: high + queue: queue diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/low-priority-driver-podgroup-template.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/low-priority-driver-podgroup-template.yml new file mode 100644 index 0000000000000..5e89630c01705 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/low-priority-driver-podgroup-template.yml @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: PodGroup +spec: + priorityClassName: low + queue: queue diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/medium-priority-driver-podgroup-template.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/medium-priority-driver-podgroup-template.yml new file mode 100644 index 0000000000000..5773e8b6b14be --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/medium-priority-driver-podgroup-template.yml @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +apiVersion: scheduling.volcano.sh/v1beta1 +kind: PodGroup +spec: + priorityClassName: medium + queue: queue diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala index eab7993e252b3..8d5054465b9e5 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala @@ -336,29 +336,6 @@ private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: Ku } } - test("SPARK-38423: Run SparkPi Jobs with priorityClassName", k8sTestTag, volcanoTag) { - // Prepare the priority resource - createOrReplaceYAMLResource(VOLCANO_PRIORITY_YAML) - val priorities = Seq("low", "medium", "high") - val groupName = generateGroupName("priority") - priorities.foreach { p => - Future { - val templatePath = new File( - getClass.getResource(s"/volcano/$p-priority-driver-template.yml").getFile - ).getAbsolutePath - runJobAndVerify( - p, groupLoc = Option(groupName), - driverTemplate = Option(templatePath) - ) - } - } - // Make sure all jobs are Succeeded - Eventually.eventually(TIMEOUT, INTERVAL) { - val pods = getPods(role = "driver", groupName, statusPhase = "Succeeded") - assert(pods.size === priorities.size) - } - } - test("SPARK-38423: Run driver job to validate priority order", k8sTestTag, volcanoTag) { // Prepare the priority resource and queue createOrReplaceYAMLResource(DISABLE_QUEUE) @@ -370,11 +347,15 @@ private[spark] trait VolcanoTestsSuite extends BeforeAndAfterEach { k8sSuite: Ku val templatePath = new File( getClass.getResource(s"/volcano/$p-priority-driver-template.yml").getFile ).getAbsolutePath + val pgTemplatePath = new File( + getClass.getResource(s"/volcano/$p-priority-driver-podgroup-template.yml").getFile + ).getAbsolutePath val groupName = generateGroupName(p) runJobAndVerify( p, groupLoc = Option(groupName), queue = Option("queue"), driverTemplate = Option(templatePath), + driverPodGroupTemplate = Option(pgTemplatePath), isDriverJob = true ) } From 2e3ac4ffa332235f88aa8da523796617bc89db6f Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 11 Mar 2022 10:47:35 +0300 Subject: [PATCH 466/513] [SPARK-38509][SQL] Unregister the `TIMESTAMPADD/DIFF` functions and remove `DATE_ADD/DIFF` ### What changes were proposed in this pull request? 1. Unregister the functions `timestampadd()` and `timestampdiff()` in `FunctionRegistry.expressions`. 2. Remove the aliases `date_add` for `timestampadd()` and `date_diff` for `timestampdiff()`. 3. Align tests (regenerate golden files) to the syntax rules ``` primaryExpression | name=(TIMESTAMPADD | DATEADD) LEFT_PAREN unit=identifier COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN #timestampadd | name=(TIMESTAMPDIFF | DATEDIFF) LEFT_PAREN unit=identifier COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff ``` where the first parameter `unit` can have one of the identifiers: - YEAR - QUARTER - MONTH - WEEK - DAY, DAYOFYEAR (valid for timestampadd) - HOUR - MINUTE - SECOND - MILLISECOND - MICROSECOND ### Why are the changes needed? 1. 
The `timestampadd()`/`timestampdiff()` functions (and their aliases) with an arbitrary string column as the first parameter are not required by any standard. 2. Removing the functions and aliases should reduce maintenance cost. ### Does this PR introduce _any_ user-facing change? No, the functions and aliases haven't been released yet. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *QueryExecutionErrorsSuite" $ build/sbt "test:testOnly *DateTimeUtilsSuite" $ build/sbt "sql/test:testOnly org.apache.spark.sql.expressions.ExpressionInfoSuite" $ build/sbt "sql/testOnly *ExpressionsSchemaSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z timestamp.sql" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z timestamp-ansi.sql" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z datetime-legacy.sql" $ build/sbt "test:testOnly *DateExpressionsSuite" $ build/sbt "test:testOnly *SQLKeywordSuite" ``` Closes #35805 from MaxGekk/unregister-timestampadd-func. Authored-by: Max Gekk Signed-off-by: Max Gekk --- docs/sql-ref-ansi-compliance.md | 2 - .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 2 - .../sql/catalyst/parser/SqlBaseParser.g4 | 8 +- .../catalyst/analysis/FunctionRegistry.scala | 2 - .../expressions/datetimeExpressions.scala | 68 +++++++-------- .../sql/catalyst/parser/AstBuilder.scala | 12 +-- .../expressions/DateExpressionsSuite.scala | 36 +++----- .../sql-functions/sql-expression-schema.md | 10 +-- .../test/resources/sql-tests/inputs/date.sql | 20 ++--- .../sql-tests/inputs/timestamp-ntz.sql | 4 +- .../resources/sql-tests/inputs/timestamp.sql | 8 +- .../sql-tests/results/ansi/date.sql.out | 60 ++++++------- .../sql-tests/results/ansi/timestamp.sql.out | 24 +++--- .../resources/sql-tests/results/date.sql.out | 60 ++++++------- .../sql-tests/results/datetime-legacy.sql.out | 84 +++++++++---------- .../sql-tests/results/timestamp-ntz.sql.out | 12 +-- .../sql-tests/results/timestamp.sql.out | 24 +++--- .../timestampNTZ/timestamp-ansi.sql.out | 24 +++--- .../results/timestampNTZ/timestamp.sql.out | 24 +++--- .../errors/QueryExecutionErrorsSuite.scala | 4 +- 20 files changed, 224 insertions(+), 264 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 5b187192d3c5e..ccb0ab2731829 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -391,9 +391,7 @@ Below is a list of all the keywords in Spark SQL.
|DATABASE|non-reserved|non-reserved|non-reserved| |DATABASES|non-reserved|non-reserved|non-reserved| |DATEADD|non-reserved|non-reserved|non-reserved| -|DATE_ADD|non-reserved|non-reserved|non-reserved| |DATEDIFF|non-reserved|non-reserved|non-reserved| -|DATE_DIFF|non-reserved|non-reserved|non-reserved| |DAY|non-reserved|non-reserved|non-reserved| |DBPROPERTIES|non-reserved|non-reserved|non-reserved| |DEFAULT|non-reserved|non-reserved|non-reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index a6e1b6530822b..1387f143a41b9 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -143,9 +143,7 @@ DATA: 'DATA'; DATABASE: 'DATABASE'; DATABASES: 'DATABASES'; DATEADD: 'DATEADD'; -DATE_ADD: 'DATE_ADD'; DATEDIFF: 'DATEDIFF'; -DATE_DIFF: 'DATE_DIFF'; DBPROPERTIES: 'DBPROPERTIES'; DEFAULT: 'DEFAULT'; DEFINED: 'DEFINED'; diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index b3b834710757c..9dcc5db69fd2d 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -808,8 +808,8 @@ valueExpression primaryExpression : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER) #currentLike - | name=(TIMESTAMPADD | DATEADD | DATE_ADD) LEFT_PAREN unit=identifier COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN #timestampadd - | name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF) LEFT_PAREN unit=identifier COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff + | name=(TIMESTAMPADD | DATEADD) LEFT_PAREN unit=identifier COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN #timestampadd + | name=(TIMESTAMPDIFF | DATEDIFF) LEFT_PAREN unit=identifier COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase | CASE value=expression whenClause+ (ELSE elseExpression=expression)? 
END #simpleCase | name=(CAST | TRY_CAST) LEFT_PAREN expression AS dataType RIGHT_PAREN #cast @@ -1095,9 +1095,7 @@ ansiNonReserved | DATABASE | DATABASES | DATEADD - | DATE_ADD | DATEDIFF - | DATE_DIFF | DAY | DBPROPERTIES | DEFAULT @@ -1348,9 +1346,7 @@ nonReserved | DATABASE | DATABASES | DATEADD - | DATE_ADD | DATEDIFF - | DATE_DIFF | DAY | DBPROPERTIES | DEFAULT diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index e131bd87626b9..e5954c8f26942 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -631,8 +631,6 @@ object FunctionRegistry { expression[UnixMillis]("unix_millis"), expression[UnixMicros]("unix_micros"), expression[ConvertTimezone]("convert_timezone"), - expression[TimestampAdd]("timestampadd"), - expression[TimestampDiff]("timestampdiff"), // collection functions expression[CreateArray]("array"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index d8cf474e65e69..013f11ac29786 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -3070,9 +3070,9 @@ case class ConvertTimezone( """, examples = """ Examples: - > SELECT _FUNC_('HOUR', 8, timestamp_ntz'2022-02-11 20:30:00'); + > SELECT _FUNC_(HOUR, 8, timestamp_ntz'2022-02-11 20:30:00'); 2022-02-12 04:30:00 - > SELECT _FUNC_('MONTH', 1, timestamp_ltz'2022-01-31 00:00:00'); + > SELECT _FUNC_(MONTH, 1, timestamp_ltz'2022-01-31 00:00:00'); 2022-02-28 00:00:00 > SELECT _FUNC_(SECOND, -10, date'2022-01-01'); 2021-12-31 23:59:50 @@ -3083,23 +3083,22 @@ case class ConvertTimezone( since = "3.3.0") // scalastyle:on line.size.limit case class TimestampAdd( - unit: Expression, + unit: String, quantity: Expression, timestamp: Expression, timeZoneId: Option[String] = None) - extends TernaryExpression + extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant with TimeZoneAwareExpression { - def this(unit: Expression, quantity: Expression, timestamp: Expression) = + def this(unit: String, quantity: Expression, timestamp: Expression) = this(unit, quantity, timestamp, None) - override def first: Expression = unit - override def second: Expression = quantity - override def third: Expression = timestamp + override def left: Expression = quantity + override def right: Expression = timestamp - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, IntegerType, AnyTimestampType) + override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, AnyTimestampType) override def dataType: DataType = timestamp.dataType override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = @@ -3107,28 +3106,23 @@ case class TimestampAdd( @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(timestamp.dataType) - override def nullSafeEval(u: Any, q: Any, micros: Any): Any = { - DateTimeUtils.timestampAdd( - u.asInstanceOf[UTF8String].toString, - q.asInstanceOf[Int], - micros.asInstanceOf[Long], - zoneIdInEval) + override def nullSafeEval(q: Any, micros: Any): Any = { + DateTimeUtils.timestampAdd(unit, 
q.asInstanceOf[Int], micros.asInstanceOf[Long], zoneIdInEval) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") val zid = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName) - defineCodeGen(ctx, ev, (u, q, micros) => - s"""$dtu.timestampAdd($u.toString(), $q, $micros, $zid)""") + defineCodeGen(ctx, ev, (q, micros) => + s"""$dtu.timestampAdd("$unit", $q, $micros, $zid)""") } override def prettyName: String = "timestampadd" override protected def withNewChildrenInternal( - newFirst: Expression, - newSecond: Expression, - newThird: Expression): TimestampAdd = { - copy(unit = newFirst, quantity = newSecond, timestamp = newThird) + newLeft: Expression, + newRight: Expression): TimestampAdd = { + copy(quantity = newLeft, timestamp = newRight) } } @@ -3154,9 +3148,9 @@ case class TimestampAdd( """, examples = """ Examples: - > SELECT _FUNC_('HOUR', timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00'); + > SELECT _FUNC_(HOUR, timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00'); 8 - > SELECT _FUNC_('MONTH', timestamp_ltz'2022-01-01 00:00:00', timestamp_ltz'2022-02-28 00:00:00'); + > SELECT _FUNC_(MONTH, timestamp_ltz'2022-01-01 00:00:00', timestamp_ltz'2022-02-28 00:00:00'); 1 > SELECT _FUNC_(SECOND, date'2022-01-01', timestamp'2021-12-31 23:59:50'); -10 @@ -3167,23 +3161,22 @@ case class TimestampAdd( since = "3.3.0") // scalastyle:on line.size.limit case class TimestampDiff( - unit: Expression, + unit: String, startTimestamp: Expression, endTimestamp: Expression, timeZoneId: Option[String] = None) - extends TernaryExpression + extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant with TimeZoneAwareExpression { - def this(unit: Expression, quantity: Expression, timestamp: Expression) = + def this(unit: String, quantity: Expression, timestamp: Expression) = this(unit, quantity, timestamp, None) - override def first: Expression = unit - override def second: Expression = startTimestamp - override def third: Expression = endTimestamp + override def left: Expression = startTimestamp + override def right: Expression = endTimestamp - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, TimestampType, TimestampType) + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, TimestampType) override def dataType: DataType = LongType override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = @@ -3191,9 +3184,9 @@ case class TimestampDiff( @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(endTimestamp.dataType) - override def nullSafeEval(u: Any, startMicros: Any, endMicros: Any): Any = { + override def nullSafeEval(startMicros: Any, endMicros: Any): Any = { DateTimeUtils.timestampDiff( - u.asInstanceOf[UTF8String].toString, + unit, startMicros.asInstanceOf[Long], endMicros.asInstanceOf[Long], zoneIdInEval) @@ -3202,16 +3195,15 @@ case class TimestampDiff( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") val zid = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName) - defineCodeGen(ctx, ev, (u, s, e) => - s"""$dtu.timestampDiff($u.toString(), $s, $e, $zid)""") + defineCodeGen(ctx, ev, (s, e) => + s"""$dtu.timestampDiff("$unit", $s, $e, $zid)""") } override def prettyName: String = "timestampdiff" override protected def withNewChildrenInternal( - newFirst: Expression, - newSecond: Expression, - newThird: 
Expression): TimestampDiff = { - copy(unit = newFirst, startTimestamp = newSecond, endTimestamp = newThird) + newLeft: Expression, + newRight: Expression): TimestampDiff = { + copy(startTimestamp = newLeft, endTimestamp = newRight) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 4619a3f9be280..ddbe18b472adc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -4557,21 +4557,13 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit * Create a TimestampAdd expression. */ override def visitTimestampadd(ctx: TimestampaddContext): Expression = withOrigin(ctx) { - val arguments = Seq( - Literal(ctx.unit.getText), - expression(ctx.unitsAmount), - expression(ctx.timestamp)) - UnresolvedFunction("timestampadd", arguments, isDistinct = false) + TimestampAdd(ctx.unit.getText, expression(ctx.unitsAmount), expression(ctx.timestamp)) } /** * Create a TimestampDiff expression. */ override def visitTimestampdiff(ctx: TimestampdiffContext): Expression = withOrigin(ctx) { - val arguments = Seq( - Literal(ctx.unit.getText), - expression(ctx.startTimestamp), - expression(ctx.endTimestamp)) - UnresolvedFunction("timestampdiff", arguments, isDistinct = false) + TimestampDiff(ctx.unit.getText, expression(ctx.startTimestamp), expression(ctx.endTimestamp)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index ed4e9348f1889..c5d559c4501af 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1891,31 +1891,25 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("SPARK-38195: add a quantity of interval units to a timestamp") { // Check case-insensitivity checkEvaluation( - TimestampAdd(Literal("Hour"), Literal(1), Literal(LocalDateTime.of(2022, 2, 15, 12, 57, 0))), + TimestampAdd("Hour", Literal(1), Literal(LocalDateTime.of(2022, 2, 15, 12, 57, 0))), LocalDateTime.of(2022, 2, 15, 13, 57, 0)) // Check nulls as input values checkEvaluation( TimestampAdd( - Literal.create(null, StringType), - Literal(1), - Literal(LocalDateTime.of(2022, 2, 15, 12, 57, 0))), - null) - checkEvaluation( - TimestampAdd( - Literal("MINUTE"), + "MINUTE", Literal.create(null, IntegerType), Literal(LocalDateTime.of(2022, 2, 15, 12, 57, 0))), null) checkEvaluation( TimestampAdd( - Literal("MINUTE"), + "MINUTE", Literal(1), Literal.create(null, TimestampType)), null) // Check crossing the daylight saving time checkEvaluation( TimestampAdd( - Literal("HOUR"), + "HOUR", Literal(6), Literal(Instant.parse("2022-03-12T23:30:00Z")), Some("America/Los_Angeles")), @@ -1923,7 +1917,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // Check the leap year checkEvaluation( TimestampAdd( - Literal("DAY"), + "DAY", Literal(2), Literal(LocalDateTime.of(2020, 2, 28, 10, 11, 12)), Some("America/Los_Angeles")), @@ -1940,7 +1934,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkConsistencyBetweenInterpretedAndCodegenAllowingException( (quantity: 
Expression, timestamp: Expression) => TimestampAdd( - Literal(unit), + unit, quantity, timestamp, Some(tz)), @@ -1954,33 +1948,27 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // Check case-insensitivity checkEvaluation( TimestampDiff( - Literal("Hour"), + "Hour", Literal(Instant.parse("2022-02-15T12:57:00Z")), Literal(Instant.parse("2022-02-15T13:57:00Z"))), 1L) // Check nulls as input values checkEvaluation( TimestampDiff( - Literal.create(null, StringType), - Literal(Instant.parse("2021-02-15T12:57:00Z")), - Literal(Instant.parse("2022-02-15T12:57:00Z"))), - null) - checkEvaluation( - TimestampDiff( - Literal("MINUTE"), + "MINUTE", Literal.create(null, TimestampType), Literal(Instant.parse("2022-02-15T12:57:00Z"))), null) checkEvaluation( TimestampDiff( - Literal("MINUTE"), + "MINUTE", Literal(Instant.parse("2021-02-15T12:57:00Z")), Literal.create(null, TimestampType)), null) // Check crossing the daylight saving time checkEvaluation( TimestampDiff( - Literal("HOUR"), + "HOUR", Literal(Instant.parse("2022-03-12T23:30:00Z")), Literal(Instant.parse("2022-03-13T05:30:00Z")), Some("America/Los_Angeles")), @@ -1988,7 +1976,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // Check the leap year checkEvaluation( TimestampDiff( - Literal("DAY"), + "DAY", Literal(Instant.parse("2020-02-28T10:11:12Z")), Literal(Instant.parse("2020-03-01T10:21:12Z")), Some("America/Los_Angeles")), @@ -2004,7 +1992,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkConsistencyBetweenInterpretedAndCodegenAllowingException( (startTs: Expression, endTs: Expression) => TimestampDiff( - Literal(unit), + unit, startTs, endTs, Some(tz)), diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 052e88e798440..7125c34fbbd73 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 384 + - Number of queries: 382 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -76,7 +76,7 @@ | org.apache.spark.sql.catalyst.expressions.Coalesce | coalesce | SELECT coalesce(NULL, 1, NULL) | struct | | org.apache.spark.sql.catalyst.expressions.Concat | concat | SELECT concat('Spark', 'SQL') | struct | | org.apache.spark.sql.catalyst.expressions.ConcatWs | concat_ws | SELECT concat_ws(' ', 'Spark', 'SQL') | struct | -| org.apache.spark.sql.catalyst.expressions.Contains | contains | SELECT contains('Spark SQL', 'Spark') | struct | +| org.apache.spark.sql.catalyst.expressions.ContainsExpressionBuilder$ | contains | SELECT contains('Spark SQL', 'Spark') | struct | | org.apache.spark.sql.catalyst.expressions.Conv | conv | SELECT conv('100', 2, 10) | struct | | org.apache.spark.sql.catalyst.expressions.ConvertTimezone | convert_timezone | SELECT convert_timezone('Europe/Amsterdam', 'America/Los_Angeles', timestamp_ntz'2021-12-06 00:00:00') | struct | | org.apache.spark.sql.catalyst.expressions.Cos | cos | SELECT cos(0) | struct | @@ -112,7 +112,7 @@ | org.apache.spark.sql.catalyst.expressions.ElementAt | element_at | SELECT element_at(array(1, 2, 3), 2) | struct | | org.apache.spark.sql.catalyst.expressions.Elt | elt | SELECT elt(1, 'scala', 'java') | struct | 
| org.apache.spark.sql.catalyst.expressions.Encode | encode | SELECT encode('abc', 'utf-8') | struct | -| org.apache.spark.sql.catalyst.expressions.EndsWith | endswith | SELECT endswith('Spark SQL', 'SQL') | struct | +| org.apache.spark.sql.catalyst.expressions.EndsWithExpressionBuilder$ | endswith | SELECT endswith('Spark SQL', 'SQL') | struct | | org.apache.spark.sql.catalyst.expressions.EqualNullSafe | <=> | SELECT 2 <=> 2 | struct<(2 <=> 2):boolean> | | org.apache.spark.sql.catalyst.expressions.EqualTo | = | SELECT 2 = 2 | struct<(2 = 2):boolean> | | org.apache.spark.sql.catalyst.expressions.EqualTo | == | SELECT 2 == 2 | struct<(2 = 2):boolean> | @@ -277,7 +277,7 @@ | org.apache.spark.sql.catalyst.expressions.SparkVersion | version | SELECT version() | struct | | org.apache.spark.sql.catalyst.expressions.Sqrt | sqrt | SELECT sqrt(4) | struct | | org.apache.spark.sql.catalyst.expressions.Stack | stack | SELECT stack(2, 1, 2, 3) | struct | -| org.apache.spark.sql.catalyst.expressions.StartsWith | startswith | SELECT startswith('Spark SQL', 'Spark') | struct | +| org.apache.spark.sql.catalyst.expressions.StartsWithExpressionBuilder$ | startswith | SELECT startswith('Spark SQL', 'Spark') | struct | | org.apache.spark.sql.catalyst.expressions.StringInstr | instr | SELECT instr('SparkSQL', 'SQL') | struct | | org.apache.spark.sql.catalyst.expressions.StringLocate | locate | SELECT locate('bar', 'foobarbar') | struct | | org.apache.spark.sql.catalyst.expressions.StringLocate | position | SELECT position('bar', 'foobarbar') | struct | @@ -300,8 +300,6 @@ | org.apache.spark.sql.catalyst.expressions.Tan | tan | SELECT tan(0) | struct | | org.apache.spark.sql.catalyst.expressions.Tanh | tanh | SELECT tanh(0) | struct | | org.apache.spark.sql.catalyst.expressions.TimeWindow | window | SELECT a, window.start, window.end, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:06:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, window(b, '5 minutes') ORDER BY a, start | struct | -| org.apache.spark.sql.catalyst.expressions.TimestampAdd | timestampadd | SELECT timestampadd('HOUR', 8, timestamp_ntz'2022-02-11 20:30:00') | struct | -| org.apache.spark.sql.catalyst.expressions.TimestampDiff | timestampdiff | SELECT timestampdiff('HOUR', timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00') | struct | | org.apache.spark.sql.catalyst.expressions.ToBinary | to_binary | SELECT to_binary('abc', 'utf-8') | struct | | org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct | | org.apache.spark.sql.catalyst.expressions.ToNumber | to_number | SELECT to_number('454', '999') | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/date.sql b/sql/core/src/test/resources/sql-tests/inputs/date.sql index 4c8d5a7b85a33..ab57c7c754c67 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/date.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/date.sql @@ -143,24 +143,24 @@ select from_csv('26/October/2015', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy')) -- Add a number of units to a timestamp or a date select dateadd(MICROSECOND, 1001, timestamp'2022-02-25 01:02:03.123'); -select date_add(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456'); +select dateadd(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456'); select dateadd(SECOND, 58, timestamp'2022-02-25 01:02:03'); -select date_add(MINUTE, -100, date'2022-02-25'); +select dateadd(MINUTE, -100, date'2022-02-25'); 
select dateadd(HOUR, -1, timestamp'2022-02-25 01:02:03'); -select date_add(DAY, 367, date'2022-02-25'); +select dateadd(DAY, 367, date'2022-02-25'); select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03'); -select date_add(MONTH, -1, timestamp'2022-02-25 01:02:03'); +select dateadd(MONTH, -1, timestamp'2022-02-25 01:02:03'); select dateadd(QUARTER, 5, date'2022-02-25'); -select date_add(YEAR, 1, date'2022-02-25'); +select dateadd(YEAR, 1, date'2022-02-25'); -- Get the difference between timestamps or dates in the specified units -select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001'); +select datediff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001'); select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455'); -select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01'); +select datediff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01'); select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00'); -select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03'); +select datediff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03'); select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00'); -select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03'); +select datediff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03'); select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03'); -select date_diff(QUARTER, date'2022-02-25', date'2023-05-25'); +select datediff(QUARTER, date'2022-02-25', date'2023-05-25'); select datediff(YEAR, date'2022-02-25', date'2023-02-25'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql index bbe5fb7bee6e6..b7dc2872e50d3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql @@ -19,7 +19,7 @@ SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 60.007); SELECT convert_timezone('Europe/Moscow', 'America/Los_Angeles', timestamp_ntz'2022-01-01 00:00:00'); -- Get the difference between timestamps w/o time zone in the specified units -select timestampdiff('QUARTER', timestamp_ntz'2022-01-01 01:02:03', timestamp_ntz'2022-05-02 05:06:07'); +select timestampdiff(QUARTER, timestamp_ntz'2022-01-01 01:02:03', timestamp_ntz'2022-05-02 05:06:07'); select timestampdiff(HOUR, timestamp_ntz'2022-02-14 01:02:03', timestamp_ltz'2022-02-14 02:03:04'); select timestampdiff(YEAR, date'2022-02-15', timestamp_ntz'2023-02-15 10:11:12'); -select timestampdiff('MILLISECOND', timestamp_ntz'2022-02-14 23:59:59.123', date'2022-02-15'); +select timestampdiff(MILLISECOND, timestamp_ntz'2022-02-14 23:59:59.123', date'2022-02-15'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql index 9e1652a6cfa06..21d27e98ab440 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql @@ -144,13 +144,13 @@ select from_json('{"t":"26/October/2015"}', 't Timestamp', map('timestampFormat' select from_csv('26/October/2015', 't Timestamp', map('timestampFormat', 'dd/MMMMM/yyyy')); -- Add a number of units to a timestamp or a date -select timestampadd('MONTH', -1, 
timestamp'2022-02-14 01:02:03'); +select timestampadd(MONTH, -1, timestamp'2022-02-14 01:02:03'); select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03'); select timestampadd(YEAR, 1, date'2022-02-15'); -select timestampadd('SECOND', -1, date'2022-02-15'); +select timestampadd(SECOND, -1, date'2022-02-15'); -- Get the difference between timestamps in the specified units -select timestampdiff('MONTH', timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03'); +select timestampdiff(MONTH, timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03'); select timestampdiff(MINUTE, timestamp'2022-02-14 01:02:03', timestamp'2022-02-14 02:00:03'); select timestampdiff(YEAR, date'2022-02-15', date'2023-02-15'); -select timestampdiff('SECOND', date'2022-02-15', timestamp'2022-02-14 23:59:59'); +select timestampdiff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 36cf228c6284b..437b56e2ffa3e 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -665,15 +665,15 @@ You may get a different result due to the upgrading to Spark >= 3.0: Fail to rec -- !query select dateadd(MICROSECOND, 1001, timestamp'2022-02-25 01:02:03.123') -- !query schema -struct +struct -- !query output 2022-02-25 01:02:03.124001 -- !query -select date_add(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456') +select dateadd(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456') -- !query schema -struct +struct -- !query output 2022-02-25 01:02:03.455 @@ -681,15 +681,15 @@ struct +struct -- !query output 2022-02-25 01:03:01 -- !query -select date_add(MINUTE, -100, date'2022-02-25') +select dateadd(MINUTE, -100, date'2022-02-25') -- !query schema -struct +struct -- !query output 2022-02-24 22:20:00 @@ -697,15 +697,15 @@ struct -- !query select dateadd(HOUR, -1, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-25 00:02:03 -- !query -select date_add(DAY, 367, date'2022-02-25') +select dateadd(DAY, 367, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-02-27 00:00:00 @@ -713,15 +713,15 @@ struct -- !query select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-28 01:02:03 -- !query -select date_add(MONTH, -1, timestamp'2022-02-25 01:02:03') +select dateadd(MONTH, -1, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-25 01:02:03 @@ -729,23 +729,23 @@ struct -- !query select dateadd(QUARTER, 5, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-05-25 00:00:00 -- !query -select date_add(YEAR, 1, date'2022-02-25') +select dateadd(YEAR, 1, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-02-25 00:00:00 -- !query -select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') +select datediff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') -- !query schema -struct +struct -- !query output 1001 @@ -753,15 +753,15 @@ struct +struct -- !query output -1 -- !query -select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01') +select datediff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01') -- !query schema -struct +struct -- !query output 58 @@ 
-769,15 +769,15 @@ struct +struct -- !query output -100 -- !query -select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') +select datediff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') -- !query schema -struct +struct -- !query output -1 @@ -785,15 +785,15 @@ struct +struct -- !query output 367 -- !query -select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') +select datediff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') -- !query schema -struct +struct -- !query output -4 @@ -801,15 +801,15 @@ struct +struct -- !query output -1 -- !query -select date_diff(QUARTER, date'2022-02-25', date'2023-05-25') +select datediff(QUARTER, date'2022-02-25', date'2023-05-25') -- !query schema -struct +struct -- !query output 5 @@ -817,6 +817,6 @@ struct -- !query select datediff(YEAR, date'2022-02-25', date'2023-02-25') -- !query schema -struct +struct -- !query output 1 diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out index dc25ed9b0d140..2946842e3f6e4 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out @@ -774,9 +774,9 @@ You may get a different result due to the upgrading to Spark >= 3.0: Fail to rec -- !query -select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03') +select timestampadd(MONTH, -1, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-14 01:02:03 @@ -784,7 +784,7 @@ struct -- !query select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-14 02:00:03 @@ -792,23 +792,23 @@ struct -- !query select timestampadd(YEAR, 1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2023-02-15 00:00:00 -- !query -select timestampadd('SECOND', -1, date'2022-02-15') +select timestampadd(SECOND, -1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2022-02-14 23:59:59 -- !query -select timestampdiff('MONTH', timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') +select timestampdiff(MONTH, timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') -- !query schema -struct +struct -- !query output -1 @@ -816,7 +816,7 @@ struct +struct -- !query output 58 @@ -824,14 +824,14 @@ struct +struct -- !query output 1 -- !query -select timestampdiff('SECOND', date'2022-02-15', timestamp'2022-02-14 23:59:59') +select timestampdiff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59') -- !query schema -struct +struct -- !query output -1 diff --git a/sql/core/src/test/resources/sql-tests/results/date.sql.out b/sql/core/src/test/resources/sql-tests/results/date.sql.out index ad6421a53df21..91c89ef5a93d7 100644 --- a/sql/core/src/test/resources/sql-tests/results/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/date.sql.out @@ -664,15 +664,15 @@ You may get a different result due to the upgrading to Spark >= 3.0: Fail to rec -- !query select dateadd(MICROSECOND, 1001, timestamp'2022-02-25 01:02:03.123') -- !query schema -struct +struct -- !query output 2022-02-25 01:02:03.124001 -- !query -select date_add(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456') +select dateadd(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456') -- !query schema -struct +struct -- !query output 2022-02-25 01:02:03.455 @@ -680,15 +680,15 @@ struct 
+struct -- !query output 2022-02-25 01:03:01 -- !query -select date_add(MINUTE, -100, date'2022-02-25') +select dateadd(MINUTE, -100, date'2022-02-25') -- !query schema -struct +struct -- !query output 2022-02-24 22:20:00 @@ -696,15 +696,15 @@ struct -- !query select dateadd(HOUR, -1, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-25 00:02:03 -- !query -select date_add(DAY, 367, date'2022-02-25') +select dateadd(DAY, 367, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-02-27 00:00:00 @@ -712,15 +712,15 @@ struct -- !query select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-28 01:02:03 -- !query -select date_add(MONTH, -1, timestamp'2022-02-25 01:02:03') +select dateadd(MONTH, -1, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-25 01:02:03 @@ -728,23 +728,23 @@ struct -- !query select dateadd(QUARTER, 5, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-05-25 00:00:00 -- !query -select date_add(YEAR, 1, date'2022-02-25') +select dateadd(YEAR, 1, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-02-25 00:00:00 -- !query -select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') +select datediff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') -- !query schema -struct +struct -- !query output 1001 @@ -752,15 +752,15 @@ struct +struct -- !query output -1 -- !query -select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01') +select datediff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01') -- !query schema -struct +struct -- !query output 58 @@ -768,15 +768,15 @@ struct +struct -- !query output -100 -- !query -select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') +select datediff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') -- !query schema -struct +struct -- !query output -1 @@ -784,15 +784,15 @@ struct +struct -- !query output 367 -- !query -select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') +select datediff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') -- !query schema -struct +struct -- !query output -4 @@ -800,15 +800,15 @@ struct +struct -- !query output -1 -- !query -select date_diff(QUARTER, date'2022-02-25', date'2023-05-25') +select datediff(QUARTER, date'2022-02-25', date'2023-05-25') -- !query schema -struct +struct -- !query output 5 @@ -816,6 +816,6 @@ struct -- !query select datediff(YEAR, date'2022-02-25', date'2023-02-25') -- !query schema -struct +struct -- !query output 1 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 8eeed14473fa5..ebfdf60effdae 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -661,15 +661,15 @@ struct> -- !query select dateadd(MICROSECOND, 1001, timestamp'2022-02-25 01:02:03.123') -- !query schema -struct +struct -- !query output 2022-02-25 01:02:03.124001 -- !query -select date_add(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456') +select dateadd(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456') -- !query schema -struct +struct -- !query output 2022-02-25 
01:02:03.455 @@ -677,15 +677,15 @@ struct +struct -- !query output 2022-02-25 01:03:01 -- !query -select date_add(MINUTE, -100, date'2022-02-25') +select dateadd(MINUTE, -100, date'2022-02-25') -- !query schema -struct +struct -- !query output 2022-02-24 22:20:00 @@ -693,15 +693,15 @@ struct -- !query select dateadd(HOUR, -1, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-25 00:02:03 -- !query -select date_add(DAY, 367, date'2022-02-25') +select dateadd(DAY, 367, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-02-27 00:00:00 @@ -709,15 +709,15 @@ struct -- !query select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-28 01:02:03 -- !query -select date_add(MONTH, -1, timestamp'2022-02-25 01:02:03') +select dateadd(MONTH, -1, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-25 01:02:03 @@ -725,23 +725,23 @@ struct -- !query select dateadd(QUARTER, 5, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-05-25 00:00:00 -- !query -select date_add(YEAR, 1, date'2022-02-25') +select dateadd(YEAR, 1, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-02-25 00:00:00 -- !query -select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') +select datediff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') -- !query schema -struct +struct -- !query output 1001 @@ -749,15 +749,15 @@ struct +struct -- !query output -1 -- !query -select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01') +select datediff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01') -- !query schema -struct +struct -- !query output 58 @@ -765,15 +765,15 @@ struct +struct -- !query output -100 -- !query -select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') +select datediff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') -- !query schema -struct +struct -- !query output -1 @@ -781,15 +781,15 @@ struct +struct -- !query output 367 -- !query -select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') +select datediff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') -- !query schema -struct +struct -- !query output -4 @@ -797,15 +797,15 @@ struct +struct -- !query output -1 -- !query -select date_diff(QUARTER, date'2022-02-25', date'2023-05-25') +select datediff(QUARTER, date'2022-02-25', date'2023-05-25') -- !query schema -struct +struct -- !query output 5 @@ -813,7 +813,7 @@ struct -- !query select datediff(YEAR, date'2022-02-25', date'2023-02-25') -- !query schema -struct +struct -- !query output 1 @@ -1578,9 +1578,9 @@ struct> -- !query -select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03') +select timestampadd(MONTH, -1, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-14 01:02:03 @@ -1588,7 +1588,7 @@ struct -- !query select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-14 02:00:03 @@ -1596,23 +1596,23 @@ struct -- !query select timestampadd(YEAR, 1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2023-02-15 00:00:00 -- !query -select timestampadd('SECOND', -1, date'2022-02-15') +select timestampadd(SECOND, -1, date'2022-02-15') -- !query schema -struct 
+struct -- !query output 2022-02-14 23:59:59 -- !query -select timestampdiff('MONTH', timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') +select timestampdiff(MONTH, timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') -- !query schema -struct +struct -- !query output -1 @@ -1620,7 +1620,7 @@ struct +struct -- !query output 58 @@ -1628,14 +1628,14 @@ struct +struct -- !query output 1 -- !query -select timestampdiff('SECOND', date'2022-02-15', timestamp'2022-02-14 23:59:59') +select timestampdiff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59') -- !query schema -struct +struct -- !query output -1 diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out index fcd74d88eb633..c4fcff4c2b81b 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out @@ -68,9 +68,9 @@ struct +struct -- !query output 1 @@ -78,7 +78,7 @@ struct +struct -- !query output 1 @@ -86,14 +86,14 @@ struct +struct -- !query output 1 -- !query -select timestampdiff('MILLISECOND', timestamp_ntz'2022-02-14 23:59:59.123', date'2022-02-15') +select timestampdiff(MILLISECOND, timestamp_ntz'2022-02-14 23:59:59.123', date'2022-02-15') -- !query schema -struct +struct -- !query output 877 diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out index 282e76351e805..0ebdf4cc01615 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out @@ -768,9 +768,9 @@ You may get a different result due to the upgrading to Spark >= 3.0: Fail to rec -- !query -select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03') +select timestampadd(MONTH, -1, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-14 01:02:03 @@ -778,7 +778,7 @@ struct -- !query select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-14 02:00:03 @@ -786,23 +786,23 @@ struct -- !query select timestampadd(YEAR, 1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2023-02-15 00:00:00 -- !query -select timestampadd('SECOND', -1, date'2022-02-15') +select timestampadd(SECOND, -1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2022-02-14 23:59:59 -- !query -select timestampdiff('MONTH', timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') +select timestampdiff(MONTH, timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') -- !query schema -struct +struct -- !query output -1 @@ -810,7 +810,7 @@ struct +struct -- !query output 58 @@ -818,14 +818,14 @@ struct +struct -- !query output 1 -- !query -select timestampdiff('SECOND', date'2022-02-15', timestamp'2022-02-14 23:59:59') +select timestampdiff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59') -- !query schema -struct +struct -- !query output -1 diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index 95120a83931ec..f7552ed4f62cc 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -772,9 +772,9 @@ struct> -- 
!query -select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03') +select timestampadd(MONTH, -1, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-14 01:02:03 @@ -782,7 +782,7 @@ struct +struct -- !query output 2022-02-14 02:00:03 @@ -790,23 +790,23 @@ struct +struct -- !query output 2023-02-15 00:00:00 -- !query -select timestampadd('SECOND', -1, date'2022-02-15') +select timestampadd(SECOND, -1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2022-02-14 23:59:59 -- !query -select timestampdiff('MONTH', timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') +select timestampdiff(MONTH, timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') -- !query schema -struct +struct -- !query output -1 @@ -814,7 +814,7 @@ struct +struct -- !query output 58 @@ -822,14 +822,14 @@ struct +struct -- !query output 1 -- !query -select timestampdiff('SECOND', date'2022-02-15', timestamp'2022-02-14 23:59:59') +select timestampdiff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59') -- !query schema -struct +struct -- !query output -1 diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out index 0364f553d2676..06e255a09c3e3 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out @@ -766,9 +766,9 @@ struct> -- !query -select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03') +select timestampadd(MONTH, -1, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-14 01:02:03 @@ -776,7 +776,7 @@ struct +struct -- !query output 2022-02-14 02:00:03 @@ -784,23 +784,23 @@ struct +struct -- !query output 2023-02-15 00:00:00 -- !query -select timestampadd('SECOND', -1, date'2022-02-15') +select timestampadd(SECOND, -1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2022-02-14 23:59:59 -- !query -select timestampdiff('MONTH', timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') +select timestampdiff(MONTH, timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') -- !query schema -struct +struct -- !query output -1 @@ -808,7 +808,7 @@ struct +struct -- !query output 58 @@ -816,14 +816,14 @@ struct +struct -- !query output 1 -- !query -select timestampdiff('SECOND', date'2022-02-15', timestamp'2022-02-14 23:59:59') +select timestampdiff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59') -- !query schema -struct +struct -- !query output -1 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index ea683ab176786..b90950a014a79 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -105,10 +105,10 @@ class QueryExecutionErrorsSuite extends QueryTest test("INVALID_PARAMETER_VALUE: invalid unit passed to timestampadd/timestampdiff") { Seq( "timestampadd" -> - "select timestampadd('nanosecond', 100, timestamp'2022-02-13 18:00:00')", + "select timestampadd(nanosecond, 100, timestamp'2022-02-13 18:00:00')", "timestampdiff" -> """select timestampdiff( - | 'nanosecond', + | nanosecond, | timestamp'2022-02-13 18:00:00', | timestamp'2022-02-22 12:52:00')""".stripMargin 
).foreach { case (funcName, sqlStmt) => From 34e3029a43d2a8241f70f2343be8285cb7f231b9 Mon Sep 17 00:00:00 2001 From: itholic Date: Fri, 11 Mar 2022 19:13:07 +0900 Subject: [PATCH 467/513] [SPARK-38107][SQL] Use error classes in the compilation errors of python/pandas UDFs ### What changes were proposed in this pull request? This PR proposes to migrate the following errors to use error class. - pandasUDFAggregateNotSupportedInPivotError - cannotUseMixtureOfAggFunctionAndGroupAggPandasUDFError - usePythonUDFInJoinConditionUnsupportedError ### Why are the changes needed? To throw a standardized user-facing error or exception for the python/pandas UDF related errors. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests to `QueryCompilationErrorsSuite`. Closes #35656 from itholic/SPARK-38107. Authored-by: itholic Signed-off-by: Hyukjin Kwon --- .../main/resources/error/error-classes.json | 3 + python/pyspark/sql/tests/test_udf.py | 3 +- .../sql/errors/QueryCompilationErrors.scala | 17 ++--- ...tractPythonUDFFromJoinConditionSuite.scala | 3 +- .../spark/sql/execution/SparkStrategies.scala | 5 +- .../errors/QueryCompilationErrorsSuite.scala | 71 ++++++++++++++++++- 6 files changed, 88 insertions(+), 14 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 3667bb2a4c7e4..c7a9c854cb486 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -18,6 +18,9 @@ "CANNOT_UP_CAST_DATATYPE" : { "message" : [ "Cannot up cast %s from %s to %s.\n%s" ] }, + "CANNOT_USE_MIXTURE" : { + "message" : [ "Cannot use a mixture of aggregate function and group aggregate pandas UDF" ] + }, "CAST_CAUSES_OVERFLOW" : { "message" : [ "Casting %s to %s causes overflow. To return NULL instead, use 'try_cast'. If necessary set %s to false to bypass this error." ], "sqlState" : "22005" diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index 0e9d7661e2d94..805d5a8dfec9a 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -257,7 +257,8 @@ def test_udf_not_supported_in_join_condition(self): def runWithJoinType(join_type, type_string): with self.assertRaisesRegex( - AnalysisException, "Using PythonUDF.*%s is not supported." 
% type_string + AnalysisException, + "Using PythonUDF in join condition of join type %s is not supported" % type_string, ): left.join(right, [f("a", "b"), left.a1 == right.b1], join_type).collect() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 880c28d904195..6bf0ec8eb8c40 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -198,7 +198,9 @@ object QueryCompilationErrors { } def pandasUDFAggregateNotSupportedInPivotError(): Throwable = { - new AnalysisException("Pandas UDF aggregate expressions are currently not supported in pivot.") + new AnalysisException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array("Pandas UDF aggregate expressions don't support pivot.")) } def aggregateExpressionRequiredForPivotError(sql: String): Throwable = { @@ -1330,10 +1332,6 @@ object QueryCompilationErrors { s"Expected: ${dataType.typeName}; Found: ${expression.dataType.typeName}") } - def groupAggPandasUDFUnsupportedByStreamingAggError(): Throwable = { - new AnalysisException("Streaming aggregation doesn't support group aggregate pandas UDF") - } - def streamJoinStreamWithoutEqualityPredicateUnsupportedError(plan: LogicalPlan): Throwable = { new AnalysisException( "Stream-stream join without equality predicate is not supported", plan = Some(plan)) @@ -1341,7 +1339,8 @@ object QueryCompilationErrors { def cannotUseMixtureOfAggFunctionAndGroupAggPandasUDFError(): Throwable = { new AnalysisException( - "Cannot use a mixture of aggregate function and group aggregate pandas UDF") + errorClass = "CANNOT_USE_MIXTURE", + messageParameters = Array.empty) } def ambiguousAttributesInSelfJoinError( @@ -1570,8 +1569,10 @@ object QueryCompilationErrors { } def usePythonUDFInJoinConditionUnsupportedError(joinType: JoinType): Throwable = { - new AnalysisException("Using PythonUDF in join condition of join type" + - s" $joinType is not supported.") + new AnalysisException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array( + s"Using PythonUDF in join condition of join type $joinType is not supported")) } def conflictingAttributesInJoinConditionError( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala index 77bfc0b3682a3..65c8f5d300c62 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala @@ -188,7 +188,8 @@ class ExtractPythonUDFFromJoinConditionSuite extends PlanTest { Optimize.execute(query.analyze) } assert(e.message.contentEquals( - s"Using PythonUDF in join condition of join type $joinType is not supported.")) + "The feature is not supported: " + + s"Using PythonUDF in join condition of join type $joinType is not supported")) val query2 = testRelationLeft.join( testRelationRight, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 9c2195d42786c..3b48a8f166014 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution import java.util.Locale import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{execution, Strategy} +import org.apache.spark.sql.{execution, AnalysisException, Strategy} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions._ @@ -373,7 +373,8 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { namedGroupingExpressions, aggregateExpressions, rewrittenResultExpressions, child) => if (aggregateExpressions.exists(PythonUDF.isGroupedAggPandasUDF)) { - throw QueryCompilationErrors.groupAggPandasUDFUnsupportedByStreamingAggError() + throw new AnalysisException( + "Streaming aggregation doesn't support group aggregate pandas UDF") } val sessionWindowOption = namedGroupingExpressions.find { p => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 485022c9c79dc..d5cbfc844ccdd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -17,8 +17,8 @@ package org.apache.spark.sql.errors -import org.apache.spark.sql.{AnalysisException, QueryTest} -import org.apache.spark.sql.functions.{grouping, grouping_id} +import org.apache.spark.sql.{AnalysisException, IntegratedUDFTestUtils, QueryTest} +import org.apache.spark.sql.functions.{grouping, grouping_id, sum} import org.apache.spark.sql.test.SharedSparkSession case class StringLongClass(a: String, b: Long) @@ -101,4 +101,71 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { assert(e.message === "The argument_index of string format cannot contain position 0$.") } + + test("CANNOT_USE_MIXTURE: Using aggregate function with grouped aggregate pandas UDF") { + import IntegratedUDFTestUtils._ + + val df = Seq( + (536361, "85123A", 2, 17850), + (536362, "85123B", 4, 17850), + (536363, "86123A", 6, 17851) + ).toDF("InvoiceNo", "StockCode", "Quantity", "CustomerID") + val e = intercept[AnalysisException] { + val pandasTestUDF = TestGroupedAggPandasUDF(name = "pandas_udf") + df.groupBy("CustomerId") + .agg(pandasTestUDF(df("Quantity")), sum(df("Quantity"))).collect() + } + + assert(e.errorClass === Some("CANNOT_USE_MIXTURE")) + assert(e.message === + "Cannot use a mixture of aggregate function and group aggregate pandas UDF") + } + + test("UNSUPPORTED_FEATURE: Using Python UDF with unsupported join condition") { + import IntegratedUDFTestUtils._ + + val df1 = Seq( + (536361, "85123A", 2, 17850), + (536362, "85123B", 4, 17850), + (536363, "86123A", 6, 17851) + ).toDF("InvoiceNo", "StockCode", "Quantity", "CustomerID") + val df2 = Seq( + ("Bob", 17850), + ("Alice", 17850), + ("Tom", 17851) + ).toDF("CustomerName", "CustomerID") + + val e = intercept[AnalysisException] { + val pythonTestUDF = TestPythonUDF(name = "python_udf") + df1.join( + df2, pythonTestUDF(df1("CustomerID") === df2("CustomerID")), "leftouter").collect() + } + + assert(e.errorClass === Some("UNSUPPORTED_FEATURE")) + assert(e.getSqlState === "0A000") + assert(e.message === + "The feature is not supported: " + + "Using PythonUDF in join condition of join type 
LeftOuter is not supported") + } + + test("UNSUPPORTED_FEATURE: Using pandas UDF aggregate expression with pivot") { + import IntegratedUDFTestUtils._ + + val df = Seq( + (536361, "85123A", 2, 17850), + (536362, "85123B", 4, 17850), + (536363, "86123A", 6, 17851) + ).toDF("InvoiceNo", "StockCode", "Quantity", "CustomerID") + + val e = intercept[AnalysisException] { + val pandasTestUDF = TestGroupedAggPandasUDF(name = "pandas_udf") + df.groupBy(df("CustomerID")).pivot(df("CustomerID")).agg(pandasTestUDF(df("Quantity"))) + } + + assert(e.errorClass === Some("UNSUPPORTED_FEATURE")) + assert(e.getSqlState === "0A000") + assert(e.message === + "The feature is not supported: " + + "Pandas UDF aggregate expressions don't support pivot.") + } } From 36023c27731dead53ab9b7ae9b33332476ce6aa7 Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Fri, 11 Mar 2022 19:15:38 +0900 Subject: [PATCH 468/513] [SPARK-38491][PYTHON] Support `ignore_index` of `Series.sort_values` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Support `ignore_index` of `Series.sort_values`, in which the resulting axis will be labeled `0, 1, …, n - 1`. ### Why are the changes needed? To reach parity with pandas. Older pandas support `ignore_index` as well: ```py >>> pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, None, 7], "b": [7, 6, 5, 4, 3, 2, 1]}, index=np.random.rand(7)) >>> pdf.sort_values("b", ignore_index=True) a b 0 7.0 1 1 NaN 2 2 5.0 3 3 4.0 4 4 3.0 5 5 2.0 6 6 1.0 7 >>> pd.__version__ '1.0.0' ``` ### Does this PR introduce _any_ user-facing change? Yes. `ignore_index` of `Series.sort_values` is supported. ```py >>> psdf = ps.DataFrame({"a": [1, 2, 3, 4, 5, None, 7], "b": [7, 6, 5, 4, 3, 2, 1]}, index=np.random.rand(7)) >>> psdf a b 0.971253 1.0 7 0.401039 2.0 6 0.322310 3.0 5 0.932521 4.0 4 0.058432 5.0 3 0.122754 NaN 2 0.842971 7.0 1 >>> psdf.sort_values("b", ignore_index=True) a b 0 7.0 1 1 NaN 2 2 5.0 3 3 4.0 4 4 3.0 5 5 2.0 6 6 1.0 7 ``` ### How was this patch tested? Unit tests. Closes #35794 from xinrong-databricks/frame.sort_values. Authored-by: Xinrong Meng Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/frame.py | 45 +++++++++++++------ python/pyspark/pandas/tests/test_dataframe.py | 42 +++++++++++++++++ 2 files changed, 73 insertions(+), 14 deletions(-) diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index 64a64711b4e17..7abcb9cb24ccc 100644 --- a/python/pyspark/pandas/frame.py +++ b/python/pyspark/pandas/frame.py @@ -6855,6 +6855,7 @@ def sort_values( ascending: Union[bool, List[bool]] = True, inplace: bool = False, na_position: str = "last", + ignore_index: bool = False, ) -> Optional["DataFrame"]: """ Sort by the values along either axis. @@ -6870,6 +6871,8 @@ def sort_values( if True, perform operation in-place na_position : {'first', 'last'}, default 'last' `first` puts NaNs at the beginning, `last` puts NaNs at the end + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. Returns ------- @@ -6882,34 +6885,45 @@ def sort_values( ... 'col2': [2, 9, 8, 7, 4], ... 'col3': [0, 9, 4, 2, 3], ... }, - ... columns=['col1', 'col2', 'col3']) + ... columns=['col1', 'col2', 'col3'], + ... 
index=['a', 'b', 'c', 'd', 'e']) >>> df col1 col2 col3 - 0 A 2 0 - 1 B 9 9 - 2 None 8 4 - 3 D 7 2 - 4 C 4 3 + a A 2 0 + b B 9 9 + c None 8 4 + d D 7 2 + e C 4 3 Sort by col1 >>> df.sort_values(by=['col1']) col1 col2 col3 + a A 2 0 + b B 9 9 + e C 4 3 + d D 7 2 + c None 8 4 + + Ignore index for the resulting axis + + >>> df.sort_values(by=['col1'], ignore_index=True) + col1 col2 col3 0 A 2 0 1 B 9 9 - 4 C 4 3 + 2 C 4 3 3 D 7 2 - 2 None 8 4 + 4 None 8 4 Sort Descending >>> df.sort_values(by='col1', ascending=False) col1 col2 col3 - 3 D 7 2 - 4 C 4 3 - 1 B 9 9 - 0 A 2 0 - 2 None 8 4 + d D 7 2 + e C 4 3 + b B 9 9 + a A 2 0 + c None 8 4 Sort by multiple columns @@ -6945,11 +6959,14 @@ def sort_values( new_by.append(ser.spark.column) psdf = self._sort(by=new_by, ascending=ascending, na_position=na_position) + if inplace: + if ignore_index: + psdf.reset_index(drop=True, inplace=inplace) self._update_internal_frame(psdf._internal) return None else: - return psdf + return psdf.reset_index(drop=True) if ignore_index else psdf def sort_index( self, diff --git a/python/pyspark/pandas/tests/test_dataframe.py b/python/pyspark/pandas/tests/test_dataframe.py index ab1edbe4b2c25..dc1ed97c6656a 100644 --- a/python/pyspark/pandas/tests/test_dataframe.py +++ b/python/pyspark/pandas/tests/test_dataframe.py @@ -1558,6 +1558,9 @@ def test_sort_values(self): psdf = ps.from_pandas(pdf) self.assert_eq(psdf.sort_values("b"), pdf.sort_values("b")) + self.assert_eq( + psdf.sort_values("b", ignore_index=True), pdf.sort_values("b", ignore_index=True) + ) for ascending in [True, False]: for na_position in ["first", "last"]: @@ -1567,6 +1570,10 @@ def test_sort_values(self): ) self.assert_eq(psdf.sort_values(["a", "b"]), pdf.sort_values(["a", "b"])) + self.assert_eq( + psdf.sort_values(["a", "b"], ignore_index=True), + pdf.sort_values(["a", "b"], ignore_index=True), + ) self.assert_eq( psdf.sort_values(["a", "b"], ascending=[False, True]), pdf.sort_values(["a", "b"], ascending=[False, True]), @@ -1587,6 +1594,41 @@ def test_sort_values(self): self.assert_eq(psdf, pdf) self.assert_eq(psserA, pserA) + pdf = pd.DataFrame( + {"a": [1, 2, 3, 4, 5, None, 7], "b": [7, 6, 5, 4, 3, 2, 1]}, index=np.random.rand(7) + ) + psdf = ps.from_pandas(pdf) + pserA = pdf.a + psserA = psdf.a + self.assert_eq( + psdf.sort_values("b", inplace=True, ignore_index=True), + pdf.sort_values("b", inplace=True, ignore_index=True), + ) + self.assert_eq(psdf, pdf) + self.assert_eq(psserA, pserA) + + # multi-index indexes + + pdf = pd.DataFrame( + {"a": [1, 2, 3, 4, 5, None, 7], "b": [7, 6, 5, 4, 3, 2, 1]}, + index=pd.MultiIndex.from_tuples( + [ + ("bar", "one"), + ("bar", "two"), + ("baz", "one"), + ("baz", "two"), + ("foo", "one"), + ("foo", "two"), + ("qux", "one"), + ] + ), + ) + psdf = ps.from_pandas(pdf) + self.assert_eq(psdf.sort_values("b"), pdf.sort_values("b")) + self.assert_eq( + psdf.sort_values("b", ignore_index=True), pdf.sort_values("b", ignore_index=True) + ) + # multi-index columns pdf = pd.DataFrame( {("X", 10): [1, 2, 3, 4, 5, None, 7], ("X", 20): [7, 6, 5, 4, 3, 2, 1]}, From b1d8f35a0e62839a5e9c5e66aff52f75d195c846 Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Fri, 11 Mar 2022 19:40:11 +0900 Subject: [PATCH 469/513] [SPARK-38518][PYTHON] Implement `skipna` of `Series.all/Index.all` to exclude NA/null values ### What changes were proposed in this pull request? Implement `skipna` of `Series.all/Index.all` to exclude NA/null values ### Why are the changes needed? To reach parity with pandas. 
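For reference, the change builds `all(skipna=...)` on top of Spark's `min` aggregate over a boolean column (used as a stand-in for `every`), as shown in the `base.py` diff below. A minimal standalone sketch of the two code paths follows; the session setup, the column name `v`, and the sample data here are illustrative only and not part of the patch:

```py
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame([(True,), (True,), (None,)], "v boolean")

# skipna=True path: nulls are coalesced to True, so they do not change the result
sdf.select(F.min(F.coalesce(F.col("v").cast("boolean"), F.lit(True)))).show()   # true

# skipna=False path: nulls are mapped to False before taking the min
sdf.select(
    F.min(F.when(F.col("v").isNull(), F.lit(False)).otherwise(F.col("v").cast("boolean")))
).show()                                                                         # false
```

As the comment in the diff notes, `min` is kept as the portable alternative to the `every` aggregate that was only added in Spark 3.0.
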
### Does this PR introduce _any_ user-facing change? Yes. `skipna` is supported as below (consistent with pandas API): ```py >>> ps.Series([1, 2, 3, np.nan]).all(skipna=True) True >>> ps.Series([1, 2, 3, np.nan]).all(skipna=False) True >>> ps.Series([True, True, None]).all(skipna=True) True >>> ps.Series([True, True, None]).all(skipna=False) False ``` ### How was this patch tested? Unit tests. Closes #35813 from xinrong-databricks/series.all. Authored-by: Xinrong Meng Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/base.py | 38 ++++++++++++++++++---- python/pyspark/pandas/indexes/category.py | 4 +++ python/pyspark/pandas/indexes/datetimes.py | 4 +++ python/pyspark/pandas/indexes/timedelta.py | 4 +++ python/pyspark/pandas/tests/test_series.py | 7 ++++ 5 files changed, 50 insertions(+), 7 deletions(-) diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py index d7de9b1774a7c..cb998b4cbf9fd 100644 --- a/python/pyspark/pandas/base.py +++ b/python/pyspark/pandas/base.py @@ -27,7 +27,7 @@ import pandas as pd from pandas.api.types import is_list_like, CategoricalDtype # type: ignore[attr-defined] from pyspark.sql import functions as F, Column, Window -from pyspark.sql.types import LongType, BooleanType +from pyspark.sql.types import LongType, BooleanType, NumericType from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm. from pyspark.pandas._typing import Axis, Dtype, IndexOpsLike, Label, SeriesOrIndex @@ -965,8 +965,8 @@ def notnull(self: IndexOpsLike) -> IndexOpsLike: notna = notnull - # TODO: axis, skipna, and many arguments should be implemented. - def all(self, axis: Axis = 0) -> bool: + # TODO: axis and many arguments should be implemented. + def all(self, axis: Axis = 0, skipna: bool = True) -> bool: """ Return whether all elements are True. @@ -981,6 +981,11 @@ def all(self, axis: Axis = 0) -> bool: * 0 / 'index' : reduce the index, return a Series whose index is the original column labels. + skipna : boolean, default True + Exclude NA/null values. If an entire row/column is NA and skipna is True, + then the result will be True, as for an empty row/column. + If skipna is False, then NA are treated as True, because these are not equal to zero. + Examples -------- >>> ps.Series([True, True]).all() @@ -998,6 +1003,9 @@ def all(self, axis: Axis = 0) -> bool: >>> ps.Series([True, True, None]).all() True + >>> ps.Series([True, True, None]).all(skipna=False) + False + >>> ps.Series([True, False, None]).all() False @@ -1007,6 +1015,15 @@ def all(self, axis: Axis = 0) -> bool: >>> ps.Series([np.nan]).all() True + >>> ps.Series([np.nan]).all(skipna=False) + True + + >>> ps.Series([None]).all() + True + + >>> ps.Series([None]).all(skipna=False) + False + >>> df = ps.Series([True, False, None]).rename("a").to_frame() >>> df.set_index("a").index.all() False @@ -1018,11 +1035,18 @@ def all(self, axis: Axis = 0) -> bool: sdf = self._internal.spark_frame.select(self.spark.column) col = scol_for(sdf, sdf.columns[0]) - # Note that we're ignoring `None`s here for now. - # any and every was added as of Spark 3.0 + # `any` and `every` was added as of Spark 3.0. # ret = sdf.select(F.expr("every(CAST(`%s` AS BOOLEAN))" % sdf.columns[0])).collect()[0][0] - # Here we use min as its alternative: - ret = sdf.select(F.min(F.coalesce(col.cast("boolean"), SF.lit(True)))).collect()[0][0] + # We use min as its alternative as below. 
+ if isinstance(self.spark.data_type, NumericType) or skipna: + # np.nan takes no effect to the result; None takes no effect if `skipna` + ret = sdf.select(F.min(F.coalesce(col.cast("boolean"), SF.lit(True)))).collect()[0][0] + else: + # Take None as False when not `skipna` + ret = sdf.select( + F.min(F.when(col.isNull(), SF.lit(False)).otherwise(col.cast("boolean"))) + ).collect()[0][0] + if ret is None: return True else: diff --git a/python/pyspark/pandas/indexes/category.py b/python/pyspark/pandas/indexes/category.py index 40c6410cedc65..2dfc7f25eb0f3 100644 --- a/python/pyspark/pandas/indexes/category.py +++ b/python/pyspark/pandas/indexes/category.py @@ -705,6 +705,10 @@ def map( # type: ignore[override] """ return super().map(mapper) + @no_type_check + def all(self, *args, **kwargs) -> None: + raise TypeError("Cannot perform 'all' with this index type: %s" % type(self).__name__) + def _test() -> None: import os diff --git a/python/pyspark/pandas/indexes/datetimes.py b/python/pyspark/pandas/indexes/datetimes.py index e673af77a2219..b4a7c1e8356a8 100644 --- a/python/pyspark/pandas/indexes/datetimes.py +++ b/python/pyspark/pandas/indexes/datetimes.py @@ -739,6 +739,10 @@ def pandas_at_time(pdf) -> ps.DataFrame[int]: # type: ignore[no-untyped-def] psdf = psdf.pandas_on_spark.apply_batch(pandas_at_time) return ps.Index(first_series(psdf).rename(self.name)) + @no_type_check + def all(self, *args, **kwargs) -> None: + raise TypeError("Cannot perform 'all' with this index type: %s" % type(self).__name__) + def disallow_nanoseconds(freq: Union[str, DateOffset]) -> None: if freq in ["N", "ns"]: diff --git a/python/pyspark/pandas/indexes/timedelta.py b/python/pyspark/pandas/indexes/timedelta.py index c45f36e277882..564c484d9684b 100644 --- a/python/pyspark/pandas/indexes/timedelta.py +++ b/python/pyspark/pandas/indexes/timedelta.py @@ -192,3 +192,7 @@ def get_microseconds(scol): ).cast("int") return Index(self.to_series().spark.transform(get_microseconds)) + + @no_type_check + def all(self, *args, **kwargs) -> None: + raise TypeError("Cannot perform 'all' with this index type: %s" % type(self).__name__) diff --git a/python/pyspark/pandas/tests/test_series.py b/python/pyspark/pandas/tests/test_series.py index 76a5b78b2115f..dbfa5477e3d72 100644 --- a/python/pyspark/pandas/tests/test_series.py +++ b/python/pyspark/pandas/tests/test_series.py @@ -838,13 +838,20 @@ def test_all(self): pd.Series([True, False], name="x"), pd.Series([0, 1], name="x"), pd.Series([1, 2, 3], name="x"), + pd.Series([np.nan, 0, 1], name="x"), + pd.Series([np.nan, 1, 2, 3], name="x"), pd.Series([True, True, None], name="x"), pd.Series([True, False, None], name="x"), pd.Series([], name="x"), pd.Series([np.nan], name="x"), + pd.Series([np.nan, np.nan], name="x"), + pd.Series([None], name="x"), + pd.Series([None, None], name="x"), ]: psser = ps.from_pandas(pser) self.assert_eq(psser.all(), pser.all()) + self.assert_eq(psser.all(skipna=False), pser.all(skipna=False)) + self.assert_eq(psser.all(skipna=True), pser.all(skipna=True)) pser = pd.Series([1, 2, 3, 4], name="x") psser = ps.from_pandas(pser) From fd5896bf0ae89e62024b3e4d06643cef7c17bee7 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 11 Mar 2022 11:06:37 -0800 Subject: [PATCH 470/513] [SPARK-38527][K8S][DOCS] Set the minimum Volcano version ### What changes were proposed in this pull request? 
This PR aims to set the minimum `Volcano` version instead of `latest` tag in order to prevent accidental failures like SPARK-38508 and SPARK-38515 due to the unknown Volcano regression. ### Why are the changes needed? Volcano `v1.5.0` is the latest released version released 20 days ago. - https://github.com/volcano-sh/volcano/releases/tag/v1.5.0 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual review because this is a doc-only change. Closes #35822 from dongjoon-hyun/SPARK-38527. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../kubernetes/integration-tests/README.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md index 959829373e9a9..93dbc18554ce5 100644 --- a/resource-managers/kubernetes/integration-tests/README.md +++ b/resource-managers/kubernetes/integration-tests/README.md @@ -308,9 +308,21 @@ You can also specify your specific dockerfile to build JVM/Python/R based image # Running the Volcano Integration Tests -Prerequisites -- Install Volcano according to [link](https://volcano.sh/en/docs/installation/). +Volcano integration is experimental in Aapche Spark 3.3.0 and the test coverage is limited. + +## Requirements - A minimum of 6 CPUs and 9G of memory is required to complete all Volcano test cases. +- Volcano v1.5.0. + +## Installation + + # x86_64 + kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/release-1.5/installer/volcano-development.yaml + + # arm64: + kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/release-1.5/installer/volcano-development-arm64.yaml + +## Run tests You can specify `-Pvolcano` to enable volcano module to run all Kubernetes and Volcano tests From 60334d7661b406a482c76970ecc43a9715996744 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 11 Mar 2022 21:36:12 -0800 Subject: [PATCH 471/513] [SPARK-38516][BUILD] Add log4j-core and log4j-api to classpath if active hadoop-provided ### What changes were proposed in this pull request? Add `log4j-core` and `log4j-api` to classpath if active hadoop-provided. ### Why are the changes needed? `log4j-core` is needed: ``` Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/logging/log4j/core/Filter at java.lang.Class.getDeclaredMethods0(Native Method) at java.lang.Class.privateGetDeclaredMethods(Class.java:2701) at java.lang.Class.privateGetMethodRecursive(Class.java:3048) at java.lang.Class.getMethod0(Class.java:3018) at java.lang.Class.getMethod(Class.java:1784) at sun.launcher.LauncherHelper.validateMainClass(LauncherHelper.java:544) at sun.launcher.LauncherHelper.checkAndLoadMain(LauncherHelper.java:526) Caused by: java.lang.ClassNotFoundException: org.apache.logging.log4j.core.Filter at java.net.URLClassLoader.findClass(URLClassLoader.java:381) at java.lang.ClassLoader.loadClass(ClassLoader.java:424) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331) at java.lang.ClassLoader.loadClass(ClassLoader.java:357) ... 
7 more ``` `log4j-api` is needed: ``` Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/logging/log4j/LogManager at org.apache.spark.deploy.yarn.SparkRackResolver.(SparkRackResolver.scala:42) at org.apache.spark.deploy.yarn.SparkRackResolver$.get(SparkRackResolver.scala:114) at org.apache.spark.scheduler.cluster.YarnScheduler.(YarnScheduler.scala:31) at org.apache.spark.scheduler.cluster.YarnClusterManager.createTaskScheduler(YarnClusterManager.scala:35) at org.apache.spark.SparkContext$.org$apache$spark$SparkContext$$createTaskScheduler(SparkContext.scala:2985) at org.apache.spark.SparkContext.(SparkContext.scala:563) at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2704) at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$2(SparkSession.scala:953) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:947) at org.apache.spark.sql.hive.thriftserver.SparkSQLEnv$.init(SparkSQLEnv.scala:54) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.(SparkSQLCLIDriver.scala:327) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:159) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:958) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1046) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1055) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: java.lang.ClassNotFoundException: org.apache.logging.log4j.LogManager at java.net.URLClassLoader.findClass(URLClassLoader.java:381) at java.lang.ClassLoader.loadClass(ClassLoader.java:424) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331) at java.lang.ClassLoader.loadClass(ClassLoader.java:357) ... 26 more ``` `log4j-slf4j-impl` is not needed: https://github.com/apache/spark/pull/35811#issuecomment-1064855439 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual test: ```shell ./dev/make-distribution.sh --name SPARK-38516 --tgz -Phive -Phive-thriftserver -Pyarn -Phadoop-2 -Phadoop-provided ``` Closes #35811 from wangyum/SPARK-38516. 
Lead-authored-by: Yuming Wang Co-authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- pom.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/pom.xml b/pom.xml index 2c9ee3412cc43..5bc8d12d966ab 100644 --- a/pom.xml +++ b/pom.xml @@ -745,13 +745,11 @@ org.apache.logging.log4j log4j-api ${log4j.version} - ${hadoop.deps.scope} org.apache.logging.log4j log4j-core ${log4j.version} - ${hadoop.deps.scope} From c91c2e9afec0d5d5bbbd2e155057fe409c5bb928 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 11 Mar 2022 21:38:24 -0800 Subject: [PATCH 472/513] [SPARK-38526][SQL] Fix misleading function alias name for RuntimeReplaceable ### What changes were proposed in this pull request? This PR uses a manual recursion to replace `RuntimeReplaceable` expressions instead of `transformAllExpressionsWithPruning`. The problem of `transformAllExpressionsWithPruning` is it will automatically make the replacement expression inherit the function alias name from the parent node, which is quite misleading. For example, `select date_part('month', c) from t`, the optimized plan in EXPLAIN before this PR is ``` Project [date_part(cast(c#18 as date)) AS date_part(month, c)#19] +- Relation default.t[c#18] parquet ``` Now it's ``` Project [month(cast(c#9 as date)) AS date_part(month, c)#10] +- Relation default.t[c#9] parquet ``` ### Why are the changes needed? fix misleading EXPLAIN result ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new test Closes #35821 from cloud-fan/follow2. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/optimizer/finishAnalysis.scala | 9 +++++++-- .../test/scala/org/apache/spark/sql/ExplainSuite.scala | 9 ++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 7b896e2c9607c..ef9c4b9af40d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -40,9 +40,14 @@ import org.apache.spark.util.Utils * we use this to replace Every and Any with Min and Max respectively. 
*/ object ReplaceExpressions extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning( + def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning( _.containsAnyPattern(RUNTIME_REPLACEABLE)) { - case e: RuntimeReplaceable => e.replacement + case p => p.mapExpressions(replace) + } + + private def replace(e: Expression): Expression = e match { + case r: RuntimeReplaceable => replace(r.replacement) + case _ => e.mapChildren(replace) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 3659f20fb6ec2..073b67e0472bc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -106,7 +106,7 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite keywords = "InMemoryRelation", "StorageLevel(disk, memory, deserialized, 1 replicas)") } - test("optimized plan should show the rewritten aggregate expression") { + test("optimized plan should show the rewritten expression") { withTempView("test_agg") { sql( """ @@ -125,6 +125,13 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite "Aggregate [k#x], [k#x, every(v#x) AS every(v)#x, some(v#x) AS some(v)#x, " + "any(v#x) AS any(v)#x]") } + + withTable("t") { + sql("CREATE TABLE t(col TIMESTAMP) USING parquet") + val df = sql("SELECT date_part('month', col) FROM t") + checkKeywordsExistsInExplain(df, + "Project [month(cast(col#x as date)) AS date_part(month, col)#x]") + } } test("explain inline tables cross-joins") { From a511ca13ab392a620e2731d217cc273de9cf1b10 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 12 Mar 2022 02:47:57 -0800 Subject: [PATCH 473/513] [SPARK-38534][SQL][TESTS] Disable `to_timestamp('366', 'DD')` test case ### What changes were proposed in this pull request? This PR aims to disable `to_timestamp('366', 'DD')` to recover `ansi` test suite in Java11+. ### Why are the changes needed? Currently, Daily Java 11 and 17 GitHub Action jobs are broken. - https://github.com/apache/spark/runs/5511239176?check_suite_focus=true - https://github.com/apache/spark/runs/5513540604?check_suite_focus=true **Java 8** ``` $ bin/spark-shell --conf spark.sql.ansi.enabled=true Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 22/03/12 00:59:31 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Spark context Web UI available at http://172.16.0.31:4040 Spark context available as 'sc' (master = local[*], app id = local-1647075572229). Spark session available as 'spark'. Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 3.3.0-SNAPSHOT /_/ Using Scala version 2.12.15 (OpenJDK 64-Bit Server VM, Java 1.8.0_322) Type in expressions to have them evaluated. Type :help for more information. scala> sql("select to_timestamp('366', 'DD')").show java.time.format.DateTimeParseException: Text '366' could not be parsed, unparsed text found at index 2. If necessary set spark.sql.ansi.enabled to false to bypass this error. ``` **Java 11+** ``` $ bin/spark-shell --conf spark.sql.ansi.enabled=true Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 
22/03/12 01:00:07 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Spark context Web UI available at http://172.16.0.31:4040 Spark context available as 'sc' (master = local[*], app id = local-1647075607932). Spark session available as 'spark'. Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 3.3.0-SNAPSHOT /_/ Using Scala version 2.12.15 (OpenJDK 64-Bit Server VM, Java 11.0.12) Type in expressions to have them evaluated. Type :help for more information. scala> sql("select to_timestamp('366', 'DD')").show java.time.DateTimeException: Invalid date 'DayOfYear 366' as '1970' is not a leap year. If necessary set spark.sql.ansi.enabled to false to bypass this error. ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Test with Java 11+. **BEFORE** ``` $ java -version openjdk version "17.0.2" 2022-01-18 LTS OpenJDK Runtime Environment Zulu17.32+13-CA (build 17.0.2+8-LTS) OpenJDK 64-Bit Server VM Zulu17.32+13-CA (build 17.0.2+8-LTS, mixed mode, sharing) $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z ansi/datetime-parsing-invalid.sql" ... [info] SQLQueryTestSuite: 01:23:00.219 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 01:23:05.209 ERROR org.apache.spark.sql.SQLQueryTestSuite: Error using configs: [info] - ansi/datetime-parsing-invalid.sql *** FAILED *** (267 milliseconds) [info] ansi/datetime-parsing-invalid.sql [info] Expected "java.time.[format.DateTimeParseException [info] Text '366' could not be parsed, unparsed text found at index 2]. If necessary set s...", but got "java.time.[DateTimeException [info] Invalid date 'DayOfYear 366' as '1970' is not a leap year]. If necessary set s..." Result did not match for query #8 [info] select to_timestamp('366', 'DD') (SQLQueryTestSuite.scala:476) ... [info] Run completed in 7 seconds, 389 milliseconds. [info] Total number of tests run: 1 [info] Suites: completed 1, aborted 0 [info] Tests: succeeded 0, failed 1, canceled 0, ignored 0, pending 0 [info] *** 1 TEST FAILED *** [error] Failed tests: [error] org.apache.spark.sql.SQLQueryTestSuite [error] (sql / Test / testOnly) sbt.TestsFailedException: Tests unsuccessful [error] Total time: 21 s, completed Mar 12, 2022, 1:23:05 AM ``` **AFTER** ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z ansi/datetime-parsing-invalid.sql" ... [info] SQLQueryTestSuite: [info] - ansi/datetime-parsing-invalid.sql (390 milliseconds) ... [info] Run completed in 7 seconds, 673 milliseconds. [info] Total number of tests run: 1 [info] Suites: completed 1, aborted 0 [info] Tests: succeeded 1, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. [success] Total time: 20 s, completed Mar 12, 2022, 1:24:52 AM ``` Closes #35825 from dongjoon-hyun/SPARK-38534. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../sql-tests/inputs/datetime-parsing-invalid.sql | 3 ++- .../results/ansi/datetime-parsing-invalid.sql.out | 11 +---------- .../results/datetime-parsing-invalid.sql.out | 10 +--------- 3 files changed, 4 insertions(+), 20 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime-parsing-invalid.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime-parsing-invalid.sql index a6d743cab5480..1d1e2a5282c81 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime-parsing-invalid.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime-parsing-invalid.sql @@ -14,7 +14,8 @@ select to_timestamp('366', 'D'); select to_timestamp('9', 'DD'); -- in java 8 this case is invalid, but valid in java 11, disabled for jenkins -- select to_timestamp('100', 'DD'); -select to_timestamp('366', 'DD'); +-- The error message is changed since Java 11+ +-- select to_timestamp('366', 'DD'); select to_timestamp('9', 'DDD'); select to_timestamp('99', 'DDD'); select to_timestamp('30-365', 'dd-DDD'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out index 5dc3b85b3a9eb..59761d5ac53f0 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 29 +-- Number of queries: 28 -- !query @@ -74,15 +74,6 @@ org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. --- !query -select to_timestamp('366', 'DD') --- !query schema -struct<> --- !query output -java.time.format.DateTimeParseException -Text '366' could not be parsed, unparsed text found at index 2. If necessary set spark.sql.ansi.enabled to false to bypass this error. - - -- !query select to_timestamp('9', 'DDD') -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out index 33504709c08ec..9fc28876a5b2a 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 29 +-- Number of queries: 28 -- !query @@ -72,14 +72,6 @@ org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading to Spark >= 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. --- !query -select to_timestamp('366', 'DD') --- !query schema -struct --- !query output -NULL - - -- !query select to_timestamp('9', 'DDD') -- !query schema From c032928515e74367137c668ce692d8fd53696485 Mon Sep 17 00:00:00 2001 From: hi-zir Date: Sat, 12 Mar 2022 23:01:18 +0100 Subject: [PATCH 474/513] [SPARK-37430][PYTHON][MLLIB] Inline hints for pyspark.mllib.linalg.distributed ### What changes were proposed in this pull request? 
Inline type hints for pyspark.mllib.linalg.distributed ### Why are the changes needed? We can take advantage of static type checking within the functions by inlining the type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35739 from hi-zir/SPARK-37430. Authored-by: hi-zir Signed-off-by: zero323 --- python/pyspark/mllib/linalg/distributed.py | 172 ++++++++++++-------- python/pyspark/mllib/linalg/distributed.pyi | 145 ----------------- 2 files changed, 103 insertions(+), 214 deletions(-) delete mode 100644 python/pyspark/mllib/linalg/distributed.pyi diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py index f892d41b12c13..d49af66479311 100644 --- a/python/pyspark/mllib/linalg/distributed.py +++ b/python/pyspark/mllib/linalg/distributed.py @@ -20,16 +20,22 @@ """ import sys +from typing import Any, Generic, Optional, Tuple, TypeVar, Union, TYPE_CHECKING from py4j.java_gateway import JavaObject from pyspark import RDD, since from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper -from pyspark.mllib.linalg import _convert_to_vector, DenseMatrix, Matrix, QRDecomposition +from pyspark.mllib.linalg import _convert_to_vector, DenseMatrix, Matrix, QRDecomposition, Vector from pyspark.mllib.stat import MultivariateStatisticalSummary from pyspark.sql import DataFrame from pyspark.storagelevel import StorageLevel +UT = TypeVar("UT", bound="DistributedMatrix") +VT = TypeVar("VT", bound="Matrix") + +if TYPE_CHECKING: + from pyspark.ml._typing import VectorLike __all__ = [ "BlockMatrix", @@ -50,11 +56,11 @@ class DistributedMatrix: """ - def numRows(self): + def numRows(self) -> int: """Get or compute the number of rows.""" raise NotImplementedError - def numCols(self): + def numCols(self) -> int: """Get or compute the number of cols.""" raise NotImplementedError @@ -82,7 +88,12 @@ class RowMatrix(DistributedMatrix): the first row. """ - def __init__(self, rows, numRows=0, numCols=0): + def __init__( + self, + rows: Union[RDD[Vector], DataFrame], + numRows: int = 0, + numCols: int = 0, + ): """ Note: This docstring is not shown publicly. @@ -121,7 +132,7 @@ def __init__(self, rows, numRows=0, numCols=0): self._java_matrix_wrapper = JavaModelWrapper(java_matrix) @property - def rows(self): + def rows(self) -> RDD[Vector]: """ Rows of the RowMatrix stored as an RDD of vectors. @@ -134,7 +145,7 @@ def rows(self): """ return self._java_matrix_wrapper.call("rows") - def numRows(self): + def numRows(self) -> int: """ Get or compute the number of rows. @@ -153,7 +164,7 @@ def numRows(self): """ return self._java_matrix_wrapper.call("numRows") - def numCols(self): + def numCols(self) -> int: """ Get or compute the number of cols. @@ -172,7 +183,7 @@ def numCols(self): """ return self._java_matrix_wrapper.call("numCols") - def computeColumnSummaryStatistics(self): + def computeColumnSummaryStatistics(self) -> MultivariateStatisticalSummary: """ Computes column-wise summary statistics. @@ -195,7 +206,7 @@ def computeColumnSummaryStatistics(self): java_col_stats = self._java_matrix_wrapper.call("computeColumnSummaryStatistics") return MultivariateStatisticalSummary(java_col_stats) - def computeCovariance(self): + def computeCovariance(self) -> Matrix: """ Computes the covariance matrix, treating each row as an observation. 
@@ -216,7 +227,7 @@ def computeCovariance(self): """ return self._java_matrix_wrapper.call("computeCovariance") - def computeGramianMatrix(self): + def computeGramianMatrix(self) -> Matrix: """ Computes the Gramian matrix `A^T A`. @@ -237,7 +248,7 @@ def computeGramianMatrix(self): return self._java_matrix_wrapper.call("computeGramianMatrix") @since("2.0.0") - def columnSimilarities(self, threshold=0.0): + def columnSimilarities(self, threshold: float = 0.0) -> "CoordinateMatrix": """ Compute similarities between columns of this matrix. @@ -310,7 +321,9 @@ def columnSimilarities(self, threshold=0.0): java_sims_mat = self._java_matrix_wrapper.call("columnSimilarities", float(threshold)) return CoordinateMatrix(java_sims_mat) - def tallSkinnyQR(self, computeQ=False): + def tallSkinnyQR( + self, computeQ: bool = False + ) -> QRDecomposition[Optional["RowMatrix"], Matrix]: """ Compute the QR decomposition of this RowMatrix. @@ -360,7 +373,9 @@ def tallSkinnyQR(self, computeQ=False): R = decomp.call("R") return QRDecomposition(Q, R) - def computeSVD(self, k, computeU=False, rCond=1e-9): + def computeSVD( + self, k: int, computeU: bool = False, rCond: float = 1e-9 + ) -> "SingularValueDecomposition[RowMatrix, Matrix]": """ Computes the singular value decomposition of the RowMatrix. @@ -414,7 +429,7 @@ def computeSVD(self, k, computeU=False, rCond=1e-9): j_model = self._java_matrix_wrapper.call("computeSVD", int(k), bool(computeU), float(rCond)) return SingularValueDecomposition(j_model) - def computePrincipalComponents(self, k): + def computePrincipalComponents(self, k: int) -> Matrix: """ Computes the k principal components of the given row matrix @@ -450,7 +465,7 @@ def computePrincipalComponents(self, k): """ return self._java_matrix_wrapper.call("computePrincipalComponents", k) - def multiply(self, matrix): + def multiply(self, matrix: Matrix) -> "RowMatrix": """ Multiply this matrix by a local dense matrix on the right. @@ -478,16 +493,16 @@ def multiply(self, matrix): return RowMatrix(j_model) -class SingularValueDecomposition(JavaModelWrapper): +class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]): """ Represents singular value decomposition (SVD) factors. .. versionadded:: 2.2.0 """ - @property + @property # type: ignore[misc] @since("2.2.0") - def U(self): + def U(self) -> Optional[UT]: # type: ignore[return] """ Returns a distributed matrix whose columns are the left singular vectors of the SingularValueDecomposition if computeU was set to be True. @@ -496,23 +511,23 @@ def U(self): if u is not None: mat_name = u.getClass().getSimpleName() if mat_name == "RowMatrix": - return RowMatrix(u) + return RowMatrix(u) # type: ignore[return-value] elif mat_name == "IndexedRowMatrix": - return IndexedRowMatrix(u) + return IndexedRowMatrix(u) # type: ignore[return-value] else: raise TypeError("Expected RowMatrix/IndexedRowMatrix got %s" % mat_name) - @property + @property # type: ignore[misc] @since("2.2.0") - def s(self): + def s(self) -> Vector: """ Returns a DenseVector with singular values in descending order. """ return self.call("s") - @property + @property # type: ignore[misc] @since("2.2.0") - def V(self): + def V(self) -> VT: """ Returns a DenseMatrix whose columns are the right singular vectors of the SingularValueDecomposition. @@ -534,15 +549,15 @@ class IndexedRow: The row in the matrix at the given index. 
""" - def __init__(self, index, vector): + def __init__(self, index: int, vector: "VectorLike") -> None: self.index = int(index) self.vector = _convert_to_vector(vector) - def __repr__(self): + def __repr__(self) -> str: return "IndexedRow(%s, %s)" % (self.index, self.vector) -def _convert_to_indexed_row(row): +def _convert_to_indexed_row(row: Any) -> IndexedRow: if isinstance(row, IndexedRow): return row elif isinstance(row, tuple) and len(row) == 2: @@ -572,7 +587,12 @@ class IndexedRowMatrix(DistributedMatrix): the first row. """ - def __init__(self, rows, numRows=0, numCols=0): + def __init__( + self, + rows: RDD[Union[Tuple[int, "VectorLike"], IndexedRow]], + numRows: int = 0, + numCols: int = 0, + ): """ Note: This docstring is not shown publicly. @@ -623,7 +643,7 @@ def __init__(self, rows, numRows=0, numCols=0): self._java_matrix_wrapper = JavaModelWrapper(java_matrix) @property - def rows(self): + def rows(self) -> RDD[IndexedRow]: """ Rows of the IndexedRowMatrix stored as an RDD of IndexedRows. @@ -643,7 +663,7 @@ def rows(self): rows = rows_df.rdd.map(lambda row: IndexedRow(row[0], row[1])) return rows - def numRows(self): + def numRows(self) -> int: """ Get or compute the number of rows. @@ -664,7 +684,7 @@ def numRows(self): """ return self._java_matrix_wrapper.call("numRows") - def numCols(self): + def numCols(self) -> int: """ Get or compute the number of cols. @@ -685,7 +705,7 @@ def numCols(self): """ return self._java_matrix_wrapper.call("numCols") - def columnSimilarities(self): + def columnSimilarities(self) -> "CoordinateMatrix": """ Compute all cosine similarities between columns. @@ -701,7 +721,7 @@ def columnSimilarities(self): java_coordinate_matrix = self._java_matrix_wrapper.call("columnSimilarities") return CoordinateMatrix(java_coordinate_matrix) - def computeGramianMatrix(self): + def computeGramianMatrix(self) -> Matrix: """ Computes the Gramian matrix `A^T A`. @@ -722,7 +742,7 @@ def computeGramianMatrix(self): """ return self._java_matrix_wrapper.call("computeGramianMatrix") - def toRowMatrix(self): + def toRowMatrix(self) -> RowMatrix: """ Convert this matrix to a RowMatrix. @@ -737,7 +757,7 @@ def toRowMatrix(self): java_row_matrix = self._java_matrix_wrapper.call("toRowMatrix") return RowMatrix(java_row_matrix) - def toCoordinateMatrix(self): + def toCoordinateMatrix(self) -> "CoordinateMatrix": """ Convert this matrix to a CoordinateMatrix. @@ -752,7 +772,7 @@ def toCoordinateMatrix(self): java_coordinate_matrix = self._java_matrix_wrapper.call("toCoordinateMatrix") return CoordinateMatrix(java_coordinate_matrix) - def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024): + def toBlockMatrix(self, rowsPerBlock: int = 1024, colsPerBlock: int = 1024) -> "BlockMatrix": """ Convert this matrix to a BlockMatrix. @@ -787,7 +807,9 @@ def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024): ) return BlockMatrix(java_block_matrix, rowsPerBlock, colsPerBlock) - def computeSVD(self, k, computeU=False, rCond=1e-9): + def computeSVD( + self, k: int, computeU: bool = False, rCond: float = 1e-9 + ) -> SingularValueDecomposition["IndexedRowMatrix", Matrix]: """ Computes the singular value decomposition of the IndexedRowMatrix. 
@@ -841,7 +863,7 @@ def computeSVD(self, k, computeU=False, rCond=1e-9): j_model = self._java_matrix_wrapper.call("computeSVD", int(k), bool(computeU), float(rCond)) return SingularValueDecomposition(j_model) - def multiply(self, matrix): + def multiply(self, matrix: Matrix) -> "IndexedRowMatrix": """ Multiply this matrix by a local dense matrix on the right. @@ -884,16 +906,16 @@ class MatrixEntry: The (i, j)th entry of the matrix, as a float. """ - def __init__(self, i, j, value): + def __init__(self, i: int, j: int, value: float) -> None: self.i = int(i) self.j = int(j) self.value = float(value) - def __repr__(self): + def __repr__(self) -> str: return "MatrixEntry(%s, %s, %s)" % (self.i, self.j, self.value) -def _convert_to_matrix_entry(entry): +def _convert_to_matrix_entry(entry: Any) -> MatrixEntry: if isinstance(entry, MatrixEntry): return entry elif isinstance(entry, tuple) and len(entry) == 3: @@ -923,7 +945,12 @@ class CoordinateMatrix(DistributedMatrix): index plus one. """ - def __init__(self, entries, numRows=0, numCols=0): + def __init__( + self, + entries: RDD[Union[Tuple[int, int, float], MatrixEntry]], + numRows: int = 0, + numCols: int = 0, + ): """ Note: This docstring is not shown publicly. @@ -975,7 +1002,7 @@ def __init__(self, entries, numRows=0, numCols=0): self._java_matrix_wrapper = JavaModelWrapper(java_matrix) @property - def entries(self): + def entries(self) -> RDD[MatrixEntry]: """ Entries of the CoordinateMatrix stored as an RDD of MatrixEntries. @@ -996,7 +1023,7 @@ def entries(self): entries = entries_df.rdd.map(lambda row: MatrixEntry(row[0], row[1], row[2])) return entries - def numRows(self): + def numRows(self) -> int: """ Get or compute the number of rows. @@ -1016,7 +1043,7 @@ def numRows(self): """ return self._java_matrix_wrapper.call("numRows") - def numCols(self): + def numCols(self) -> int: """ Get or compute the number of cols. @@ -1036,7 +1063,7 @@ def numCols(self): """ return self._java_matrix_wrapper.call("numCols") - def transpose(self): + def transpose(self) -> "CoordinateMatrix": """ Transpose this CoordinateMatrix. @@ -1059,7 +1086,7 @@ def transpose(self): java_transposed_matrix = self._java_matrix_wrapper.call("transpose") return CoordinateMatrix(java_transposed_matrix) - def toRowMatrix(self): + def toRowMatrix(self) -> RowMatrix: """ Convert this matrix to a RowMatrix. @@ -1085,7 +1112,7 @@ def toRowMatrix(self): java_row_matrix = self._java_matrix_wrapper.call("toRowMatrix") return RowMatrix(java_row_matrix) - def toIndexedRowMatrix(self): + def toIndexedRowMatrix(self) -> IndexedRowMatrix: """ Convert this matrix to an IndexedRowMatrix. @@ -1110,7 +1137,7 @@ def toIndexedRowMatrix(self): java_indexed_row_matrix = self._java_matrix_wrapper.call("toIndexedRowMatrix") return IndexedRowMatrix(java_indexed_row_matrix) - def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024): + def toBlockMatrix(self, rowsPerBlock: int = 1024, colsPerBlock: int = 1024) -> "BlockMatrix": """ Convert this matrix to a BlockMatrix. @@ -1149,7 +1176,7 @@ def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024): return BlockMatrix(java_block_matrix, rowsPerBlock, colsPerBlock) -def _convert_to_matrix_block_tuple(block): +def _convert_to_matrix_block_tuple(block: Any) -> Tuple[Tuple[int, int], Matrix]: if ( isinstance(block, tuple) and len(block) == 2 @@ -1198,7 +1225,14 @@ class BlockMatrix(DistributedMatrix): invoked. 
""" - def __init__(self, blocks, rowsPerBlock, colsPerBlock, numRows=0, numCols=0): + def __init__( + self, + blocks: RDD[Tuple[Tuple[int, int], Matrix]], + rowsPerBlock: int, + colsPerBlock: int, + numRows: int = 0, + numCols: int = 0, + ): """ Note: This docstring is not shown publicly. @@ -1254,7 +1288,7 @@ def __init__(self, blocks, rowsPerBlock, colsPerBlock, numRows=0, numCols=0): self._java_matrix_wrapper = JavaModelWrapper(java_matrix) @property - def blocks(self): + def blocks(self) -> RDD[Tuple[Tuple[int, int], Matrix]]: """ The RDD of sub-matrix blocks ((blockRowIndex, blockColIndex), sub-matrix) that form this @@ -1279,7 +1313,7 @@ def blocks(self): return blocks @property - def rowsPerBlock(self): + def rowsPerBlock(self) -> int: """ Number of rows that make up each block. @@ -1294,7 +1328,7 @@ def rowsPerBlock(self): return self._java_matrix_wrapper.call("rowsPerBlock") @property - def colsPerBlock(self): + def colsPerBlock(self) -> int: """ Number of columns that make up each block. @@ -1309,7 +1343,7 @@ def colsPerBlock(self): return self._java_matrix_wrapper.call("colsPerBlock") @property - def numRowBlocks(self): + def numRowBlocks(self) -> int: """ Number of rows of blocks in the BlockMatrix. @@ -1324,7 +1358,7 @@ def numRowBlocks(self): return self._java_matrix_wrapper.call("numRowBlocks") @property - def numColBlocks(self): + def numColBlocks(self) -> int: """ Number of columns of blocks in the BlockMatrix. @@ -1338,7 +1372,7 @@ def numColBlocks(self): """ return self._java_matrix_wrapper.call("numColBlocks") - def numRows(self): + def numRows(self) -> int: """ Get or compute the number of rows. @@ -1357,7 +1391,7 @@ def numRows(self): """ return self._java_matrix_wrapper.call("numRows") - def numCols(self): + def numCols(self) -> int: """ Get or compute the number of cols. @@ -1377,7 +1411,7 @@ def numCols(self): return self._java_matrix_wrapper.call("numCols") @since("2.0.0") - def cache(self): + def cache(self) -> "BlockMatrix": """ Caches the underlying RDD. """ @@ -1385,7 +1419,7 @@ def cache(self): return self @since("2.0.0") - def persist(self, storageLevel): + def persist(self, storageLevel: StorageLevel) -> "BlockMatrix": """ Persists the underlying RDD with the specified storage level. """ @@ -1396,14 +1430,14 @@ def persist(self, storageLevel): return self @since("2.0.0") - def validate(self): + def validate(self) -> None: """ Validates the block matrix info against the matrix data (`blocks`) and throws an exception if any error is found. """ self._java_matrix_wrapper.call("validate") - def add(self, other): + def add(self, other: "BlockMatrix") -> "BlockMatrix": """ Adds two block matrices together. The matrices must have the same size and matching `rowsPerBlock` and `colsPerBlock` values. @@ -1438,7 +1472,7 @@ def add(self, other): java_block_matrix = self._java_matrix_wrapper.call("add", other_java_block_matrix) return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock) - def subtract(self, other): + def subtract(self, other: "BlockMatrix") -> "BlockMatrix": """ Subtracts the given block matrix `other` from this block matrix: `this - other`. 
The matrices must have the same size and @@ -1476,7 +1510,7 @@ def subtract(self, other): java_block_matrix = self._java_matrix_wrapper.call("subtract", other_java_block_matrix) return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock) - def multiply(self, other): + def multiply(self, other: "BlockMatrix") -> "BlockMatrix": """ Left multiplies this BlockMatrix by `other`, another BlockMatrix. The `colsPerBlock` of this matrix must equal the @@ -1513,7 +1547,7 @@ def multiply(self, other): java_block_matrix = self._java_matrix_wrapper.call("multiply", other_java_block_matrix) return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock) - def transpose(self): + def transpose(self) -> "BlockMatrix": """ Transpose this BlockMatrix. Returns a new BlockMatrix instance sharing the same underlying data. Is a lazy operation. @@ -1533,7 +1567,7 @@ def transpose(self): java_transposed_matrix = self._java_matrix_wrapper.call("transpose") return BlockMatrix(java_transposed_matrix, self.colsPerBlock, self.rowsPerBlock) - def toLocalMatrix(self): + def toLocalMatrix(self) -> Matrix: """ Collect the distributed matrix on the driver as a DenseMatrix. @@ -1557,7 +1591,7 @@ def toLocalMatrix(self): """ return self._java_matrix_wrapper.call("toLocalMatrix") - def toIndexedRowMatrix(self): + def toIndexedRowMatrix(self) -> IndexedRowMatrix: """ Convert this matrix to an IndexedRowMatrix. @@ -1582,7 +1616,7 @@ def toIndexedRowMatrix(self): java_indexed_row_matrix = self._java_matrix_wrapper.call("toIndexedRowMatrix") return IndexedRowMatrix(java_indexed_row_matrix) - def toCoordinateMatrix(self): + def toCoordinateMatrix(self) -> CoordinateMatrix: """ Convert this matrix to a CoordinateMatrix. @@ -1598,7 +1632,7 @@ def toCoordinateMatrix(self): return CoordinateMatrix(java_coordinate_matrix) -def _test(): +def _test() -> None: import doctest import numpy from pyspark.sql import SparkSession diff --git a/python/pyspark/mllib/linalg/distributed.pyi b/python/pyspark/mllib/linalg/distributed.pyi deleted file mode 100644 index 3d8a0c57b1d8c..0000000000000 --- a/python/pyspark/mllib/linalg/distributed.pyi +++ /dev/null @@ -1,145 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Generic, Sequence, Optional, Tuple, TypeVar, Union -from pyspark.rdd import RDD -from pyspark.storagelevel import StorageLevel -from pyspark.mllib.common import JavaModelWrapper -from pyspark.mllib.linalg import Vector, Matrix, QRDecomposition -from pyspark.mllib.stat import MultivariateStatisticalSummary -import pyspark.sql.dataframe -from numpy import ndarray # noqa: F401 - -VectorLike = Union[Vector, Sequence[Union[float, int]]] - -UT = TypeVar("UT") -VT = TypeVar("VT") - -class DistributedMatrix: - def numRows(self) -> int: ... 
- def numCols(self) -> int: ... - -class RowMatrix(DistributedMatrix): - def __init__( - self, - rows: Union[RDD[Vector], pyspark.sql.dataframe.DataFrame], - numRows: int = ..., - numCols: int = ..., - ) -> None: ... - @property - def rows(self) -> RDD[Vector]: ... - def numRows(self) -> int: ... - def numCols(self) -> int: ... - def computeColumnSummaryStatistics(self) -> MultivariateStatisticalSummary: ... - def computeCovariance(self) -> Matrix: ... - def computeGramianMatrix(self) -> Matrix: ... - def columnSimilarities(self, threshold: float = ...) -> CoordinateMatrix: ... - def tallSkinnyQR(self, computeQ: bool = ...) -> QRDecomposition[RowMatrix, Matrix]: ... - def computeSVD( - self, k: int, computeU: bool = ..., rCond: float = ... - ) -> SingularValueDecomposition[RowMatrix, Matrix]: ... - def computePrincipalComponents(self, k: int) -> Matrix: ... - def multiply(self, matrix: Matrix) -> RowMatrix: ... - -class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]): - @property - def U(self) -> Optional[UT]: ... - @property - def s(self) -> Vector: ... - @property - def V(self) -> VT: ... - -class IndexedRow: - index: int - vector: VectorLike - def __init__(self, index: int, vector: VectorLike) -> None: ... - -class IndexedRowMatrix(DistributedMatrix): - def __init__( - self, - rows: RDD[Union[Tuple[int, VectorLike], IndexedRow]], - numRows: int = ..., - numCols: int = ..., - ) -> None: ... - @property - def rows(self) -> RDD[IndexedRow]: ... - def numRows(self) -> int: ... - def numCols(self) -> int: ... - def columnSimilarities(self) -> CoordinateMatrix: ... - def computeGramianMatrix(self) -> Matrix: ... - def toRowMatrix(self) -> RowMatrix: ... - def toCoordinateMatrix(self) -> CoordinateMatrix: ... - def toBlockMatrix(self, rowsPerBlock: int = ..., colsPerBlock: int = ...) -> BlockMatrix: ... - def computeSVD( - self, k: int, computeU: bool = ..., rCond: float = ... - ) -> SingularValueDecomposition[IndexedRowMatrix, Matrix]: ... - def multiply(self, matrix: Matrix) -> IndexedRowMatrix: ... - -class MatrixEntry: - i: int - j: int - value: float - def __init__(self, i: int, j: int, value: float) -> None: ... - -class CoordinateMatrix(DistributedMatrix): - def __init__( - self, - entries: RDD[Union[Tuple[int, int, float], MatrixEntry]], - numRows: int = ..., - numCols: int = ..., - ) -> None: ... - @property - def entries(self) -> RDD[MatrixEntry]: ... - def numRows(self) -> int: ... - def numCols(self) -> int: ... - def transpose(self) -> CoordinateMatrix: ... - def toRowMatrix(self) -> RowMatrix: ... - def toIndexedRowMatrix(self) -> IndexedRowMatrix: ... - def toBlockMatrix(self, rowsPerBlock: int = ..., colsPerBlock: int = ...) -> BlockMatrix: ... - -class BlockMatrix(DistributedMatrix): - def __init__( - self, - blocks: RDD[Tuple[Tuple[int, int], Matrix]], - rowsPerBlock: int, - colsPerBlock: int, - numRows: int = ..., - numCols: int = ..., - ) -> None: ... - @property - def blocks(self) -> RDD[Tuple[Tuple[int, int], Matrix]]: ... - @property - def rowsPerBlock(self) -> int: ... - @property - def colsPerBlock(self) -> int: ... - @property - def numRowBlocks(self) -> int: ... - @property - def numColBlocks(self) -> int: ... - def numRows(self) -> int: ... - def numCols(self) -> int: ... - def cache(self) -> BlockMatrix: ... - def persist(self, storageLevel: StorageLevel) -> BlockMatrix: ... - def validate(self) -> None: ... - def add(self, other: BlockMatrix) -> BlockMatrix: ... - def subtract(self, other: BlockMatrix) -> BlockMatrix: ... 
- def multiply(self, other: BlockMatrix) -> BlockMatrix: ... - def transpose(self) -> BlockMatrix: ... - def toLocalMatrix(self) -> Matrix: ... - def toIndexedRowMatrix(self) -> IndexedRowMatrix: ... - def toCoordinateMatrix(self) -> CoordinateMatrix: ... From 6becf4e93e68e36fbcdc82768de497d86072abeb Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 13 Mar 2022 11:33:23 +0800 Subject: [PATCH 475/513] [SPARK-38538][K8S][TESTS] Fix driver environment verification in BasicDriverFeatureStepSuite ### What changes were proposed in this pull request? This PR aims to fix the driver environment verification logic in `BasicDriverFeatureStepSuite`. ### Why are the changes needed? When SPARK-25876 added a test logic at Apache Spark 3.0.0, it used `envs(v) === v` instead of `envs(k) === v`. https://github.com/apache/spark/blob/c032928515e74367137c668ce692d8fd53696485/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala#L94-L96 This bug was hidden because the test key-value pairs have identical set. If we have different strings for keys and values, the test case fails. https://github.com/apache/spark/blob/c032928515e74367137c668ce692d8fd53696485/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala#L42-L44 ### Does this PR introduce _any_ user-facing change? To have a correct test coverage. ### How was this patch tested? Pass the CIs. Closes #35828 from dongjoon-hyun/SPARK-38538. Authored-by: Dongjoon Hyun Signed-off-by: Yuming Wang --- .../deploy/k8s/features/BasicDriverFeatureStepSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala index 0b54599bd1d35..bf7fbcc912f54 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala @@ -40,8 +40,8 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { "customAnnotation" -> "customAnnotationValue", "yunikorn.apache.org/app-id" -> "{{APPID}}") private val DRIVER_ENVS = Map( - "customDriverEnv1" -> "customDriverEnv2", - "customDriverEnv2" -> "customDriverEnv2") + "customDriverEnv1" -> "customDriverEnv1Value", + "customDriverEnv2" -> "customDriverEnv2Value") private val TEST_IMAGE_PULL_SECRETS = Seq("my-secret-1", "my-secret-2") private val TEST_IMAGE_PULL_SECRET_OBJECTS = TEST_IMAGE_PULL_SECRETS.map { secret => @@ -92,7 +92,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { .map { env => (env.getName, env.getValue) } .toMap DRIVER_ENVS.foreach { case (k, v) => - assert(envs(v) === v) + assert(envs(k) === v) } assert(envs(ENV_SPARK_USER) === Utils.getCurrentUserName()) assert(envs(ENV_APPLICATION_ID) === kubernetesConf.appId) From 96e5446ef32de4cfaf286781aa39c30078b3b40d Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 13 Mar 2022 12:05:39 +0800 Subject: [PATCH 476/513] [SPARK-36058][K8S][TESTS][FOLLOWUP] Fix error message to include exception correctly ### What changes were proposed in this pull request? This PR aims to fix error message to include the exception because #33508 missed the string interpolation prefix, `s"`. 
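For illustration (the `e` below is a stand-in value, not code from the suite): without the `s` prefix Scala treats `${e}` as literal characters, so the caught exception is never rendered.

```
val e = new RuntimeException("boom")  // stand-in for the caught KubernetesClientException
"pod log error: ${e}"                 // plain literal: the text "${e}" ends up in the message, the exception is lost
s"pod log error: $e"                  // interpolated: "pod log error: java.lang.RuntimeException: boom"
```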
https://github.com/apache/spark/blob/c032928515e74367137c668ce692d8fd53696485/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala#L110 ### Why are the changes needed? To show the intended message. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual review. Closes #35829 from dongjoon-hyun/SPARK-36058. Authored-by: Dongjoon Hyun Signed-off-by: Yuming Wang --- .../deploy/k8s/integrationtest/KubernetesSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 685149f09d72f..3db51b2860023 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -26,6 +26,7 @@ import com.google.common.base.Charsets import com.google.common.io.Files import io.fabric8.kubernetes.api.model.Pod import io.fabric8.kubernetes.client.{Watcher, WatcherException} +import io.fabric8.kubernetes.client.KubernetesClientException import io.fabric8.kubernetes.client.Watcher.Action import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, Tag} import org.scalatest.concurrent.{Eventually, PatienceConfiguration} @@ -106,11 +107,10 @@ class KubernetesSuite extends SparkFunSuite .withName(execPod.getMetadata.getName) .getLog } catch { - case e: io.fabric8.kubernetes.client.KubernetesClientException => - "Error fetching log (pod is likely not ready) ${e}" + case e: KubernetesClientException => + s"Error fetching log (pod is likely not ready) $e" } - logInfo(s"\nBEGIN executor (${execPod.getMetadata.getName}) POD log:\n" + - podLog) + logInfo(s"\nBEGIN executor (${execPod.getMetadata.getName}) POD log:\n$podLog") logInfo(s"END executor (${execPod.getMetadata.getName}) POD log") } } From 6b64e5dc74cbdc7e2b4ae42232e9610319ad73f3 Mon Sep 17 00:00:00 2001 From: Alex Balikov Date: Sun, 13 Mar 2022 13:55:13 +0900 Subject: [PATCH 477/513] [SPARK-38320][SS] Fix flatMapGroupsWithState timeout in batch with data for key ### What changes were proposed in this pull request? As described in [SPARK-38320](https://issues.apache.org/jira/browse/SPARK-38320), the bug is that it is possible for (flat)MapGroupsWithState to timeout a key even if that key received data within the same batch. This is against the documented (flat)MapGroupsWithState contract. The problem is due to the StateStore.iterator not reflecting StateStore changes made *after* its creation - this is illustrated in the test this PR adds to StateStoreSuite.scala. The fix is to *late bind* the timeoutProcessorIter timeout processing iterator in FlatMapGroupsWithStateExec to be created *after* the input iterator has been exhausted and the state changes applied to the StateStore. ### Why are the changes needed? The changes are needed to ensure the state timeout processing iterator for (flat)MapGroupsWithState is created *after* the input is processed and the changes are applied into the StateStore, otherwise it may not notice these changes (the change to the key timeout timestamp being updated as part of the input processing). 
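In isolation, the late-binding idiom boils down to the sketch below (generic, simplified names; the real change is in `FlatMapGroupsWithStateExec`): the wrapped iterator is only created on the first `hasNext`/`next` call, i.e. after the caller has exhausted the preceding input iterator and its state updates have been written to the store.

```
def lateBound[T](create: () => Iterator[T]): Iterator[T] = new Iterator[T] {
  // Deferred: create() runs on the first hasNext/next call, not at construction time,
  // so it observes every state store update made while the input iterator was consumed.
  private lazy val underlying = create()
  override def hasNext: Boolean = underlying.hasNext
  override def next(): T = underlying.next()
}
```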
### Does this PR introduce _any_ user-facing change? No. Bug fix. ### How was this patch tested? * Added a test to StateStoreSuite.scala to illustrate the difference of state store iterator behavior across the different implementations of state stores. In particular the test illustrates the RocksDB state store iterator not reflecting state store changes made after its creation. * Added test to FlatMapGroupsWithStateSuite.scala which would fail with unexpected state timeout if the issue was not fixed. Closes #35810 from alex-balikov/SPARK-38320-state-iterators2. Authored-by: Alex Balikov Signed-off-by: Jungtaek Lim --- .../FlatMapGroupsWithStateExec.scala | 20 +++++-- .../streaming/state/StateStoreSuite.scala | 58 +++++++++++++++++++ .../FlatMapGroupsWithStateSuite.scala | 48 ++++++++++++++- 3 files changed, 118 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala index dfcb707376663..ffacfefb552da 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala @@ -172,12 +172,20 @@ case class FlatMapGroupsWithStateExec( timeoutProcessingStartTimeNs = System.nanoTime }) - val timeoutProcessorIter = - CompletionIterator[InternalRow, Iterator[InternalRow]](processor.processTimedOutState(), { - // Note: `timeoutLatencyMs` also includes the time the parent operator took for - // processing output returned through iterator. - timeoutLatencyMs += NANOSECONDS.toMillis(System.nanoTime - timeoutProcessingStartTimeNs) - }) + // SPARK-38320: Late-bind the timeout processing iterator so it is created *after* the input is + // processed (the input iterator is exhausted) and the state updates are written into the + // state store. Otherwise the iterator may not see the updates (e.g. with RocksDB state store). + val timeoutProcessorIter = new Iterator[InternalRow] { + private lazy val itr = getIterator() + override def hasNext = itr.hasNext + override def next() = itr.next() + private def getIterator(): Iterator[InternalRow] = + CompletionIterator[InternalRow, Iterator[InternalRow]](processor.processTimedOutState(), { + // Note: `timeoutLatencyMs` also includes the time the parent operator took for + // processing output returned through iterator. + timeoutLatencyMs += NANOSECONDS.toMillis(System.nanoTime - timeoutProcessingStartTimeNs) + }) + } // Generate a iterator that returns the rows grouped by the grouping function // Note that this code ensures that the filtering for timeout occurs only after diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index 601b62bd81007..dde925bb2d96f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -1017,6 +1017,64 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] } } + // This test illustrates state store iterator behavior differences leading to SPARK-38320. 
+ testWithAllCodec("SPARK-38320 - state store iterator behavior differences") { + val ROCKSDB_STATE_STORE = "RocksDBStateStore" + val dir = newDir() + val storeId = StateStoreId(dir, 0L, 1) + var version = 0L + + tryWithProviderResource(newStoreProvider(storeId)) { provider => + val store = provider.getStore(version) + logInfo(s"Running SPARK-38320 test with state store ${store.getClass.getName}") + + val itr1 = store.iterator() // itr1 is created before any writes to the store. + put(store, "1", 11, 100) + put(store, "2", 22, 200) + val itr2 = store.iterator() // itr2 is created in the middle of the writes. + put(store, "1", 11, 101) // Overwrite row (1, 11) + put(store, "3", 33, 300) + val itr3 = store.iterator() // itr3 is created after all writes. + + val expected = Set(("1", 11) -> 101, ("2", 22) -> 200, ("3", 33) -> 300) // The final state. + // Itr1 does not see any updates - original state of the store (SPARK-38320) + assert(rowPairsToDataSet(itr1) === Set.empty[Set[((String, Int), Int)]]) + assert(rowPairsToDataSet(itr2) === expected) + assert(rowPairsToDataSet(itr3) === expected) + + version = store.commit() + } + + // Reload the store from the commited version and repeat the above test. + tryWithProviderResource(newStoreProvider(storeId)) { provider => + assert(version > 0) + val store = provider.getStore(version) + + val itr1 = store.iterator() // itr1 is created before any writes to the store. + put(store, "3", 33, 301) // Overwrite row (3, 33) + put(store, "4", 44, 400) + val itr2 = store.iterator() // itr2 is created in the middle of the writes. + put(store, "4", 44, 401) // Overwrite row (4, 44) + put(store, "5", 55, 500) + val itr3 = store.iterator() // itr3 is created after all writes. + + // The final state. + val expected = Set( + ("1", 11) -> 101, ("2", 22) -> 200, ("3", 33) -> 301, ("4", 44) -> 401, ("5", 55) -> 500) + if (store.getClass.getName contains ROCKSDB_STATE_STORE) { + // RocksDB itr1 does not see any updates - original state of the store (SPARK-38320) + assert(rowPairsToDataSet(itr1) === Set( + ("1", 11) -> 101, ("2", 22) -> 200, ("3", 33) -> 300)) + } else { + assert(rowPairsToDataSet(itr1) === expected) + } + assert(rowPairsToDataSet(itr2) === expected) + assert(rowPairsToDataSet(itr3) === expected) + + version = store.commit() + } + } + test("StateStore.get") { quietly { val dir = newDir() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala index 5d3fcd52f592b..9d34ceea8dd47 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.streaming import java.io.File -import java.sql.Date +import java.sql.{Date, Timestamp} import org.apache.commons.io.FileUtils import org.scalatest.exceptions.TestFailedException @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.plans.physical.UnknownPartitioning import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.execution.RDDScanExec import org.apache.spark.sql.execution.streaming._ -import org.apache.spark.sql.execution.streaming.state.{FlatMapGroupsWithStateExecHelper, MemoryStateStore, StateStore} +import org.apache.spark.sql.execution.streaming.state.{FlatMapGroupsWithStateExecHelper, MemoryStateStore, RocksDBStateStoreProvider, StateStore} import 
org.apache.spark.sql.functions.timestamp_seconds import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.util.StreamManualClock @@ -1520,6 +1520,50 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest { ) } + test("SPARK-38320 - flatMapGroupsWithState state with data should not timeout") { + withTempDir { dir => + withSQLConf( + (SQLConf.STREAMING_NO_DATA_MICRO_BATCHES_ENABLED.key -> "false"), + (SQLConf.CHECKPOINT_LOCATION.key -> dir.getCanonicalPath), + (SQLConf.STATE_STORE_PROVIDER_CLASS.key -> classOf[RocksDBStateStoreProvider].getName)) { + + val inputData = MemoryStream[Timestamp] + val stateFunc = (key: Int, values: Iterator[Timestamp], state: GroupState[Int]) => { + // Should never timeout. All batches should have data and even if a timeout is set, + // it should get cleared when the key receives data per contract. + require(!state.hasTimedOut, "The state should not have timed out!") + // Set state and timeout once, only on the first call. The timeout should get cleared + // in the subsequent batch which has data for the key. + if (!state.exists) { + state.update(0) + state.setTimeoutTimestamp(500) // Timeout at 500 milliseconds. + } + 0 + } + + val query = inputData.toDS() + .withWatermark("value", "0 seconds") + .groupByKey(_ => 0) // Always the same key: 0. + .mapGroupsWithState(GroupStateTimeout.EventTimeTimeout())(stateFunc) + .writeStream + .format("console") + .outputMode("update") + .start() + + try { + // 2 batches. Records are routed to the same key 0. The first batch sets timeout on + // the key, the second batch with data should clear the timeout. + (1 to 2).foreach {i => + inputData.addData(new Timestamp(i * 1000)) + query.processAllAvailable() + } + } finally { + query.stop() + } + } + } + } + testWithAllStateVersions("mapGroupsWithState - initial state - null key") { val mapGroupsWithStateFunc = (key: String, values: Iterator[String], state: GroupState[RunningCount]) => { From 786a70e710369b195d7c117b33fe9983044014d6 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 12 Mar 2022 21:23:46 -0800 Subject: [PATCH 478/513] [SPARK-38537][K8S] Unify `Statefulset*` to `StatefulSet*` ### What changes were proposed in this pull request? K8s has [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) and Apache Spark is currently using both `Statefulset*` and `StatefulSet*`. The worst case is a mixed case like `class StatefulSetAllocatorSuite` in `StatefulsetAllocatorSuite.scala`. This PR aims to unify them to a K8s original name, `StatefulSet`. https://github.com/apache/spark/blob/c032928515e74367137c668ce692d8fd53696485/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/StatefulsetAllocatorSuite.scala#L39 To sum up, two files are renamed and five files are changed. 
``` $ git diff master --stat resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/AbstractPodsAllocator.scala | 5 ++--- resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala | 2 +- resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/{StatefulsetPodsAllocator.scala => StatefulSetPodsAllocator.scala} | 2 +- resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManagerSuite.scala | 4 ++-- resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/{StatefulsetAllocatorSuite.scala => StatefulSetAllocatorSuite.scala} | 4 ++-- 5 files changed, 8 insertions(+), 9 deletions(-) ``` ### Why are the changes needed? To be consistent not only inside Apache Spark, but also with K8s. ### Does this PR introduce _any_ user-facing change? No. This is a new code in Apache Spark 3.3. ### How was this patch tested? Pass the CIs. Closes #35827 from dongjoon-hyun/SPARK-38537. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/scheduler/cluster/k8s/AbstractPodsAllocator.scala | 5 ++--- .../scheduler/cluster/k8s/KubernetesClusterManager.scala | 2 +- ...setPodsAllocator.scala => StatefulSetPodsAllocator.scala} | 2 +- .../cluster/k8s/KubernetesClusterManagerSuite.scala | 4 ++-- ...tAllocatorSuite.scala => StatefulSetAllocatorSuite.scala} | 4 ++-- 5 files changed, 8 insertions(+), 9 deletions(-) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/{StatefulsetPodsAllocator.scala => StatefulSetPodsAllocator.scala} (99%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/{StatefulsetAllocatorSuite.scala => StatefulSetAllocatorSuite.scala} (98%) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/AbstractPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/AbstractPodsAllocator.scala index 2e0d4fa7ca00b..cc081202cf89a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/AbstractPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/AbstractPodsAllocator.scala @@ -26,9 +26,8 @@ import org.apache.spark.resource.ResourceProfile * :: DeveloperApi :: * A abstract interface for allowing different types of pods allocation. * - * The internal Spark implementations are [[StatefulsetPodsAllocator]] - * and [[ExecutorPodsAllocator]]. This may be useful for folks integrating with custom schedulers - * such as Volcano, Yunikorn, etc. + * The internal Spark implementations are [[StatefulSetPodsAllocator]] + * and [[ExecutorPodsAllocator]]. This may be useful for folks integrating with custom schedulers. * * This API may change or be removed at anytime. 
* diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala index 9497349569efc..10ea3a8cb0e46 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala @@ -137,7 +137,7 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit snapshotsStore: ExecutorPodsSnapshotsStore) = { val executorPodsAllocatorName = sc.conf.get(KUBERNETES_ALLOCATION_PODS_ALLOCATOR) match { case "statefulset" => - classOf[StatefulsetPodsAllocator].getName + classOf[StatefulSetPodsAllocator].getName case "direct" => classOf[ExecutorPodsAllocator].getName case fullClass => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/StatefulsetPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/StatefulSetPodsAllocator.scala similarity index 99% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/StatefulsetPodsAllocator.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/StatefulSetPodsAllocator.scala index 0d00d9678048e..294ee70168b23 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/StatefulsetPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/StatefulSetPodsAllocator.scala @@ -34,7 +34,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.resource.ResourceProfile import org.apache.spark.util.{Clock, Utils} -class StatefulsetPodsAllocator( +class StatefulSetPodsAllocator( conf: SparkConf, secMgr: SecurityManager, executorBuilder: KubernetesExecutorBuilder, diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManagerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManagerSuite.scala index ae1477e51bdf6..2b6bfe851dbd3 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManagerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManagerSuite.scala @@ -47,8 +47,8 @@ class KubernetesClusterManagerSuite extends SparkFunSuite with BeforeAndAfter { test("constructing a AbstractPodsAllocator works") { val validConfigs = List("statefulset", "direct", - "org.apache.spark.scheduler.cluster.k8s.StatefulsetPodsAllocator", - "org.apache.spark.scheduler.cluster.k8s.ExecutorPodsAllocator") + classOf[StatefulSetPodsAllocator].getName, + classOf[ExecutorPodsAllocator].getName) validConfigs.foreach { c => val manager = new KubernetesClusterManager() when(sc.conf.get(KUBERNETES_ALLOCATION_PODS_ALLOCATOR)).thenReturn(c) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/StatefulsetAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/StatefulSetAllocatorSuite.scala similarity index 98% rename from 
resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/StatefulsetAllocatorSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/StatefulSetAllocatorSuite.scala index 5f8ceb2d3ffc5..748f509e01303 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/StatefulsetAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/StatefulSetAllocatorSuite.scala @@ -80,7 +80,7 @@ class StatefulSetAllocatorSuite extends SparkFunSuite with BeforeAndAfter { @Mock private var driverPodOperations: PodResource[Pod] = _ - private var podsAllocatorUnderTest: StatefulsetPodsAllocator = _ + private var podsAllocatorUnderTest: StatefulSetPodsAllocator = _ private var snapshotsStore: DeterministicExecutorPodsSnapshotsStore = _ @@ -111,7 +111,7 @@ class StatefulSetAllocatorSuite extends SparkFunSuite with BeforeAndAfter { when(executorBuilder.buildFromFeatures(any(classOf[KubernetesExecutorConf]), meq(secMgr), meq(kubernetesClient), any(classOf[ResourceProfile]))).thenAnswer(executorPodAnswer()) snapshotsStore = new DeterministicExecutorPodsSnapshotsStore() - podsAllocatorUnderTest = new StatefulsetPodsAllocator( + podsAllocatorUnderTest = new StatefulSetPodsAllocator( conf, secMgr, executorBuilder, kubernetesClient, snapshotsStore, null) when(schedulerBackend.getExecutorIds).thenReturn(Seq.empty) podsAllocatorUnderTest.start(TEST_SPARK_APP_ID, schedulerBackend) From 9bede263a745a28cb6d8b17271c8b04415a9e2eb Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sun, 13 Mar 2022 00:52:40 -0800 Subject: [PATCH 479/513] [MINOR][K8S][TESTS] Remove `verifyPriority` from `VolcanoFeatureStepSuite` ### What changes were proposed in this pull request? This PR aims to remove `verifyPriority` from `VolcanoFeatureStepSuite.scala`. ### Why are the changes needed? This is unused. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #35832 from williamhyun/remove-priority. 
Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- .../deploy/k8s/features/VolcanoFeatureStepSuite.scala | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala index 12daffc8961f5..d0d1f5ee5e11b 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala @@ -66,14 +66,4 @@ class VolcanoFeatureStepSuite extends SparkFunSuite { assert(podGroup.getSpec.getPriorityClassName == "driver-priority") assert(podGroup.getSpec.getQueue == "driver-queue") } - - private def verifyPriority(pod: SparkPod): Unit = { - val sparkConf = new SparkConf() - val kubernetesConf = KubernetesTestConf.createDriverConf(sparkConf) - val step = new VolcanoFeatureStep() - step.init(kubernetesConf) - val sparkPod = step.configurePod(pod) - val podGroup = step.getAdditionalPreKubernetesResources().head.asInstanceOf[PodGroup] - assert(podGroup.getSpec.getPriorityClassName === sparkPod.pod.getSpec.getPriorityClassName) - } } From 0840b23ed611c5a765f881539e2bb7876d2da298 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 14 Mar 2022 07:36:27 +0800 Subject: [PATCH 480/513] [SPARK-38540][BUILD] Upgrade `compress-lzf` from 1.0.3 to 1.1 ### What changes were proposed in this pull request? This pr aims to upgrade `compress-lzf` from 1.0.3 to 1.1. ### Why are the changes needed? Before 1.0.4, `compress-lzf` is compiled by Java 6, and 1.1 is compiled by Java 8. The change logs as follows: - https://github.com/ning/compress/compare/compress-lzf-1.0.3...compress-lzf-1.1 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35834 from LuciferYang/upgrade-compress-lzf. 
Lead-authored-by: yangjie01 Co-authored-by: YangJie Signed-off-by: Yuming Wang --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 6dbababb202af..bcbf8b9908ae5 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -54,7 +54,7 @@ commons-math3/3.6.1//commons-math3-3.6.1.jar commons-net/3.1//commons-net-3.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar commons-text/1.9//commons-text-1.9.jar -compress-lzf/1.0.3//compress-lzf-1.0.3.jar +compress-lzf/1.1//compress-lzf-1.1.jar core/1.1.2//core-1.1.2.jar curator-client/2.7.1//curator-client-2.7.1.jar curator-framework/2.7.1//curator-framework-2.7.1.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 46f1bc019b608..8ca7880c7a34d 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -50,7 +50,7 @@ commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.6.1//commons-math3-3.6.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar commons-text/1.9//commons-text-1.9.jar -compress-lzf/1.0.3//compress-lzf-1.0.3.jar +compress-lzf/1.1//compress-lzf-1.1.jar core/1.1.2//core-1.1.2.jar cos_api-bundle/5.6.19//cos_api-bundle-5.6.19.jar curator-client/2.13.0//curator-client-2.13.0.jar diff --git a/pom.xml b/pom.xml index 5bc8d12d966ab..d8b5b87c7d97b 100644 --- a/pom.xml +++ b/pom.xml @@ -764,7 +764,7 @@ com.ning compress-lzf - 1.0.3 + 1.1 org.xerial.snappy From 83673c8e28a6429386419770e8ef6f09d49cba85 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Mon, 14 Mar 2022 11:02:56 +0900 Subject: [PATCH 481/513] [SPARK-38528][SQL] Eagerly iterate over aggregate sequence when building project list in `ExtractGenerator` ### What changes were proposed in this pull request? When building the project list from an aggregate sequence in `ExtractGenerator`, convert the aggregate sequence to an `IndexedSeq` before performing the flatMap operation. ### Why are the changes needed? This query fails with a `NullPointerException`: ``` val df = Seq(1, 2, 3).toDF("v") df.select(Stream(explode(array(min($"v"), max($"v"))), sum($"v")): _*).collect ``` If you change `Stream` to `Seq`, then it succeeds. `ExtractGenerator` uses a flatMap operation over `aggList` for two purposes: - To produce a new aggregate list - to update `projectExprs` (which is initialized as an array of nulls). When `aggList` is a `Stream`, the flatMap operation evaluates lazily, so all entries in `projectExprs` after the first will still be null when the rule completes. Changing `aggList` to an `IndexedSeq` forces the flatMap to evaluate eagerly. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New unit test Closes #35837 from bersprockets/generator_aggregate_issue. 
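For reference, the laziness described under "Why are the changes needed?" can be reproduced in isolation (the names below are illustrative, not taken from the patch):

```
val slots = Array.ofDim[String](3)                 // plays the role of projectExprs
val result = Stream("a", "b", "c").zipWithIndex.flatMap { case (s, i) =>
  slots(i) = s                                     // side effect that fills the array
  Some(s)
}
// Stream.flatMap applies the function eagerly only to the head, so at this point
// slots == Array("a", null, null).
println(slots.mkString(", "))                      // a, null, null
result.toList                                      // forcing the whole Stream fills the rest
println(slots.mkString(", "))                      // a, b, c
```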
Authored-by: Bruce Robbins Signed-off-by: Hyukjin Kwon --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 1 + .../org/apache/spark/sql/GeneratorFunctionSuite.scala | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 21454bef142fc..a785914540a67 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2769,6 +2769,7 @@ class Analyzer(override val catalogManager: CatalogManager) val projectExprs = Array.ofDim[NamedExpression](aggList.length) val newAggList = aggList + .toIndexedSeq .map(trimNonTopLevelAliases) .zipWithIndex .flatMap { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala index e270e0a528d9f..436ccb08294b3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala @@ -358,6 +358,13 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { checkAnswer(df.select(explode(array(min($"v"), max($"v")))), Row(1) :: Row(3) :: Nil) } + test("SPARK-38528: generator in stream of aggregate expressions") { + val df = Seq(1, 2, 3).toDF("v") + checkAnswer( + df.select(Stream(explode(array(min($"v"), max($"v"))), sum($"v")): _*), + Row(1, 6) :: Row(3, 6) :: Nil) + } + test("SPARK-37947: lateral view _outer()") { checkAnswer( sql("select * from values 1, 2 lateral view explode_outer(array()) a as b"), From 715a06cc73d109b26f918bc3ed738cf539e121f2 Mon Sep 17 00:00:00 2001 From: nyingping Date: Mon, 14 Mar 2022 12:48:09 +0900 Subject: [PATCH 482/513] [SPARK-38532][SS][TESTS] Add test case for invalid gapDuration of sessionwindow ### What changes were proposed in this pull request? Since the dynamic gapduration has been added in the session window,[#33691](https://github.com/apache/spark/pull/33691) users are allowed to enter invalid gapduration ,then filter invalide events . However, for now, test cases are only added for zero and negative gapduration. I think it is necessary to add test cases for invalid gapduration. ### Why are the changes needed? Described in above section. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the GA. Closes #35824 from nyingping/AddtestcaseForInvalidGapDuration. Lead-authored-by: nyingping Co-authored-by: Nie yingping Signed-off-by: Jungtaek Lim --- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../sql/DataFrameSessionWindowingSuite.scala | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a785914540a67..6783d0b343a65 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -4062,7 +4062,7 @@ object SessionWindowing extends Rule[LogicalPlan] { } // As same as tumbling window, we add a filter to filter out nulls. - // And we also filter out events with negative or zero gap duration. 
+ // And we also filter out events with negative or zero or invalid gap duration. val filterExpr = IsNotNull(session.timeColumn) && (sessionAttr.getField(SESSION_END) > sessionAttr.getField(SESSION_START)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala index 376fa2e95a8e2..a5414f3e805fa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSessionWindowingSuite.scala @@ -382,6 +382,34 @@ class DataFrameSessionWindowingSuite extends QueryTest with SharedSparkSession } } + test("SPARK-36465: filter out events with invalid gap duration.") { + val df = Seq( + ("2016-03-27 19:39:30", 1, "a")).toDF("time", "value", "id") + + checkAnswer( + df.groupBy(session_window($"time", "x sec")) + .agg(count("*").as("counts")) + .orderBy($"session_window.start".asc) + .select($"session_window.start".cast("string"), $"session_window.end".cast("string"), + $"counts"), + Seq() + ) + + withTempTable { table => + checkAnswer( + spark.sql("select session_window(time, " + + """case when value = 1 then "2 seconds" when value = 2 then "invalid gap duration" """ + + s"""else "20 seconds" end), value from $table""") + .select($"session_window.start".cast(StringType), $"session_window.end".cast(StringType), + $"value"), + Seq( + Row("2016-03-27 19:39:27", "2016-03-27 19:39:47", 4), + Row("2016-03-27 19:39:34", "2016-03-27 19:39:36", 1) + ) + ) + } + } + test("SPARK-36724: Support timestamp_ntz as a type of time column for SessionWindow") { val df = Seq((LocalDateTime.parse("2016-03-27T19:39:30"), 1, "a"), (LocalDateTime.parse("2016-03-27T19:39:25"), 2, "a")).toDF("time", "value", "id") From 5699095079611922af2cbef4b7c43f9434f12b3a Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Mon, 14 Mar 2022 14:06:47 +0800 Subject: [PATCH 483/513] [SPARK-38519][SQL] AQE throw exception should respect SparkFatalException ### What changes were proposed in this pull request? Throw underlying exception in `SparkFatalException` ### Why are the changes needed? BroadcastExchangeExec will wrap fatal exception inside SparkFatalException and unwarp it before throw. AQE should also respect SparkFatalException and throw original error. Before: ``` Caused by: org.apache.spark.util.SparkFatalException at org.apache.spark.sql.execution.exchange.BroadcastExchangeExec.$anonfun$relationFuture$1(BroadcastExchangeExec.scala:168) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withThreadLocalCaptured$1(SQLExecution.scala:191) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` ### Does this PR introduce _any_ user-facing change? yes, the exception may be changed ### How was this patch tested? manually tested Closes #35814 from ulysses-you/respect-fatal. 
Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../adaptive/AdaptiveSparkPlanExec.scala | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 14b45256a059a..c6505a0ea5f73 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -43,7 +43,7 @@ import org.apache.spark.sql.execution.exchange._ import org.apache.spark.sql.execution.ui.{SparkListenerSQLAdaptiveExecutionUpdate, SparkListenerSQLAdaptiveSQLMetricUpdates, SQLPlanMetric} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.vectorized.ColumnarBatch -import org.apache.spark.util.ThreadUtils +import org.apache.spark.util.{SparkFatalException, ThreadUtils} /** * A root node to execute the query plan adaptively. It splits the query plan into independent @@ -728,11 +728,16 @@ case class AdaptiveSparkPlanExec( } case _ => } - val e = if (errors.size == 1) { - errors.head + // Respect SparkFatalException which can be thrown by BroadcastExchangeExec + val originalErrors = errors.map { + case fatal: SparkFatalException => fatal.throwable + case other => other + } + val e = if (originalErrors.size == 1) { + originalErrors.head } else { - val se = QueryExecutionErrors.multiFailuresInStageMaterializationError(errors.head) - errors.tail.foreach(se.addSuppressed) + val se = QueryExecutionErrors.multiFailuresInStageMaterializationError(originalErrors.head) + originalErrors.tail.foreach(se.addSuppressed) se } throw e From efe43306fcab18f076f755c81c0406ebc1a5fee9 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Mon, 14 Mar 2022 14:08:38 +0800 Subject: [PATCH 484/513] [SPARK-38410][SQL] Support specify initial partition number for rebalance ### What changes were proposed in this pull request? Pass `initialNumPartitions` into `RebalancePartitions`. ### Why are the changes needed? Rebalance partitions resolve the skew issue during shuffle dataset. It always returns an indeterminate partition number so at the beginning we do not pass partition as parameter. However, we find the initial partition number can affect the data compression ratio. So it would be better to make the partition number isolation. Note that, it only affects the initial partition number at map side during shuffle. ### Does this PR introduce _any_ user-facing change? yes, after this pr user can do something like `SELECT /*+ REBALANCE(3, col) */ * FROM t` ### How was this patch tested? Add test Closes #35729 from ulysses-you/SPARK-38410. 
Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- docs/sql-performance-tuning.md | 5 ++- docs/sql-ref-syntax-qry-select-hints.md | 11 +++-- .../sql/catalyst/analysis/ResolveHints.scala | 24 ++++++++--- .../sql/catalyst/optimizer/Optimizer.scala | 4 +- .../plans/logical/basicLogicalOperators.scala | 14 ++++--- .../catalyst/analysis/ResolveHintsSuite.scala | 41 ++++++++++++++++++- 6 files changed, 80 insertions(+), 19 deletions(-) diff --git a/docs/sql-performance-tuning.md b/docs/sql-performance-tuning.md index 19799d95a7639..18453e2446a10 100644 --- a/docs/sql-performance-tuning.md +++ b/docs/sql-performance-tuning.md @@ -219,7 +219,8 @@ Coalesce hints allows the Spark SQL users to control the number of output files `coalesce`, `repartition` and `repartitionByRange` in Dataset API, they can be used for performance tuning and reducing the number of output files. The "COALESCE" hint only has a partition number as a parameter. The "REPARTITION" hint has a partition number, columns, or both/neither of them as parameters. -The "REPARTITION_BY_RANGE" hint must have column names and a partition number is optional. +The "REPARTITION_BY_RANGE" hint must have column names and a partition number is optional. The "REBALANCE" +hint has an initial partition number, columns, or both/neither of them as parameters. SELECT /*+ COALESCE(3) */ * FROM t SELECT /*+ REPARTITION(3) */ * FROM t @@ -229,7 +230,9 @@ The "REPARTITION_BY_RANGE" hint must have column names and a partition number is SELECT /*+ REPARTITION_BY_RANGE(c) */ * FROM t SELECT /*+ REPARTITION_BY_RANGE(3, c) */ * FROM t SELECT /*+ REBALANCE */ * FROM t + SELECT /*+ REBALANCE(3) */ * FROM t SELECT /*+ REBALANCE(c) */ * FROM t + SELECT /*+ REBALANCE(3, c) */ * FROM t For more details please refer to the documentation of [Partitioning Hints](sql-ref-syntax-qry-select-hints.html#partitioning-hints). diff --git a/docs/sql-ref-syntax-qry-select-hints.md b/docs/sql-ref-syntax-qry-select-hints.md index 53878a20af763..861e7fbae6687 100644 --- a/docs/sql-ref-syntax-qry-select-hints.md +++ b/docs/sql-ref-syntax-qry-select-hints.md @@ -33,9 +33,10 @@ Hints give users a way to suggest how Spark SQL to use specific approaches to ge Partitioning hints allow users to suggest a partitioning strategy that Spark should follow. `COALESCE`, `REPARTITION`, and `REPARTITION_BY_RANGE` hints are supported and are equivalent to `coalesce`, `repartition`, and -`repartitionByRange` [Dataset APIs](api/scala/org/apache/spark/sql/Dataset.html), respectively. These hints give users -a way to tune performance and control the number of output files in Spark SQL. When multiple partitioning hints are -specified, multiple nodes are inserted into the logical plan, but the leftmost hint is picked by the optimizer. +`repartitionByRange` [Dataset APIs](api/scala/org/apache/spark/sql/Dataset.html), respectively. The `REBALANCE` can only +be used as a hint .These hints give users a way to tune performance and control the number of output files in Spark SQL. +When multiple partitioning hints are specified, multiple nodes are inserted into the logical plan, but the leftmost hint +is picked by the optimizer. 
#### Partitioning Hints Types @@ -72,8 +73,12 @@ SELECT /*+ REPARTITION_BY_RANGE(3, c) */ * FROM t; SELECT /*+ REBALANCE */ * FROM t; +SELECT /*+ REBALANCE(3) */ * FROM t; + SELECT /*+ REBALANCE(c) */ * FROM t; +SELECT /*+ REBALANCE(3, c) */ * FROM t; + -- multiple partitioning hints EXPLAIN EXTENDED SELECT /*+ REPARTITION(100), COALESCE(500), REPARTITION_BY_RANGE(3, c) */ * FROM t; == Parsed Logical Plan == diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala index 27f2a5f416d56..46ebffea1aec5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala @@ -250,14 +250,26 @@ object ResolveHints { } private def createRebalance(hint: UnresolvedHint): LogicalPlan = { + def createRebalancePartitions( + partitionExprs: Seq[Any], initialNumPartitions: Option[Int]): RebalancePartitions = { + val invalidParams = partitionExprs.filter(!_.isInstanceOf[UnresolvedAttribute]) + if (invalidParams.nonEmpty) { + val hintName = hint.name.toUpperCase(Locale.ROOT) + throw QueryCompilationErrors.invalidHintParameterError(hintName, invalidParams) + } + RebalancePartitions( + partitionExprs.map(_.asInstanceOf[Expression]), + hint.child, + initialNumPartitions) + } + hint.parameters match { + case param @ Seq(IntegerLiteral(numPartitions), _*) => + createRebalancePartitions(param.tail, Some(numPartitions)) + case param @ Seq(numPartitions: Int, _*) => + createRebalancePartitions(param.tail, Some(numPartitions)) case partitionExprs @ Seq(_*) => - val invalidParams = partitionExprs.filter(!_.isInstanceOf[UnresolvedAttribute]) - if (invalidParams.nonEmpty) { - val hintName = hint.name.toUpperCase(Locale.ROOT) - throw QueryCompilationErrors.invalidHintParameterError(hintName, invalidParams) - } - RebalancePartitions(partitionExprs.map(_.asInstanceOf[Expression]), hint.child) + createRebalancePartitions(partitionExprs, None) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index e245d14854472..debd5a66adb23 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1072,11 +1072,11 @@ object CollapseRepartition extends Rule[LogicalPlan] { r.withNewChildren(child.children) // Case 3: When a RebalancePartitions has a child of local or global Sort, Repartition or // RepartitionByExpression we can remove the child. - case r @ RebalancePartitions(_, child @ (_: Sort | _: RepartitionOperation)) => + case r @ RebalancePartitions(_, child @ (_: Sort | _: RepartitionOperation), _) => r.withNewChildren(child.children) // Case 4: When a RebalancePartitions has a child of RebalancePartitions we can remove the // child. 
- case r @ RebalancePartitions(_, child: RebalancePartitions) => + case r @ RebalancePartitions(_, child: RebalancePartitions, _) => r.withNewChildren(child.children) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 8a6598f6f0841..895eeb772075d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1479,15 +1479,19 @@ object RepartitionByExpression { */ case class RebalancePartitions( partitionExpressions: Seq[Expression], - child: LogicalPlan) extends UnaryNode { + child: LogicalPlan, + initialNumPartitionOpt: Option[Int] = None) extends UnaryNode { override def maxRows: Option[Long] = child.maxRows override def output: Seq[Attribute] = child.output override val nodePatterns: Seq[TreePattern] = Seq(REBALANCE_PARTITIONS) - def partitioning: Partitioning = if (partitionExpressions.isEmpty) { - RoundRobinPartitioning(conf.numShufflePartitions) - } else { - HashPartitioning(partitionExpressions, conf.numShufflePartitions) + def partitioning: Partitioning = { + val initialNumPartitions = initialNumPartitionOpt.getOrElse(conf.numShufflePartitions) + if (partitionExpressions.isEmpty) { + RoundRobinPartitioning(initialNumPartitions) + } else { + HashPartitioning(partitionExpressions, initialNumPartitions) + } } override protected def withNewChildInternal(newChild: LogicalPlan): RebalancePartitions = diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveHintsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveHintsSuite.scala index 77dc5b4ccedc4..ab8bcee121232 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveHintsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveHintsSuite.scala @@ -299,11 +299,19 @@ class ResolveHintsSuite extends AnalysisTest { } } - test("SPARK-35786: Support optimize repartition by expression in AQE") { + test("SPARK-35786: Support optimize rebalance by expression in AQE") { checkAnalysisWithoutViewWrapper( UnresolvedHint("REBALANCE", Seq(UnresolvedAttribute("a")), table("TaBlE")), RebalancePartitions(Seq(AttributeReference("a", IntegerType)()), testRelation)) + checkAnalysisWithoutViewWrapper( + UnresolvedHint("REBALANCE", Seq(1, UnresolvedAttribute("a")), table("TaBlE")), + RebalancePartitions(Seq(AttributeReference("a", IntegerType)()), testRelation, Some(1))) + + checkAnalysisWithoutViewWrapper( + UnresolvedHint("REBALANCE", Seq(Literal(1), UnresolvedAttribute("a")), table("TaBlE")), + RebalancePartitions(Seq(AttributeReference("a", IntegerType)()), testRelation, Some(1))) + checkAnalysisWithoutViewWrapper( UnresolvedHint("REBALANCE", Seq.empty, table("TaBlE")), RebalancePartitions(Seq.empty, testRelation)) @@ -313,13 +321,42 @@ class ResolveHintsSuite extends AnalysisTest { UnresolvedHint("REBALANCE", Seq(UnresolvedAttribute("a")), table("TaBlE")), testRelation) + checkAnalysisWithoutViewWrapper( + UnresolvedHint("REBALANCE", Seq(1, UnresolvedAttribute("a")), table("TaBlE")), + testRelation) + + checkAnalysisWithoutViewWrapper( + UnresolvedHint("REBALANCE", Seq(Literal(1), UnresolvedAttribute("a")), table("TaBlE")), + testRelation) + checkAnalysisWithoutViewWrapper( 
UnresolvedHint("REBALANCE", Seq.empty, table("TaBlE")), testRelation) + + checkAnalysisWithoutViewWrapper( + UnresolvedHint("REBALANCE", 1 :: Nil, table("TaBlE")), + testRelation) } assertAnalysisError( - UnresolvedHint("REBALANCE", Seq(Literal(1)), table("TaBlE")), + UnresolvedHint("REBALANCE", Seq(Literal(1), Literal(1)), table("TaBlE")), Seq("Hint parameter should include columns")) + + assertAnalysisError( + UnresolvedHint("REBALANCE", Seq(1, Literal(1)), table("TaBlE")), + Seq("Hint parameter should include columns")) + } + + test("SPARK-38410: Support specify initial partition number for rebalance") { + withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "3") { + Seq( + Nil -> 3, + Seq(1) -> 1, + Seq(UnresolvedAttribute("a")) -> 3, + Seq(1, UnresolvedAttribute("a")) -> 1).foreach { case (param, initialNumPartitions) => + assert(UnresolvedHint("REBALANCE", param, testRelation).analyze + .asInstanceOf[RebalancePartitions].partitioning.numPartitions == initialNumPartitions) + } + } } } From 959694271e30879c944d7fd5de2740571012460a Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 14 Mar 2022 14:10:58 +0800 Subject: [PATCH 485/513] [SPARK-38523][SQL] Fix referring to the corrupt record column from CSV ### What changes were proposed in this pull request? In the case when an user specifies the corrupt record column via the CSV option `columnNameOfCorruptRecord`: 1. Disable the column pruning feature in the CSV parser. 2. Don't push filters to `UnivocityParser` that refer to the "virtual" column `columnNameOfCorruptRecord`. Since the column cannot present in the input CSV, user's queries fail while compiling predicates. After the changes, the skipped filters are applied later on the upper layer. ### Why are the changes needed? The changes allow to refer to the corrupt record column from user's queries: ```Scala spark.read.format("csv") .option("header", "true") .option("columnNameOfCorruptRecord", "corrRec") .schema(schema) .load("csv_corrupt_record.csv") .filter($"corrRec".isNotNull) .show() ``` for the input file "csv_corrupt_record.csv": ``` 0,2013-111_11 12:13:14 1,1983-08-04 ``` the query returns: ``` +---+----+----------------------+ |a |b |corrRec | +---+----+----------------------+ |0 |null|0,2013-111_11 12:13:14| +---+----+----------------------+ ``` ### Does this PR introduce _any_ user-facing change? Yes. Before the changes, the query above fails with the exception: ```Java java.lang.IllegalArgumentException: _corrupt_record does not exist. Available: a, b at org.apache.spark.sql.types.StructType.$anonfun$fieldIndex$1(StructType.scala:310) ~[classes/:?] ``` ### How was this patch tested? By running new CSV test: ``` $ build/sbt "sql/testOnly *.CSVv1Suite" $ build/sbt "sql/testOnly *.CSVv2Suite" $ build/sbt "sql/testOnly *.CSVLegacyTimeParserSuite" ``` Closes #35817 from MaxGekk/csv-ref-_corupt_record. 
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../datasources/csv/CSVFileFormat.scala | 18 ++++---- .../v2/csv/CSVPartitionReaderFactory.scala | 3 +- .../datasources/v2/csv/CSVScan.scala | 16 +++---- .../execution/datasources/csv/CSVSuite.scala | 42 ++++++++----------- 4 files changed, 34 insertions(+), 45 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala index d40ad9d1bf0e9..8d9525078402e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.csv.{CSVHeaderChecker, CSVOptions, UnivocityParser} import org.apache.spark.sql.catalyst.expressions.ExprUtils import org.apache.spark.sql.catalyst.util.CompressionCodecs -import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ @@ -101,21 +100,20 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister { hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow] = { val broadcastedHadoopConf = sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) - + val columnPruning = sparkSession.sessionState.conf.csvColumnPruning && + !requiredSchema.exists(_.name == sparkSession.sessionState.conf.columnNameOfCorruptRecord) val parsedOptions = new CSVOptions( options, - sparkSession.sessionState.conf.csvColumnPruning, + columnPruning, sparkSession.sessionState.conf.sessionLocalTimeZone, sparkSession.sessionState.conf.columnNameOfCorruptRecord) // Check a field requirement for corrupt records here to throw an exception in a driver side ExprUtils.verifyColumnNameOfCorruptRecord(dataSchema, parsedOptions.columnNameOfCorruptRecord) - - if (requiredSchema.length == 1 && - requiredSchema.head.name == parsedOptions.columnNameOfCorruptRecord) { - throw QueryCompilationErrors.queryFromRawFilesIncludeCorruptRecordColumnError() - } - val columnPruning = sparkSession.sessionState.conf.csvColumnPruning + // Don't push any filter which refers to the "virtual" column which cannot present in the input. + // Such filters will be applied later on the upper layer. 
+ val actualFilters = + filters.filterNot(_.references.contains(parsedOptions.columnNameOfCorruptRecord)) (file: PartitionedFile) => { val conf = broadcastedHadoopConf.value.value @@ -127,7 +125,7 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister { actualDataSchema, actualRequiredSchema, parsedOptions, - filters) + actualFilters) val schema = if (columnPruning) actualRequiredSchema else actualDataSchema val isStartOfFile = file.start == 0 val headerChecker = new CSVHeaderChecker( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVPartitionReaderFactory.scala index 31d31bd43f453..bf996ab1b3111 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVPartitionReaderFactory.scala @@ -46,7 +46,6 @@ case class CSVPartitionReaderFactory( partitionSchema: StructType, parsedOptions: CSVOptions, filters: Seq[Filter]) extends FilePartitionReaderFactory { - private val columnPruning = sqlConf.csvColumnPruning override def buildReader(file: PartitionedFile): PartitionReader[InternalRow] = { val conf = broadcastedConf.value.value @@ -59,7 +58,7 @@ case class CSVPartitionReaderFactory( actualReadDataSchema, parsedOptions, filters) - val schema = if (columnPruning) actualReadDataSchema else actualDataSchema + val schema = if (parsedOptions.columnPruning) actualReadDataSchema else actualDataSchema val isStartOfFile = file.start == 0 val headerChecker = new CSVHeaderChecker( schema, parsedOptions, source = s"CSV file: ${file.filePath}", isStartOfFile) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala index cc3c146106670..5c33a1047a12f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala @@ -24,7 +24,6 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.csv.CSVOptions import org.apache.spark.sql.catalyst.expressions.{Expression, ExprUtils} import org.apache.spark.sql.connector.read.PartitionReaderFactory -import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.csv.CSVDataSource import org.apache.spark.sql.execution.datasources.v2.TextBasedFileScan @@ -45,9 +44,11 @@ case class CSVScan( dataFilters: Seq[Expression] = Seq.empty) extends TextBasedFileScan(sparkSession, options) { + val columnPruning = sparkSession.sessionState.conf.csvColumnPruning && + !readDataSchema.exists(_.name == sparkSession.sessionState.conf.columnNameOfCorruptRecord) private lazy val parsedOptions: CSVOptions = new CSVOptions( options.asScala.toMap, - columnPruning = sparkSession.sessionState.conf.csvColumnPruning, + columnPruning = columnPruning, sparkSession.sessionState.conf.sessionLocalTimeZone, sparkSession.sessionState.conf.columnNameOfCorruptRecord) @@ -67,11 +68,10 @@ case class CSVScan( override def createReaderFactory(): PartitionReaderFactory = { // Check a field requirement for corrupt records here to throw an exception in a driver side 
ExprUtils.verifyColumnNameOfCorruptRecord(dataSchema, parsedOptions.columnNameOfCorruptRecord) - - if (readDataSchema.length == 1 && - readDataSchema.head.name == parsedOptions.columnNameOfCorruptRecord) { - throw QueryCompilationErrors.queryFromRawFilesIncludeCorruptRecordColumnError() - } + // Don't push any filter which refers to the "virtual" column which cannot present in the input. + // Such filters will be applied later on the upper layer. + val actualFilters = + pushedFilters.filterNot(_.references.contains(parsedOptions.columnNameOfCorruptRecord)) val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap // Hadoop Configurations are case sensitive. @@ -81,7 +81,7 @@ case class CSVScan( // The partition values are already truncated in `FileScan.partitions`. // We should use `readPartitionSchema` as the partition schema here. CSVPartitionReaderFactory(sparkSession.sessionState.conf, broadcastedConf, - dataSchema, readDataSchema, readPartitionSchema, parsedOptions, pushedFilters) + dataSchema, readDataSchema, readPartitionSchema, parsedOptions, actualFilters) } override def equals(obj: Any): Boolean = obj match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 9f9b7b72ab329..41b4f909ce958 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -1621,38 +1621,30 @@ abstract class CSVSuite checkAnswer(df, Row("a", null, "a")) } - test("SPARK-21610: Corrupt records are not handled properly when creating a dataframe " + - "from a file") { - val columnNameOfCorruptRecord = "_corrupt_record" + test("SPARK-38523: referring to the corrupt record column") { val schema = new StructType() .add("a", IntegerType) .add("b", DateType) - .add(columnNameOfCorruptRecord, StringType) - // negative cases - val msg = intercept[AnalysisException] { - spark - .read - .option("columnNameOfCorruptRecord", columnNameOfCorruptRecord) - .schema(schema) - .csv(testFile(valueMalformedFile)) - .select(columnNameOfCorruptRecord) - .collect() - }.getMessage - assert(msg.contains("only include the internal corrupt record column")) - - // workaround - val df = spark + .add("corrRec", StringType) + val readback = spark .read - .option("columnNameOfCorruptRecord", columnNameOfCorruptRecord) + .option("columnNameOfCorruptRecord", "corrRec") .schema(schema) .csv(testFile(valueMalformedFile)) - .cache() - assert(df.filter($"_corrupt_record".isNotNull).count() == 1) - assert(df.filter($"_corrupt_record".isNull).count() == 1) checkAnswer( - df.select(columnNameOfCorruptRecord), - Row("0,2013-111_11 12:13:14") :: Row(null) :: Nil - ) + readback, + Row(0, null, "0,2013-111_11 12:13:14") :: + Row(1, Date.valueOf("1983-08-04"), null) :: Nil) + checkAnswer( + readback.filter($"corrRec".isNotNull), + Row(0, null, "0,2013-111_11 12:13:14")) + checkAnswer( + readback.select($"corrRec", $"b"), + Row("0,2013-111_11 12:13:14", null) :: + Row(null, Date.valueOf("1983-08-04")) :: Nil) + checkAnswer( + readback.filter($"corrRec".isNull && $"a" === 1), + Row(1, Date.valueOf("1983-08-04"), null) :: Nil) } test("SPARK-23846: schema inferring touches less data if samplingRatio < 1.0") { From 35536a1478020fbd844703517b34d655f89906b2 Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Mon, 14 Mar 2022 09:11:58 +0300 Subject: [PATCH 486/513] [SPARK-38103][SQL] 
Migrate parsing errors of transform into the new error framework ### What changes were proposed in this pull request? In this PR, I migrated parsing errors of transform listed below into the new error framework. - transformNotSupportQuantifierError - transformWithSerdeUnsupportedError - tooManyArgumentsForTransformError - notEnoughArgumentsForTransformError - invalidTransformArgumentError ### Why are the changes needed? Porting the parsing errors of transform into the new error framework should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `$ build/sbt "test:testOnly *QueryParsingErrorsSuite"` Closes #35759 from yutoacts/SPARK-38103. Authored-by: Yuto Akutsu Signed-off-by: Max Gekk --- .../sql/catalyst/parser/AstBuilder.scala | 4 +- .../spark/sql/errors/QueryParsingErrors.scala | 16 ++---- .../sql-tests/results/transform.sql.out | 4 +- .../sql/errors/QueryParsingErrorsSuite.scala | 49 +++++++++++++++++++ 4 files changed, 58 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ddbe18b472adc..5eb72af6b2f09 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3024,7 +3024,7 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit if (arguments.size > 1) { throw QueryParsingErrors.tooManyArgumentsForTransformError(name, ctx) } else if (arguments.isEmpty) { - throw QueryParsingErrors.notEnoughArgumentsForTransformError(name, ctx) + throw new IllegalStateException(s"Not enough arguments for transform $name") } else { getFieldReference(ctx, arguments.head) } @@ -3085,7 +3085,7 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit .map(typedVisit[Literal]) .map(lit => LiteralValue(lit.value, lit.dataType)) reference.orElse(literal) - .getOrElse(throw QueryParsingErrors.invalidTransformArgumentError(ctx)) + .getOrElse(throw new IllegalStateException("Invalid transform argument")) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 4c62550a299b5..c03b1b45f644d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -90,11 +90,13 @@ object QueryParsingErrors { } def transformNotSupportQuantifierError(ctx: ParserRuleContext): Throwable = { - new ParseException("TRANSFORM does not support DISTINCT/ALL in inputs", ctx) + new ParseException("UNSUPPORTED_FEATURE", + Array("TRANSFORM does not support DISTINCT/ALL in inputs"), ctx) } def transformWithSerdeUnsupportedError(ctx: ParserRuleContext): Throwable = { - new ParseException("TRANSFORM with serde is only supported in hive mode", ctx) + new ParseException("UNSUPPORTED_FEATURE", + Array("TRANSFORM with serde is only supported in hive mode"), ctx) } def lateralWithPivotInFromClauseNotAllowedError(ctx: FromClauseContext): Throwable = { @@ -224,21 +226,13 @@ object QueryParsingErrors { } def tooManyArgumentsForTransformError(name: String, ctx: ApplyTransformContext): Throwable = { - new ParseException(s"Too many arguments for transform $name", ctx) - } - - def 
notEnoughArgumentsForTransformError(name: String, ctx: ApplyTransformContext): Throwable = { - new ParseException(s"Not enough arguments for transform $name", ctx) + new ParseException("INVALID_SQL_SYNTAX", Array(s"Too many arguments for transform $name"), ctx) } def invalidBucketsNumberError(describe: String, ctx: ApplyTransformContext): Throwable = { new ParseException(s"Invalid number of buckets: $describe", ctx) } - def invalidTransformArgumentError(ctx: TransformArgumentContext): Throwable = { - new ParseException("Invalid transform argument", ctx) - } - def cannotCleanReservedNamespacePropertyError( property: String, ctx: ParserRuleContext, msg: String): Throwable = { new ParseException(s"$property is a reserved namespace property, $msg.", ctx) diff --git a/sql/core/src/test/resources/sql-tests/results/transform.sql.out b/sql/core/src/test/resources/sql-tests/results/transform.sql.out index c1c13cdf276c0..c9a04c99b9fb2 100644 --- a/sql/core/src/test/resources/sql-tests/results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/transform.sql.out @@ -719,7 +719,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17) +The feature is not supported: TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17) == SQL == SELECT TRANSFORM(DISTINCT b, a, c) @@ -739,7 +739,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17) +The feature is not supported: TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17) == SQL == SELECT TRANSFORM(ALL b, a, c) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala index f7b891ead6134..5610c4d000bfa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -169,4 +169,53 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { |-------------------------------^^^ |""".stripMargin) } + + test("UNSUPPORTED_FEATURE: TRANSFORM does not support DISTINCT/ALL") { + validateParsingError( + sqlText = "SELECT TRANSFORM(DISTINCT a) USING 'a' FROM t", + errorClass = "UNSUPPORTED_FEATURE", + sqlState = "0A000", + message = + """ + |The feature is not supported: """.stripMargin + + """TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17) + | + |== SQL == + |SELECT TRANSFORM(DISTINCT a) USING 'a' FROM t + |-----------------^^^ + |""".stripMargin) + } + + test("UNSUPPORTED_FEATURE: In-memory mode does not support TRANSFORM with serde") { + validateParsingError( + sqlText = "SELECT TRANSFORM(a) ROW FORMAT SERDE " + + "'org.apache.hadoop.hive.serde2.OpenCSVSerde' USING 'a' FROM t", + errorClass = "UNSUPPORTED_FEATURE", + sqlState = "0A000", + message = + """ + |The feature is not supported: """.stripMargin + + """TRANSFORM with serde is only supported in hive mode(line 1, pos 0) + | + |== SQL == + |SELECT TRANSFORM(a) ROW FORMAT SERDE """.stripMargin + + """'org.apache.hadoop.hive.serde2.OpenCSVSerde' USING 'a' FROM t + |^^^ + |""".stripMargin) + } + + test("INVALID_SQL_SYNTAX: Too many arguments for transform") { + validateParsingError( + sqlText = "CREATE TABLE table(col int) PARTITIONED BY (years(col,col))", + errorClass = "INVALID_SQL_SYNTAX", + sqlState = "42000", + 
message = + """ + |Invalid SQL syntax: Too many arguments for transform years(line 1, pos 44) + | + |== SQL == + |CREATE TABLE table(col int) PARTITIONED BY (years(col,col)) + |--------------------------------------------^^^ + |""".stripMargin) + } } From 8e44791910ab4866f558ddb7db583c28c2766585 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Mon, 14 Mar 2022 14:13:18 +0800 Subject: [PATCH 487/513] [SPARK-38504][SQL] Cannot read TimestampNTZ as TimestampLTZ ### What changes were proposed in this pull request? Based discussion on https://github.com/apache/spark/pull/34984#discussion_r823685909. This PR don't allow reading NTZ as LTZ for now. ### Why are the changes needed? Forbid the data error when read TimestampNTZ as TimestampLTZ. ### Does this PR introduce _any_ user-facing change? 'Yes'. This PR don't allow reading NTZ as LTZ for now explicitly. ### How was this patch tested? New tests. Closes #35803 from beliefer/SPARK-38504. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../sql/errors/QueryExecutionErrors.scala | 7 ++++ .../execution/datasources/orc/OrcUtils.scala | 2 ++ .../errors/QueryExecutionErrorsSuite.scala | 32 +++++++++++-------- .../datasources/orc/OrcQuerySuite.scala | 25 +-------------- 4 files changed, 28 insertions(+), 38 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 49861304a78ab..9381cecc7d08f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1954,6 +1954,13 @@ object QueryExecutionErrors { messageParameters = Array("Unable to convert timestamp of Orc to data type 'timestamp_ntz'")) } + def cannotConvertOrcTimestampNTZToTimestampLTZError(): Throwable = { + new SparkUnsupportedOperationException( + errorClass = "UNSUPPORTED_OPERATION", + messageParameters = + Array("Unable to convert timestamp ntz of Orc to data type 'timestamp_ltz'")) + } + def writePartitionExceedConfigSizeWhenDynamicPartitionError( numWrittenParts: Int, maxDynamicPartitions: Int, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala index a96f77b9acbeb..1f05117462db8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala @@ -205,6 +205,8 @@ object OrcUtils extends Logging { orcCatalystSchema.fields.map(_.dataType).zip(dataSchema.fields.map(_.dataType)).foreach { case (TimestampType, TimestampNTZType) => throw QueryExecutionErrors.cannotConvertOrcTimestampToTimestampNTZError() + case (TimestampNTZType, TimestampType) => + throw QueryExecutionErrors.cannotConvertOrcTimestampNTZToTimestampLTZError() case (t1: StructType, t2: StructType) => checkTimestampCompatibility(t1, t2) case _ => } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index b90950a014a79..cdd50a61bcf99 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -17,17 +17,15 @@ package org.apache.spark.sql.errors 
-import java.sql.Timestamp - import org.apache.spark.{SparkArithmeticException, SparkException, SparkIllegalArgumentException, SparkRuntimeException, SparkUnsupportedOperationException, SparkUpgradeException} -import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.{DataFrame, QueryTest} import org.apache.spark.sql.execution.datasources.orc.OrcTest import org.apache.spark.sql.execution.datasources.parquet.ParquetTest import org.apache.spark.sql.functions.{lit, lower, struct, sum} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy.EXCEPTION import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{StructField, StructType, TimestampNTZType, TimestampType} +import org.apache.spark.sql.types.{StructType, TimestampType} import org.apache.spark.sql.util.ArrowUtils class QueryExecutionErrorsSuite extends QueryTest @@ -262,25 +260,31 @@ class QueryExecutionErrorsSuite extends QueryTest } test("UNSUPPORTED_OPERATION - SPARK-36346: can't read Timestamp as TimestampNTZ") { - val data = (1 to 10).map { i => - val ts = new Timestamp(i) - Row(ts) - } + withTempPath { file => + sql("select timestamp_ltz'2019-03-21 00:02:03'").write.orc(file.getCanonicalPath) + withAllNativeOrcReaders { + val e = intercept[SparkException] { + spark.read.schema("time timestamp_ntz").orc(file.getCanonicalPath).collect() + }.getCause.asInstanceOf[SparkUnsupportedOperationException] - val actualSchema = StructType(Seq(StructField("time", TimestampType, false))) - val providedSchema = StructType(Seq(StructField("time", TimestampNTZType, false))) + assert(e.getErrorClass === "UNSUPPORTED_OPERATION") + assert(e.getMessage === "The operation is not supported: " + + "Unable to convert timestamp of Orc to data type 'timestamp_ntz'") + } + } + } + test("UNSUPPORTED_OPERATION - SPARK-38504: can't read TimestampNTZ as TimestampLTZ") { withTempPath { file => - val df = spark.createDataFrame(sparkContext.parallelize(data), actualSchema) - df.write.orc(file.getCanonicalPath) + sql("select timestamp_ntz'2019-03-21 00:02:03'").write.orc(file.getCanonicalPath) withAllNativeOrcReaders { val e = intercept[SparkException] { - spark.read.schema(providedSchema).orc(file.getCanonicalPath).collect() + spark.read.schema("time timestamp_ltz").orc(file.getCanonicalPath).collect() }.getCause.asInstanceOf[SparkUnsupportedOperationException] assert(e.getErrorClass === "UNSUPPORTED_OPERATION") assert(e.getMessage === "The operation is not supported: " + - "Unable to convert timestamp of Orc to data type 'timestamp_ntz'") + "Unable to convert timestamp ntz of Orc to data type 'timestamp_ltz'") } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index 7f3809dd044f4..49b7cfa9d3724 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.orc import java.io.File import java.nio.charset.StandardCharsets import java.sql.Timestamp -import java.time.{LocalDateTime, ZoneOffset} +import java.time.LocalDateTime import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path @@ -803,29 +803,6 @@ abstract class OrcQuerySuite extends OrcQueryTest with SharedSparkSession { } } } - - 
test("SPARK-36346: read TimestampNTZ as TimestampLTZ") { - val data = (1 to 10).map { i => - // The second parameter is `nanoOfSecond`, while java.sql.Timestamp accepts milliseconds - // as input. So here we multiple the `nanoOfSecond` by NANOS_PER_MILLIS - val ts = LocalDateTime.ofEpochSecond(0, i * 1000000, ZoneOffset.UTC) - Row(ts) - } - val answer = (1 to 10).map { i => - val ts = new java.sql.Timestamp(i) - Row(ts) - } - val actualSchema = StructType(Seq(StructField("time", TimestampNTZType, false))) - val providedSchema = StructType(Seq(StructField("time", TimestampType, false))) - - withTempPath { file => - val df = spark.createDataFrame(sparkContext.parallelize(data), actualSchema) - df.write.orc(file.getCanonicalPath) - withAllNativeOrcReaders { - checkAnswer(spark.read.schema(providedSchema).orc(file.getCanonicalPath), answer) - } - } - } } class OrcV1QuerySuite extends OrcQuerySuite { From 2844a18a0084fba37ac75dad7b3f855d2df5e81b Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 14 Mar 2022 14:14:49 +0800 Subject: [PATCH 488/513] [SPARK-38360][SQL][AVRO][SS][FOLLOWUP] Replace `TreeNode.collectFirst` + `isDefined/isEmpty` with `exists` ### What changes were proposed in this pull request? This is a follow up of SPARK-38360 to simplify code patterns related to `TreeNode.collectFirst`, the simplified rules are as follows: - `treeNode.collectFirst(condition).isDefined` -> `treeNode.exists(condition)` - `treeNode.collectFirst(condition).isEmpty` -> `!treeNode.exists(condition)` ### Why are the changes needed? Code simplification ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA Closes #35833 from LuciferYang/SPARK-38360-FOLLOWUP. Authored-by: yangjie01 Signed-off-by: Ruifeng Zheng --- .../org/apache/spark/sql/avro/AvroSuite.scala | 7 ++++--- .../spark/sql/catalyst/analysis/Analyzer.scala | 15 ++++++--------- .../catalyst/analysis/StreamingJoinHelper.scala | 2 +- .../sql/catalyst/plans/logical/LogicalPlan.scala | 7 ++++--- .../spark/sql/execution/SparkStrategies.scala | 5 ++++- .../spark/sql/FileBasedDataSourceSuite.scala | 7 ++++--- .../scala/org/apache/spark/sql/QueryTest.scala | 11 ++++++----- .../sql/execution/WholeStageCodegenSuite.scala | 10 ++++++---- .../streaming/test/DataStreamTableAPISuite.scala | 8 +++----- .../apache/spark/sql/hive/client/HiveShim.scala | 9 +++++---- 10 files changed, 43 insertions(+), 38 deletions(-) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 05d57ecf2408e..a70fbc0d833e8 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -2329,9 +2329,10 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper { } assert(filterCondition.isDefined) // The partitions filters should be pushed down and no need to be reevaluated. 
- assert(filterCondition.get.collectFirst { - case a: AttributeReference if a.name == "p1" || a.name == "p2" => a - }.isEmpty) + assert(!filterCondition.get.exists { + case a: AttributeReference => a.name == "p1" || a.name == "p2" + case _ => false + }) val fileScan = df.queryExecution.executedPlan collectFirst { case BatchScanExec(_, f: AvroScan, _) => f diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6783d0b343a65..528998398ddeb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -530,10 +530,7 @@ class Analyzer(override val catalogManager: CatalogManager) object ResolveGroupingAnalytics extends Rule[LogicalPlan] { private[analysis] def hasGroupingFunction(e: Expression): Boolean = { - e.collectFirst { - case g: Grouping => g - case g: GroupingID => g - }.isDefined + e.exists (g => g.isInstanceOf[Grouping] || g.isInstanceOf[GroupingID]) } private def replaceGroupingFunc( @@ -2523,11 +2520,11 @@ class Analyzer(override val catalogManager: CatalogManager) }.toSet // Find the first Aggregate Expression that is not Windowed. - exprs.exists(_.collectFirst { - case ae: AggregateExpression if !windowedAggExprs.contains(ae) => ae - case e: PythonUDF if PythonUDF.isGroupedAggPandasUDF(e) && - !windowedAggExprs.contains(e) => e - }.isDefined) + exprs.exists(_.exists { + case ae: AggregateExpression => !windowedAggExprs.contains(ae) + case e: PythonUDF => PythonUDF.isGroupedAggPandasUDF(e) && !windowedAggExprs.contains(e) + case _ => false + }) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala index 9cdd77ee5a52d..3c5ab55a8a72a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala @@ -169,7 +169,7 @@ object StreamingJoinHelper extends PredicateHelper with Logging { return None } val constraintTerm = constraintTerms.head - if (constraintTerm.collectFirst { case u: UnaryMinus => u }.isEmpty) { + if (!constraintTerm.exists(_.isInstanceOf[UnaryMinus])) { // Incorrect condition. We want the constraint term in canonical form to be `-leftTime` // so that resolve for it as `-leftTime + watermark + c < 0` ==> `watermark + c < leftTime`. // Now, if the original conditions is `rightTime-with-watermark > leftTime` and watermark diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 5ae2a7da826a4..36a3b110fda29 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -212,11 +212,12 @@ object LogicalPlanIntegrity { private def canGetOutputAttrs(p: LogicalPlan): Boolean = { p.resolved && !p.expressions.exists { e => - e.collectFirst { + e.exists { // We cannot call `output` in plans with a `ScalarSubquery` expr having no column, // so, we filter out them in advance. 
- case s: ScalarSubquery if s.plan.schema.fields.isEmpty => true - }.isDefined + case s: ScalarSubquery => s.plan.schema.fields.isEmpty + case _ => false + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 3b48a8f166014..675b158100394 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -441,7 +441,10 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { /** Ensures that this plan does not have a streaming aggregate in it. */ def hasNoStreamingAgg: Boolean = { - plan.collectFirst { case a: Aggregate if a.isStreaming => a }.isEmpty + !plan.exists { + case a: Aggregate => a.isStreaming + case _ => false + } } // The following cases of limits on a streaming plan has to be executed with a stateful diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 11886f80f9455..bc7a7b2977aca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -836,9 +836,10 @@ class FileBasedDataSourceSuite extends QueryTest } assert(filterCondition.isDefined) // The partitions filters should be pushed down and no need to be reevaluated. - assert(filterCondition.get.collectFirst { - case a: AttributeReference if a.name == "p1" || a.name == "p2" => a - }.isEmpty) + assert(!filterCondition.get.exists { + case a: AttributeReference => a.name == "p1" || a.name == "p2" + case _ => false + }) val fileScan = df.queryExecution.executedPlan collectFirst { case BatchScanExec(_, f: FileScan, _) => f diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala index 31569d82b4dc9..06f94c62d9c25 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala @@ -207,11 +207,12 @@ abstract class QueryTest extends PlanTest { */ def assertCached(query: Dataset[_], cachedName: String, storageLevel: StorageLevel): Unit = { val planWithCaching = query.queryExecution.withCachedData - val matched = planWithCaching.collectFirst { case cached: InMemoryRelation => - val cacheBuilder = cached.cacheBuilder - cachedName == cacheBuilder.tableName.get && - (storageLevel == cacheBuilder.storageLevel) - }.getOrElse(false) + val matched = planWithCaching.exists { + case cached: InMemoryRelation => + val cacheBuilder = cached.cacheBuilder + cachedName == cacheBuilder.tableName.get && (storageLevel == cacheBuilder.storageLevel) + case _ => false + } assert(matched, s"Expected query plan to hit cache $cachedName with storage " + s"level $storageLevel, but it doesn't.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index f0533f89b63e6..005e2c5c8c9ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -700,10 +700,11 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession // BroadcastHashJoinExec with a HashAggregateExec 
child containing no aggregate expressions val distinctWithId = baseTable.distinct().withColumn("id", monotonically_increasing_id()) .join(baseTable, "idx") - assert(distinctWithId.queryExecution.executedPlan.collectFirst { + assert(distinctWithId.queryExecution.executedPlan.exists { case WholeStageCodegenExec( ProjectExec(_, BroadcastHashJoinExec(_, _, _, _, _, _: HashAggregateExec, _, _))) => true - }.isDefined) + case _ => false + }) checkAnswer(distinctWithId, Seq(Row(1, 0), Row(1, 0))) // BroadcastHashJoinExec with a HashAggregateExec child containing a Final mode aggregate @@ -711,10 +712,11 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession val groupByWithId = baseTable.groupBy("idx").sum().withColumn("id", monotonically_increasing_id()) .join(baseTable, "idx") - assert(groupByWithId.queryExecution.executedPlan.collectFirst { + assert(groupByWithId.queryExecution.executedPlan.exists { case WholeStageCodegenExec( ProjectExec(_, BroadcastHashJoinExec(_, _, _, _, _, _: HashAggregateExec, _, _))) => true - }.isDefined) + case _ => false + }) checkAnswer(groupByWithId, Seq(Row(1, 2, 0), Row(1, 2, 0))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala index 62e944c96ef9a..61b3ec26a4d20 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala @@ -162,11 +162,9 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { spark.sql(s"CREATE TABLE $tblName (data int) USING $v2Source") // Check the StreamingRelationV2 has been replaced by StreamingRelation - val plan = spark.readStream.option("path", tempDir.getCanonicalPath).table(tblName) - .queryExecution.analyzed.collectFirst { - case d: StreamingRelationV2 => d - } - assert(plan.isEmpty) + val exists = spark.readStream.option("path", tempDir.getCanonicalPath).table(tblName) + .queryExecution.analyzed.exists(_.isInstanceOf[StreamingRelationV2]) + assert(!exists) } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 71be39a23af37..67bb72c187802 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -1145,10 +1145,11 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { // Because there is no way to know whether the partition properties has timeZone, // client-side filtering cannot be used with TimeZoneAwareExpression. def hasTimeZoneAwareExpression(e: Expression): Boolean = { - e.collectFirst { - case cast: CastBase if cast.needsTimeZone => cast - case tz: TimeZoneAwareExpression if !tz.isInstanceOf[CastBase] => tz - }.isDefined + e.exists { + case cast: CastBase => cast.needsTimeZone + case tz: TimeZoneAwareExpression => !tz.isInstanceOf[CastBase] + case _ => false + } } if (!SQLConf.get.metastorePartitionPruningFastFallback || From 130bcce678279255120c0b2c623fc9550c1243f2 Mon Sep 17 00:00:00 2001 From: Daniel Tenedorio Date: Mon, 14 Mar 2022 14:26:37 +0800 Subject: [PATCH 489/513] [SPARK-38415][SQL] Update the histogram_numeric (x, y) result type to make x == the input type ### What changes were proposed in this pull request? 
This pull request updates the histogram_numeric SQL function to support more numeric input types, returning the results an an array of structs of two fields each. The first field has the same type as the first argument to the histogram_numeric aggregate function (rather than always having double type before this change). This removes the need for the user to apply a cast function to the result in order to use it. Example behavior after this change becomes effective: SELECT histogram_numeric(col, 3) FROM VALUES (TIMESTAMP '2017-03-01 00:00:00'), (TIMESTAMP '2017-04-01 00:00:00'), (TIMESTAMP '2017-05-01 00:00:00') AS tab(col); Returns type: struct>>. Query output: [{"x":2017-03-01 00:00:00,"y":1.0},{"x":2017-04-01 00:00:00,"y":1.0},{"x":2017-05-01 00:00:00,"y":1.0}]. ### Why are the changes needed? This removes the need for users to explicitly cast the function result type in many cases. ### Does this PR introduce _any_ user-facing change? Yes, it changes the `histogram_numeric` function result type. ### How was this patch tested? Unit tests, file-based query tests. Closes #35735 from dtenedor/numeric-histogram-types. Lead-authored-by: Daniel Tenedorio Co-authored-by: Hyukjin Kwon Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 + .../aggregate/HistogramNumeric.scala | 48 ++++++- .../apache/spark/sql/internal/SQLConf.scala | 15 ++ .../aggregate/HistogramNumericSuite.scala | 124 ++++++++++++++-- .../sql-functions/sql-expression-schema.md | 2 +- .../resources/sql-tests/inputs/group-by.sql | 27 ++++ .../sql-tests/results/group-by.sql.out | 136 +++++++++++++++++- 7 files changed, 336 insertions(+), 18 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 578586c96a2fa..32c55d1826e6f 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -24,6 +24,8 @@ license: | ## Upgrading from Spark SQL 3.2 to 3.3 + - Since Spark 3.3, the `histogram_numeric` function in Spark SQL returns an output type of an array of structs (x, y), where the type of the 'x' field in the return value is propagated from the input values consumed in the aggregate function. In Spark 3.2 or earlier, 'x' always had double type. Optionally, use the configuration `spark.sql.legacy.histogramNumericPropagateInputType` since Spark 3.3 to revert back to the previous behavior. + - Since Spark 3.3, `DayTimeIntervalType` in Spark SQL is mapped to Arrow's `Duration` type in `ArrowWriter` and `ArrowColumnVector` developer APIs. Previously, `DayTimeIntervalType` was mapped to Arrow's `Interval` type which does not match with the types of other languages Spark SQL maps. For example, `DayTimeIntervalType` is mapped to `java.time.Duration` in Java. - Since Spark 3.3, the functions `lpad` and `rpad` have been overloaded to support byte sequences. When the first argument is a byte sequence, the optional padding pattern must also be a byte sequence and the result is a BINARY value. The default padding pattern in this case is the zero byte. 
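To make the type propagation described above concrete, a small sketch (it assumes a `SparkSession` named `spark`; the `hist` alias is added here only for readability):

```Scala
// With the default spark.sql.legacy.histogramNumericPropagateInputType=true, the 'x' field of each
// histogram bin keeps the input type (timestamp below); setting the flag to false restores the
// pre-3.3 behavior where 'x' is always a double.
val df = spark.sql(
  """SELECT histogram_numeric(col, 3) AS hist
    |FROM VALUES (TIMESTAMP '2017-03-01 00:00:00'),
    |            (TIMESTAMP '2017-04-01 00:00:00'),
    |            (TIMESTAMP '2017-05-01 00:00:00') AS tab(col)""".stripMargin)
df.printSchema()
// root
//  |-- hist: array (nullable = true)
//  |    |-- element: struct (containsNull = true)
//  |    |    |-- x: timestamp (nullable = true)
//  |    |    |-- y: double (nullable = true)
```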
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumeric.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumeric.scala index 09408e6eff18a..23609faad9a76 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumeric.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumeric.scala @@ -27,7 +27,8 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, ImplicitCastInputTypes} import org.apache.spark.sql.catalyst.trees.BinaryLike import org.apache.spark.sql.catalyst.util.GenericArrayData -import org.apache.spark.sql.types.{AbstractDataType, ArrayType, DataType, DateType, DayTimeIntervalType, DoubleType, IntegerType, NumericType, StructField, StructType, TimestampNTZType, TimestampType, TypeCollection, YearMonthIntervalType} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ import org.apache.spark.sql.util.NumericHistogram /** @@ -46,12 +47,13 @@ import org.apache.spark.sql.util.NumericHistogram smaller datasets. Note that this function creates a histogram with non-uniform bin widths. It offers no guarantees in terms of the mean-squared-error of the histogram, but in practice is comparable to the histograms produced by the R/S-Plus - statistical computing packages. + statistical computing packages. Note: the output type of the 'x' field in the return value is + propagated from the input value consumed in the aggregate function. """, examples = """ Examples: > SELECT _FUNC_(col, 5) FROM VALUES (0), (1), (2), (10) AS tab(col); - [{"x":0.0,"y":1.0},{"x":1.0,"y":1.0},{"x":2.0,"y":1.0},{"x":10.0,"y":1.0}] + [{"x":0,"y":1.0},{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":10,"y":1.0}] """, group = "agg_funcs", since = "3.3.0") @@ -72,6 +74,8 @@ case class HistogramNumeric( case n: Int => n } + private lazy val propagateInputType: Boolean = SQLConf.get.histogramNumericPropagateInputType + override def inputTypes: Seq[AbstractDataType] = { // Support NumericType, DateType, TimestampType and TimestampNTZType, YearMonthIntervalType, // DayTimeIntervalType since their internal types are all numeric, @@ -124,8 +128,33 @@ case class HistogramNumeric( null } else { val result = (0 until buffer.getUsedBins).map { index => + // Note that the 'coord.x' and 'coord.y' have double-precision floating point type here. val coord = buffer.getBin(index) - InternalRow.apply(coord.x, coord.y) + if (propagateInputType) { + // If the SQLConf.spark.sql.legacy.histogramNumericPropagateInputType is set to true, + // we need to internally convert the 'coord.x' value to the expected result type, for + // cases like integer types, timestamps, and intervals which are valid inputs to the + // numeric histogram aggregate function. For example, in this case: + // 'SELECT histogram_numeric(val, 3) FROM VALUES (0L), (1L), (2L), (10L) AS tab(col)' + // returns an array of structs where the first field has LongType. 
+ val result: Any = left.dataType match { + case ByteType => coord.x.toByte + case IntegerType | DateType | _: YearMonthIntervalType => + coord.x.toInt + case FloatType => coord.x.toFloat + case ShortType => coord.x.toShort + case _: DayTimeIntervalType | LongType | TimestampType | TimestampNTZType => + coord.x.toLong + case _ => coord.x + } + InternalRow.apply(result, coord.y) + } else { + // Otherwise, just apply the double-precision values in 'coord.x' and 'coord.y' to the + // output row directly. In this case: 'SELECT histogram_numeric(val, 3) + // FROM VALUES (0L), (1L), (2L), (10L) AS tab(col)' returns an array of structs where the + // first field has DoubleType. + InternalRow.apply(coord.x, coord.y) + } } new GenericArrayData(result) } @@ -157,10 +186,17 @@ case class HistogramNumeric( override def nullable: Boolean = true - override def dataType: DataType = + override def dataType: DataType = { + // If the SQLConf.spark.sql.legacy.histogramNumericPropagateInputType is set to true, + // the output data type of this aggregate function is an array of structs, where each struct + // has two fields (x, y): one of the same data type as the left child and another of double + // type. Otherwise, the 'x' field always has double type. ArrayType(new StructType(Array( - StructField("x", DoubleType, true), + StructField(name = "x", + dataType = if (propagateInputType) left.dataType else DoubleType, + nullable = true), StructField("y", DoubleType, true))), true) + } override def prettyName: String = "histogram_numeric" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index edb388e219877..42e44cfe575fc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3561,6 +3561,18 @@ object SQLConf { .booleanConf .createWithDefault(false) + val HISTOGRAM_NUMERIC_PROPAGATE_INPUT_TYPE = + buildConf("spark.sql.legacy.histogramNumericPropagateInputType") + .internal() + .doc("The histogram_numeric function computes a histogram on numeric 'expr' using nb bins. " + + "The return value is an array of (x,y) pairs representing the centers of the histogram's " + + "bins. If this config is set to true, the output type of the 'x' field in the return " + + "value is propagated from the input value consumed in the aggregate function. Otherwise, " + + "'x' always has double type.") + .version("3.3.0") + .booleanConf + .createWithDefault(true) + /** * Holds information about keys that have been deprecated. * @@ -4299,6 +4311,9 @@ class SQLConf extends Serializable with Logging { def useV1Command: Boolean = getConf(SQLConf.LEGACY_USE_V1_COMMAND) + def histogramNumericPropagateInputType: Boolean = + getConf(SQLConf.HISTOGRAM_NUMERIC_PROPAGATE_INPUT_TYPE) + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. 
*/ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumericSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumericSuite.scala index 60b53c660f6ef..f603563ee3d0f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumericSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumericSuite.scala @@ -17,18 +17,25 @@ package org.apache.spark.sql.catalyst.expressions.aggregate +import java.sql.Timestamp +import java.time.{Duration, Period} + import org.apache.spark.SparkFunSuite +import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.dsl.expressions.{DslString, DslSymbol} import org.apache.spark.sql.catalyst.dsl.plans.DslLogicalPlan import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, BoundReference, Cast, GenericInternalRow, Literal} +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.plans.logical.LocalRelation -import org.apache.spark.sql.types.{DoubleType, IntegerType} +import org.apache.spark.sql.catalyst.util.GenericArrayData +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ import org.apache.spark.sql.util.NumericHistogram -class HistogramNumericSuite extends SparkFunSuite { +class HistogramNumericSuite extends SparkFunSuite with SQLHelper with Logging { private val random = new java.util.Random() @@ -76,7 +83,6 @@ class HistogramNumericSuite extends SparkFunSuite { } test("class HistogramNumeric, sql string") { - val defaultAccuracy = ApproximatePercentile.DEFAULT_PERCENTILE_ACCURACY assertEqual(s"histogram_numeric(a, 3)", new HistogramNumeric("a".attr, Literal(3)).sql: String) @@ -106,23 +112,47 @@ class HistogramNumericSuite extends SparkFunSuite { } test("class HistogramNumeric, automatically add type casting for parameters") { - val testRelation = LocalRelation('a.int) + // These are the types of input relations under test. We exercise the unit test with several + // input column types to inspect the behavior of query analysis for the aggregate function. + val relations = Seq(LocalRelation('a.double), + LocalRelation('a.int), + LocalRelation('a.timestamp), + LocalRelation('a.dayTimeInterval()), + LocalRelation('a.yearMonthInterval())) - // accuracy types must be integral, no type casting + // These are the types of the second 'nbins' argument to the aggregate function. + // These accuracy types must be integral, no type casting is allowed. val nBinsExpressions = Seq( Literal(2.toByte), Literal(100.toShort), Literal(100), Literal(1000L)) - nBinsExpressions.foreach { nBins => + // Iterate through each of the input relation column types and 'nbins' expression types under + // test. + for { + relation <- relations + nBins <- nBinsExpressions + } { + // We expect each relation under test to have exactly one output attribute. 
+ assert(relation.output.length == 1) + val relationAttributeType = relation.output(0).dataType val agg = new HistogramNumeric(UnresolvedAttribute("a"), nBins) - val analyzed = testRelation.select(agg).analyze.expressions.head + val analyzed = relation.select(agg).analyze.expressions.head analyzed match { case Alias(agg: HistogramNumeric, _) => assert(agg.resolved) - assert(agg.child.dataType == IntegerType) + assert(agg.child.dataType == relationAttributeType) assert(agg.nBins.dataType == IntegerType) + // We expect the output type of the histogram aggregate function to be an array of structs + // where the first element of each struct has the same type as the original input + // attribute. + val expectedType = + ArrayType( + StructType(Seq( + StructField("x", relationAttributeType, nullable = true), + StructField("y", DoubleType, nullable = true)))) + assert(agg.dataType == expectedType) case _ => fail() } } @@ -151,6 +181,84 @@ class HistogramNumericSuite extends SparkFunSuite { assert(agg.eval(buffer) != null) } + test("class HistogramNumeric, exercise many different numeric input types") { + val inputs = Seq( + (Literal(null), + Literal(null), + Literal(null)), + (Literal(0), + Literal(1), + Literal(2)), + (Literal(0L), + Literal(1L), + Literal(2L)), + (Literal(0.toShort), + Literal(1.toShort), + Literal(2.toShort)), + (Literal(0F), + Literal(1F), + Literal(2F)), + (Literal(0D), + Literal(1D), + Literal(2D)), + (Literal(Timestamp.valueOf("2017-03-01 00:00:00")), + Literal(Timestamp.valueOf("2017-03-02 00:00:00")), + Literal(Timestamp.valueOf("2017-03-03 00:00:00"))), + (Literal(Duration.ofSeconds(1111)), + Literal(Duration.ofSeconds(1211)), + Literal(Duration.ofSeconds(1311))), + (Literal(Period.ofMonths(10)), + Literal(Period.ofMonths(11)), + Literal(Period.ofMonths(12)))) + for ((left, middle, right) <- inputs) { + // Check that the 'propagateInputType' bit correctly toggles the output type. + withSQLConf(SQLConf.HISTOGRAM_NUMERIC_PROPAGATE_INPUT_TYPE.key -> "false") { + val aggDoubleOutputType = new HistogramNumeric( + BoundReference(0, left.dataType, nullable = true), Literal(5)) + assert(aggDoubleOutputType.dataType match { + case ArrayType(StructType(Array( + StructField("x", DoubleType, _, _), + StructField("y", _, _, _))), true) => true + }) + } + val aggPropagateOutputType = new HistogramNumeric( + BoundReference(0, left.dataType, nullable = true), Literal(5)) + assert(aggPropagateOutputType.left.dataType == + (aggPropagateOutputType.dataType match { + case + ArrayType(StructType(Array( + StructField("x", lhs@_, true, _), + StructField("y", _, true, _))), true) => lhs + })) + // Now consume some input values and check the result. + val buffer = new GenericInternalRow(new Array[Any](1)) + aggPropagateOutputType.initialize(buffer) + // Consume three non-empty rows in the aggregation. + aggPropagateOutputType.update(buffer, InternalRow(left.value)) + aggPropagateOutputType.update(buffer, InternalRow(middle.value)) + aggPropagateOutputType.update(buffer, InternalRow(right.value)) + // Evaluate the aggregate function. + val result = aggPropagateOutputType.eval(buffer) + if (left.dataType != NullType) { + assert(result != null) + // Sanity-check the sum of the heights. 
+ var ys = 0.0 + result match { + case v: GenericArrayData => + for (row <- v.array) { + row match { + case r: GenericInternalRow => + assert(r.values.length == 2) + ys += r.values(1).asInstanceOf[Double] + } + } + } + assert(ys > 1) + } + // As a basic sanity check, the sum of the heights of the bins should be greater than one. + } + } + private def compareEquals(left: NumericHistogram, right: NumericHistogram): Boolean = { left.getNumBins == right.getNumBins && left.getUsedBins == right.getUsedBins && (0 until left.getUsedBins).forall { i => diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 7125c34fbbd73..386dd1fe0ae17 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -357,7 +357,7 @@ | org.apache.spark.sql.catalyst.expressions.aggregate.CovSample | covar_samp | SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.First | first | SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.First | first_value | SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | -| org.apache.spark.sql.catalyst.expressions.aggregate.HistogramNumeric | histogram_numeric | SELECT histogram_numeric(col, 5) FROM VALUES (0), (1), (2), (10) AS tab(col) | struct>> | +| org.apache.spark.sql.catalyst.expressions.aggregate.HistogramNumeric | histogram_numeric | SELECT histogram_numeric(col, 5) FROM VALUES (0), (1), (2), (10) AS tab(col) | struct>> | | org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus | approx_count_distinct | SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis | kurtosis | SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.Last | last | SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 75933f86b2ab3..ef3d523a23d84 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -199,6 +199,7 @@ FROM testData GROUP BY a IS NULL; +-- Histogram aggregates with different numeric input types SELECT histogram_numeric(col, 2) as histogram_2, histogram_numeric(col, 3) as histogram_3, @@ -210,6 +211,32 @@ FROM VALUES (21), (22), (23), (24), (25), (26), (27), (28), (29), (30), (31), (32), (33), (34), (35), (3), (37), (38), (39), (40), (41), (42), (43), (44), (45), (46), (47), (48), (49), (50) AS tab(col); +SELECT histogram_numeric(col, 3) FROM VALUES (1), (2), (3) AS tab(col); +SELECT histogram_numeric(col, 3) FROM VALUES (1L), (2L), (3L) AS tab(col); +SELECT histogram_numeric(col, 3) FROM VALUES (1F), (2F), (3F) AS tab(col); +SELECT histogram_numeric(col, 3) FROM VALUES (1D), (2D), (3D) AS tab(col); +SELECT histogram_numeric(col, 3) FROM VALUES (1S), (2S), (3S) AS tab(col); +SELECT histogram_numeric(col, 3) FROM VALUES + (CAST(1 AS BYTE)), (CAST(2 AS BYTE)), (CAST(3 AS BYTE)) AS tab(col); +SELECT histogram_numeric(col, 3) FROM VALUES + (CAST(1 AS TINYINT)), (CAST(2 AS TINYINT)), (CAST(3 AS TINYINT)) 
AS tab(col); +SELECT histogram_numeric(col, 3) FROM VALUES + (CAST(1 AS SMALLINT)), (CAST(2 AS SMALLINT)), (CAST(3 AS SMALLINT)) AS tab(col); +SELECT histogram_numeric(col, 3) FROM VALUES + (CAST(1 AS BIGINT)), (CAST(2 AS BIGINT)), (CAST(3 AS BIGINT)) AS tab(col); +SELECT histogram_numeric(col, 3) FROM VALUES (TIMESTAMP '2017-03-01 00:00:00'), + (TIMESTAMP '2017-04-01 00:00:00'), (TIMESTAMP '2017-05-01 00:00:00') AS tab(col); +SELECT histogram_numeric(col, 3) FROM VALUES (INTERVAL '100-00' YEAR TO MONTH), + (INTERVAL '110-00' YEAR TO MONTH), (INTERVAL '120-00' YEAR TO MONTH) AS tab(col); +SELECT histogram_numeric(col, 3) FROM VALUES (INTERVAL '12 20:4:0' DAY TO SECOND), + (INTERVAL '12 21:4:0' DAY TO SECOND), (INTERVAL '12 22:4:0' DAY TO SECOND) AS tab(col); +SELECT histogram_numeric(col, 3) +FROM VALUES (NULL), (NULL), (NULL) AS tab(col); +SELECT histogram_numeric(col, 3) +FROM VALUES (CAST(NULL AS DOUBLE)), (CAST(NULL AS DOUBLE)), (CAST(NULL AS DOUBLE)) AS tab(col); +SELECT histogram_numeric(col, 3) +FROM VALUES (CAST(NULL AS INT)), (CAST(NULL AS INT)), (CAST(NULL AS INT)) AS tab(col); + -- SPARK-37613: Support ANSI Aggregate Function: regr_count SELECT regr_count(y, x) FROM testRegression; diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 6d5ea7d87ed8f..7ae9199701536 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 80 +-- Number of queries: 95 -- !query @@ -708,9 +708,139 @@ FROM VALUES (31), (32), (33), (34), (35), (3), (37), (38), (39), (40), (41), (42), (43), (44), (45), (46), (47), (48), (49), (50) AS tab(col) -- !query schema -struct>,histogram_3:array>,histogram_5:array>,histogram_10:array>> +struct>,histogram_3:array>,histogram_5:array>,histogram_10:array>> -- !query output -[{"x":12.615384615384613,"y":26.0},{"x":38.083333333333336,"y":24.0}] [{"x":9.649999999999999,"y":20.0},{"x":25.0,"y":11.0},{"x":40.736842105263165,"y":19.0}] [{"x":5.272727272727273,"y":11.0},{"x":14.5,"y":8.0},{"x":22.0,"y":7.0},{"x":30.499999999999996,"y":10.0},{"x":43.5,"y":14.0}] [{"x":3.0,"y":6.0},{"x":8.5,"y":6.0},{"x":13.5,"y":4.0},{"x":17.0,"y":3.0},{"x":20.5,"y":4.0},{"x":25.5,"y":6.0},{"x":31.999999999999996,"y":7.0},{"x":39.0,"y":5.0},{"x":43.5,"y":4.0},{"x":48.0,"y":5.0}] +[{"x":12,"y":26.0},{"x":38,"y":24.0}] [{"x":9,"y":20.0},{"x":25,"y":11.0},{"x":40,"y":19.0}] [{"x":5,"y":11.0},{"x":14,"y":8.0},{"x":22,"y":7.0},{"x":30,"y":10.0},{"x":43,"y":14.0}] [{"x":3,"y":6.0},{"x":8,"y":6.0},{"x":13,"y":4.0},{"x":17,"y":3.0},{"x":20,"y":4.0},{"x":25,"y":6.0},{"x":31,"y":7.0},{"x":39,"y":5.0},{"x":43,"y":4.0},{"x":48,"y":5.0}] + + +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct>> +-- !query output +[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}] + + +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES (1L), (2L), (3L) AS tab(col) +-- !query schema +struct>> +-- !query output +[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}] + + +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES (1F), (2F), (3F) AS tab(col) +-- !query schema +struct>> +-- !query output +[{"x":1.0,"y":1.0},{"x":2.0,"y":1.0},{"x":3.0,"y":1.0}] + + +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES (1D), (2D), (3D) AS tab(col) +-- !query schema +struct>> +-- !query output 
+[{"x":1.0,"y":1.0},{"x":2.0,"y":1.0},{"x":3.0,"y":1.0}] + + +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES (1S), (2S), (3S) AS tab(col) +-- !query schema +struct>> +-- !query output +[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}] + + +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES + (CAST(1 AS BYTE)), (CAST(2 AS BYTE)), (CAST(3 AS BYTE)) AS tab(col) +-- !query schema +struct>> +-- !query output +[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}] + + +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES + (CAST(1 AS TINYINT)), (CAST(2 AS TINYINT)), (CAST(3 AS TINYINT)) AS tab(col) +-- !query schema +struct>> +-- !query output +[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}] + + +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES + (CAST(1 AS SMALLINT)), (CAST(2 AS SMALLINT)), (CAST(3 AS SMALLINT)) AS tab(col) +-- !query schema +struct>> +-- !query output +[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}] + + +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES + (CAST(1 AS BIGINT)), (CAST(2 AS BIGINT)), (CAST(3 AS BIGINT)) AS tab(col) +-- !query schema +struct>> +-- !query output +[{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}] + + +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES (TIMESTAMP '2017-03-01 00:00:00'), + (TIMESTAMP '2017-04-01 00:00:00'), (TIMESTAMP '2017-05-01 00:00:00') AS tab(col) +-- !query schema +struct>> +-- !query output +[{"x":2017-03-01 00:00:00,"y":1.0},{"x":2017-04-01 00:00:00,"y":1.0},{"x":2017-05-01 00:00:00,"y":1.0}] + + +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES (INTERVAL '100-00' YEAR TO MONTH), + (INTERVAL '110-00' YEAR TO MONTH), (INTERVAL '120-00' YEAR TO MONTH) AS tab(col) +-- !query schema +struct>> +-- !query output +[{"x":100-0,"y":1.0},{"x":110-0,"y":1.0},{"x":120-0,"y":1.0}] + + +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES (INTERVAL '12 20:4:0' DAY TO SECOND), + (INTERVAL '12 21:4:0' DAY TO SECOND), (INTERVAL '12 22:4:0' DAY TO SECOND) AS tab(col) +-- !query schema +struct>> +-- !query output +[{"x":12 20:04:00.000000000,"y":1.0},{"x":12 21:04:00.000000000,"y":1.0},{"x":12 22:04:00.000000000,"y":1.0}] + + +-- !query +SELECT histogram_numeric(col, 3) +FROM VALUES (NULL), (NULL), (NULL) AS tab(col) +-- !query schema +struct>> +-- !query output +NULL + + +-- !query +SELECT histogram_numeric(col, 3) +FROM VALUES (CAST(NULL AS DOUBLE)), (CAST(NULL AS DOUBLE)), (CAST(NULL AS DOUBLE)) AS tab(col) +-- !query schema +struct>> +-- !query output +NULL + + +-- !query +SELECT histogram_numeric(col, 3) +FROM VALUES (CAST(NULL AS INT)), (CAST(NULL AS INT)), (CAST(NULL AS INT)) AS tab(col) +-- !query schema +struct>> +-- !query output +NULL -- !query From a342214b7b909864ff41c9890bf56a108591c91e Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 14 Mar 2022 12:19:45 +0300 Subject: [PATCH 490/513] [SPARK-38535][SQL] Add the `datetimeUnit` enum and use it in `TIMESTAMPADD/DIFF` ### What changes were proposed in this pull request? In the PR, I propose to add new enum `datetimeUnit` and re-use it in the datetime "function"s `TIMESTAMPADD` (see SPARK-38195) and `TIMESTAMPDIFF` (see SPARK-38284): SqlBaseParser.g4: ``` datetimeUnit : YEAR | QUARTER | MONTH | WEEK | DAY | DAYOFYEAR | HOUR | MINUTE | SECOND | MILLISECOND | MICROSECOND ; ``` ### Why are the changes needed? 1. The enum will allow to document the list of supported units in the grammar which should improve user experience with Spark SQL. 2. Switching to unified parse error should improve UX too. 
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running test suites for `TIMESTAMPADD`/`TIMESTAMPDIFF`: ``` $ build/sbt "test:testOnly *SQLKeywordSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z timestamp.sql" ``` Closes #35826 from MaxGekk/timestampadd-unit-enum. Authored-by: Max Gekk Signed-off-by: Max Gekk --- docs/sql-ref-ansi-compliance.md | 5 +++++ .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 5 +++++ .../sql/catalyst/parser/SqlBaseParser.g4 | 20 +++++++++++++++-- .../sql/catalyst/util/DateTimeUtils.scala | 4 ++-- .../sql/errors/QueryExecutionErrors.scala | 12 ---------- .../catalyst/util/DateTimeUtilsSuite.scala | 10 ++++----- .../errors/QueryExecutionErrorsSuite.scala | 22 +------------------ 7 files changed, 36 insertions(+), 42 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index ccb0ab2731829..ccfc60122d31c 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -393,6 +393,7 @@ Below is a list of all the keywords in Spark SQL. |DATEADD|non-reserved|non-reserved|non-reserved| |DATEDIFF|non-reserved|non-reserved|non-reserved| |DAY|non-reserved|non-reserved|non-reserved| +|DAYOFYEAR|non-reserved|non-reserved|non-reserved| |DBPROPERTIES|non-reserved|non-reserved|non-reserved| |DEFAULT|non-reserved|non-reserved|non-reserved| |DEFINED|non-reserved|non-reserved|non-reserved| @@ -477,6 +478,8 @@ Below is a list of all the keywords in Spark SQL. |MAP|non-reserved|non-reserved|non-reserved| |MATCHED|non-reserved|non-reserved|non-reserved| |MERGE|non-reserved|non-reserved|non-reserved| +|MICROSECOND|non-reserved|non-reserved|non-reserved| +|MILLISECOND|non-reserved|non-reserved|non-reserved| |MINUTE|non-reserved|non-reserved|non-reserved| |MINUS|non-reserved|strict-non-reserved|non-reserved| |MONTH|non-reserved|non-reserved|non-reserved| @@ -515,6 +518,7 @@ Below is a list of all the keywords in Spark SQL. |PRINCIPALS|non-reserved|non-reserved|non-reserved| |PROPERTIES|non-reserved|non-reserved|non-reserved| |PURGE|non-reserved|non-reserved|non-reserved| +|QUARTER|non-reserved|non-reserved|non-reserved| |QUERY|non-reserved|non-reserved|non-reserved| |RANGE|non-reserved|non-reserved|reserved| |RECORDREADER|non-reserved|non-reserved|non-reserved| @@ -605,6 +609,7 @@ Below is a list of all the keywords in Spark SQL. 
|VERSION|non-reserved|non-reserved|non-reserved| |VIEW|non-reserved|non-reserved|non-reserved| |VIEWS|non-reserved|non-reserved|non-reserved| +|WEEK|non-reserved|non-reserved|non-reserved| |WHEN|reserved|non-reserved|reserved| |WHERE|reserved|non-reserved|reserved| |WINDOW|non-reserved|non-reserved|reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index 1387f143a41b9..e84d4fa45eb99 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -139,6 +139,7 @@ CURRENT_TIME: 'CURRENT_TIME'; CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; CURRENT_USER: 'CURRENT_USER'; DAY: 'DAY'; +DAYOFYEAR: 'DAYOFYEAR'; DATA: 'DATA'; DATABASE: 'DATABASE'; DATABASES: 'DATABASES'; @@ -228,6 +229,8 @@ MACRO: 'MACRO'; MAP: 'MAP'; MATCHED: 'MATCHED'; MERGE: 'MERGE'; +MICROSECOND: 'MICROSECOND'; +MILLISECOND: 'MILLISECOND'; MINUTE: 'MINUTE'; MONTH: 'MONTH'; MSCK: 'MSCK'; @@ -265,6 +268,7 @@ PRIMARY: 'PRIMARY'; PRINCIPALS: 'PRINCIPALS'; PROPERTIES: 'PROPERTIES'; PURGE: 'PURGE'; +QUARTER: 'QUARTER'; QUERY: 'QUERY'; RANGE: 'RANGE'; RECORDREADER: 'RECORDREADER'; @@ -354,6 +358,7 @@ VALUES: 'VALUES'; VERSION: 'VERSION'; VIEW: 'VIEW'; VIEWS: 'VIEWS'; +WEEK: 'WEEK'; WHEN: 'WHEN'; WHERE: 'WHERE'; WINDOW: 'WINDOW'; diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 9dcc5db69fd2d..fb3bccacaf94b 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -806,10 +806,16 @@ valueExpression | left=valueExpression comparisonOperator right=valueExpression #comparison ; +datetimeUnit + : YEAR | QUARTER | MONTH + | WEEK | DAY | DAYOFYEAR + | HOUR | MINUTE | SECOND | MILLISECOND | MICROSECOND + ; + primaryExpression : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER) #currentLike - | name=(TIMESTAMPADD | DATEADD) LEFT_PAREN unit=identifier COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN #timestampadd - | name=(TIMESTAMPDIFF | DATEDIFF) LEFT_PAREN unit=identifier COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff + | name=(TIMESTAMPADD | DATEADD) LEFT_PAREN unit=datetimeUnit COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN #timestampadd + | name=(TIMESTAMPDIFF | DATEDIFF) LEFT_PAREN unit=datetimeUnit COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase | CASE value=expression whenClause+ (ELSE elseExpression=expression)? 
END #simpleCase | name=(CAST | TRY_CAST) LEFT_PAREN expression AS dataType RIGHT_PAREN #cast @@ -1097,6 +1103,7 @@ ansiNonReserved | DATEADD | DATEDIFF | DAY + | DAYOFYEAR | DBPROPERTIES | DEFAULT | DEFINED @@ -1157,6 +1164,8 @@ ansiNonReserved | MAP | MATCHED | MERGE + | MICROSECOND + | MILLISECOND | MINUTE | MONTH | MSCK @@ -1183,6 +1192,7 @@ ansiNonReserved | PRINCIPALS | PROPERTIES | PURGE + | QUARTER | QUERY | RANGE | RECORDREADER @@ -1257,6 +1267,7 @@ ansiNonReserved | VERSION | VIEW | VIEWS + | WEEK | WINDOW | YEAR | ZONE @@ -1348,6 +1359,7 @@ nonReserved | DATEADD | DATEDIFF | DAY + | DAYOFYEAR | DBPROPERTIES | DEFAULT | DEFINED @@ -1425,6 +1437,8 @@ nonReserved | MAP | MATCHED | MERGE + | MICROSECOND + | MILLISECOND | MINUTE | MONTH | MSCK @@ -1460,6 +1474,7 @@ nonReserved | PRINCIPALS | PROPERTIES | PURGE + | QUARTER | QUERY | RANGE | RECORDREADER @@ -1544,6 +1559,7 @@ nonReserved | VERSION | VIEW | VIEWS + | WEEK | WHEN | WHERE | WINDOW diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 80e31f3d5346e..65da5e9cb4251 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1221,7 +1221,7 @@ object DateTimeUtils { } } catch { case _: scala.MatchError => - throw QueryExecutionErrors.invalidUnitInTimestampAdd(unit) + throw new IllegalStateException(s"Got the unexpected unit '$unit'.") case _: ArithmeticException | _: DateTimeException => throw QueryExecutionErrors.timestampAddOverflowError(micros, quantity, unit) case e: Throwable => @@ -1259,7 +1259,7 @@ object DateTimeUtils { val endLocalTs = getLocalDateTime(endTs, zoneId) timestampDiffMap(unitInUpperCase)(startLocalTs, endLocalTs) } else { - throw QueryExecutionErrors.invalidUnitInTimestampDiff(unit) + throw new IllegalStateException(s"Got the unexpected unit '$unit'.") } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 9381cecc7d08f..c6a69e4ce5d6b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1993,18 +1993,6 @@ object QueryExecutionErrors { new SQLFeatureNotSupportedException("Drop namespace restrict is not supported") } - def invalidUnitInTimestampAdd(unit: String): Throwable = { - new SparkIllegalArgumentException( - errorClass = "INVALID_PARAMETER_VALUE", - messageParameters = Array("unit", "timestampadd", unit)) - } - - def invalidUnitInTimestampDiff(unit: String): Throwable = { - new SparkIllegalArgumentException( - errorClass = "INVALID_PARAMETER_VALUE", - messageParameters = Array("unit", "timestampdiff", unit)) - } - def timestampAddOverflowError(micros: Long, amount: Int, unit: String): ArithmeticException = { new SparkArithmeticException( errorClass = "DATETIME_OVERFLOW", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 6efeba92db38b..41da5409feb06 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -26,7 +26,7 @@ import java.util.concurrent.TimeUnit import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ -import org.apache.spark.{SparkFunSuite, SparkIllegalArgumentException} +import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ @@ -984,10 +984,10 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { date(1, 1, 1, 0, 0, 0, 1, zid)) } - val e = intercept[SparkIllegalArgumentException] { + val e = intercept[IllegalStateException] { timestampAdd("SECS", 1, date(1969, 1, 1, 0, 0, 0, 1, getZoneId("UTC")), getZoneId("UTC")) } - assert(e.getMessage.contains("invalid: SECS")) + assert(e.getMessage === "Got the unexpected unit 'SECS'.") } test("SPARK-38284: difference between two timestamps in units") { @@ -1034,13 +1034,13 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { zid) === -9998) } - val e = intercept[SparkIllegalArgumentException] { + val e = intercept[IllegalStateException] { timestampDiff( "SECS", date(1969, 1, 1, 0, 0, 0, 1, getZoneId("UTC")), date(2022, 1, 1, 0, 0, 0, 1, getZoneId("UTC")), getZoneId("UTC")) } - assert(e.getMessage.contains("invalid: SECS")) + assert(e.getMessage === "Got the unexpected unit 'SECS'.") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index cdd50a61bcf99..9268be43ba490 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.errors -import org.apache.spark.{SparkArithmeticException, SparkException, SparkIllegalArgumentException, SparkRuntimeException, SparkUnsupportedOperationException, SparkUpgradeException} +import org.apache.spark.{SparkArithmeticException, SparkException, SparkRuntimeException, SparkUnsupportedOperationException, SparkUpgradeException} import org.apache.spark.sql.{DataFrame, QueryTest} import org.apache.spark.sql.execution.datasources.orc.OrcTest import org.apache.spark.sql.execution.datasources.parquet.ParquetTest @@ -100,26 +100,6 @@ class QueryExecutionErrorsSuite extends QueryTest } } - test("INVALID_PARAMETER_VALUE: invalid unit passed to timestampadd/timestampdiff") { - Seq( - "timestampadd" -> - "select timestampadd(nanosecond, 100, timestamp'2022-02-13 18:00:00')", - "timestampdiff" -> - """select timestampdiff( - | nanosecond, - | timestamp'2022-02-13 18:00:00', - | timestamp'2022-02-22 12:52:00')""".stripMargin - ).foreach { case (funcName, sqlStmt) => - val e = intercept[SparkIllegalArgumentException] { - sql(sqlStmt).collect() - } - assert(e.getErrorClass === "INVALID_PARAMETER_VALUE") - assert(e.getSqlState === "22023") - assert(e.getMessage === - s"The value of parameter(s) 'unit' in $funcName is invalid: nanosecond") - } - } - test("UNSUPPORTED_FEATURE: unsupported combinations of AES modes and padding") { val key16 = "abcdefghijklmnop" val key32 = "abcdefghijklmnop12345678ABCDEFGH" From 5bb001b2bf01be4ed04c9571cf556f253b3d13ed Mon Sep 17 00:00:00 2001 From: Kun Wan Date: Mon, 14 Mar 2022 18:04:16 +0800 Subject: [PATCH 491/513] [SPARK-36967][FOLLOWUP][CORE] 
Report accurate shuffle block size if its skewed ### What changes were proposed in this pull request? Now we will sort the shuffle blocks twice in map side to find out the skewed blocks. We can add a flag in `Utils.median(sizes: Array[Long])` to avoid sort the input array. ### Why are the changes needed? To avoid additional sort. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UTs Closes #35619 from wankunde/skip_extra_sort_for_map_status. Authored-by: Kun Wan Signed-off-by: Ruifeng Zheng --- .../main/scala/org/apache/spark/scheduler/MapStatus.scala | 2 +- core/src/main/scala/org/apache/spark/util/Utils.scala | 5 +++-- .../scala/org/apache/spark/scheduler/MapStatusSuite.scala | 2 +- .../spark/sql/execution/adaptive/OptimizeSkewedJoin.scala | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala index 1a7a1675fe05f..d10cf55ed0d10 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala @@ -265,7 +265,7 @@ private[spark] object HighlyCompressedMapStatus { val threshold = if (accurateBlockSkewedFactor > 0) { val sortedSizes = uncompressedSizes.sorted - val medianSize: Long = Utils.median(sortedSizes) + val medianSize: Long = Utils.median(sortedSizes, true) val maxAccurateSkewedBlockNumber = Math.min( Option(SparkEnv.get) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 17bec9f666aef..c8c7ea627b864 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -3224,11 +3224,12 @@ private[spark] object Utils extends Logging { * Return the median number of a long array * * @param sizes + * @param alreadySorted * @return */ - def median(sizes: Array[Long]): Long = { + def median(sizes: Array[Long], alreadySorted: Boolean): Long = { val len = sizes.length - val sortedSize = sizes.sorted + val sortedSize = if (alreadySorted) sizes else sizes.sorted len match { case _ if (len % 2 == 0) => math.max((sortedSize(len / 2) + sortedSize(len / 2 - 1)) / 2, 1) diff --git a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala index 47723c5d8c689..fe76b1bc322cd 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala @@ -262,7 +262,7 @@ class MapStatusSuite extends SparkFunSuite { smallBlockSizes ++: untrackedSkewedBlocksSizes ++: trackedSkewedBlocksSizes val allBlocks = emptyBlocks ++: nonEmptyBlocks - val skewThreshold = Utils.median(allBlocks.sorted) * accurateBlockSkewedFactor + val skewThreshold = Utils.median(allBlocks, false) * accurateBlockSkewedFactor assert(nonEmptyBlocks.filter(_ > skewThreshold).size == untrackedSkewedBlocksLength + trackedSkewedBlocksLength, "number of skewed block sizes") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala index c3e3eb7b2b21e..d4a173bb9cceb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala @@ -121,8 +121,8 @@ case class OptimizeSkewedJoin(ensureRequirements: EnsureRequirements) assert(leftSizes.length == rightSizes.length) val numPartitions = leftSizes.length // We use the median size of the original shuffle partitions to detect skewed partitions. - val leftMedSize = Utils.median(leftSizes) - val rightMedSize = Utils.median(rightSizes) + val leftMedSize = Utils.median(leftSizes, false) + val rightMedSize = Utils.median(rightSizes, false) logDebug( s""" |Optimizing skewed join. From 0005b413c3534bfbb53cf6ffb9f6278231ced8a9 Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Mon, 14 Mar 2022 19:19:03 +0900 Subject: [PATCH 492/513] [SPARK-38400][PYTHON] Enable Series.rename to change index labels ### What changes were proposed in this pull request? Enable Series.rename to change index labels, with function `index` input. ### Why are the changes needed? To reach parity with pandas. ### Does this PR introduce _any_ user-facing change? Yes. Using function `index` input to change index labels of Series is supported as below: ```py >>> s = ps.Series([1, 2, 3]) >>> s.rename(lambda x: x ** 2) 0 1 1 2 4 3 dtype: int64 ``` ### How was this patch tested? Unit test. Closes #35717 from xinrong-databricks/series.rename. Authored-by: Xinrong Meng Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/series.py | 33 +++++++++++++++++----- python/pyspark/pandas/tests/test_series.py | 20 ++++++++++--- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py index 853e0e2749f5d..e6f68678317eb 100644 --- a/python/pyspark/pandas/series.py +++ b/python/pyspark/pandas/series.py @@ -1111,16 +1111,18 @@ def name(self) -> Name: def name(self, name: Name) -> None: self.rename(name, inplace=True) - # TODO: Functionality and documentation should be matched. Currently, changing index labels - # taking dictionary and function to change index are not supported. - def rename(self, index: Optional[Name] = None, **kwargs: Any) -> "Series": + # TODO: Currently, changing index labels taking dictionary/Series is not supported. + def rename( + self, index: Optional[Union[Name, Callable[[Any], Any]]] = None, **kwargs: Any + ) -> "Series": """ - Alter Series name. + Alter Series index labels or name. Parameters ---------- - index : scalar - Scalar will alter the ``Series.name`` attribute. + index : scalar or function, optional + Functions are transformations to apply to the index. + Scalar will alter the Series.name attribute. inplace : bool, default False Whether to return a new Series. If True then value of copy is @@ -1129,7 +1131,7 @@ def rename(self, index: Optional[Name] = None, **kwargs: Any) -> "Series": Returns ------- Series - Series with name altered. + Series with index labels or name altered. 
Examples -------- @@ -1146,9 +1148,26 @@ def rename(self, index: Optional[Name] = None, **kwargs: Any) -> "Series": 1 2 2 3 Name: my_name, dtype: int64 + + >>> s.rename(lambda x: x ** 2) # function, changes labels + 0 1 + 1 2 + 4 3 + dtype: int64 """ if index is None: pass + if callable(index): + if kwargs.get("inplace", False): + raise ValueError("inplace True is not supported yet for a function 'index'") + frame = self.to_frame() + new_index_name = verify_temp_column_name(frame, "__index_name__") + frame[new_index_name] = self.index.map(index) + frame.set_index(new_index_name, inplace=True) + frame.index.name = self.index.name + return first_series(frame).rename(self.name) + elif isinstance(index, (pd.Series, dict)): + raise ValueError("'index' of %s type is not supported yet" % type(index).__name__) elif not is_hashable(index): raise TypeError("Series.name must be a hashable type") elif not isinstance(index, tuple): diff --git a/python/pyspark/pandas/tests/test_series.py b/python/pyspark/pandas/tests/test_series.py index dbfa5477e3d72..eeadb060c6a3e 100644 --- a/python/pyspark/pandas/tests/test_series.py +++ b/python/pyspark/pandas/tests/test_series.py @@ -193,14 +193,26 @@ def test_rename_method(self): with self.assertRaisesRegex(TypeError, expected_error_message): psser.rename(["0", "1"]) + # Function index + self.assert_eq(psser.rename(lambda x: x ** 2), pser.rename(lambda x: x ** 2)) + self.assert_eq((psser + 1).rename(lambda x: x ** 2), (pser + 1).rename(lambda x: x ** 2)) + + expected_error_message = "inplace True is not supported yet for a function 'index'" + with self.assertRaisesRegex(ValueError, expected_error_message): + psser.rename(lambda x: x ** 2, inplace=True) + + unsupported_index_inputs = (pd.Series([2, 3, 4, 5, 6, 7, 8]), {0: "zero", 1: "one"}) + for index in unsupported_index_inputs: + expected_error_message = ( + "'index' of %s type is not supported yet" % type(index).__name__ + ) + with self.assertRaisesRegex(ValueError, expected_error_message): + psser.rename(index) + # Series index # pser = pd.Series(['a', 'b', 'c', 'd', 'e', 'f', 'g'], name='x') # psser = ps.from_pandas(s) - # TODO: index - # res = psser.rename(lambda x: x ** 2) - # self.assert_eq(res, pser.rename(lambda x: x ** 2)) - # res = psser.rename(pser) # self.assert_eq(res, pser.rename(pser)) From c16a66a64bf9599b4fefcdc2beaad7c4f2a4517e Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 14 Mar 2022 21:58:24 +0800 Subject: [PATCH 493/513] [SPARK-36194][SQL] Add a logical plan visitor to propagate the distinct attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? 1. This pr add a new logical plan visitor named `DistinctKeyVisitor` to find out all the distinct attributes in current logical plan. For example: ```scala spark.sql("CREATE TABLE t(a int, b int, c int) using parquet") spark.sql("SELECT a, b, a % 10, max(c), sum(b) FROM t GROUP BY a, b").queryExecution.analyzed.distinctKeys ``` The output is: {a#1, b#2}. 2. Enhance `RemoveRedundantAggregates` to remove the aggregation if it is groupOnly and the child can guarantee distinct. 
For example: ```sql set spark.sql.autoBroadcastJoinThreshold=-1; -- avoid PushDownLeftSemiAntiJoin create table t1 using parquet as select id a, id as b from range(10); create table t2 using parquet as select id as a, id as b from range(8); select t11.a, t11.b from (select distinct a, b from t1) t11 left semi join t2 on (t11.a = t2.a) group by t11.a, t11.b; ``` Before this PR: ``` == Optimized Logical Plan == Aggregate [a#6L, b#7L], [a#6L, b#7L], Statistics(sizeInBytes=1492.0 B) +- Join LeftSemi, (a#6L = a#8L), Statistics(sizeInBytes=1492.0 B) :- Aggregate [a#6L, b#7L], [a#6L, b#7L], Statistics(sizeInBytes=1492.0 B) : +- Filter isnotnull(a#6L), Statistics(sizeInBytes=1492.0 B) : +- Relation default.t1[a#6L,b#7L] parquet, Statistics(sizeInBytes=1492.0 B) +- Project [a#8L], Statistics(sizeInBytes=984.0 B) +- Filter isnotnull(a#8L), Statistics(sizeInBytes=1476.0 B) +- Relation default.t2[a#8L,b#9L] parquet, Statistics(sizeInBytes=1476.0 B) ``` After this PR: ``` == Optimized Logical Plan == Join LeftSemi, (a#6L = a#8L), Statistics(sizeInBytes=1492.0 B) :- Aggregate [a#6L, b#7L], [a#6L, b#7L], Statistics(sizeInBytes=1492.0 B) : +- Filter isnotnull(a#6L), Statistics(sizeInBytes=1492.0 B) : +- Relation default.t1[a#6L,b#7L] parquet, Statistics(sizeInBytes=1492.0 B) +- Project [a#8L], Statistics(sizeInBytes=984.0 B) +- Filter isnotnull(a#8L), Statistics(sizeInBytes=1476.0 B) +- Relation default.t2[a#8L,b#9L] parquet, Statistics(sizeInBytes=1476.0 B) ``` ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test and TPC-DS benchmark test. SQL | Before this PR(Seconds) | After this PR(Seconds) -- | -- | -- q14a | 206  | 193 q38 | 59 | 41 q87 | 127 | 113 Closes #35779 from wangyum/SPARK-36194. 
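The propagation can also be switched off through the internal config added to `SQLConf` in this PR; a rough sketch in `spark-shell`, reusing the `t1`/`t2` tables from the example above (plan output omitted):

```scala
// With the flag disabled, LogicalPlan.distinctKeys stays empty, so the extra
// Aggregate above the left semi join is kept, as in the "Before this PR" plan.
spark.conf.set("spark.sql.optimizer.propagateDistinctKeys.enabled", "false")
spark.sql("""
  SELECT t11.a, t11.b
  FROM (SELECT DISTINCT a, b FROM t1) t11
  LEFT SEMI JOIN t2 ON (t11.a = t2.a)
  GROUP BY t11.a, t11.b
""").explain(true)

// Restore the default to get the optimized plan shown above.
spark.conf.set("spark.sql.optimizer.propagateDistinctKeys.enabled", "true")
```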
Authored-by: Yuming Wang Signed-off-by: Yuming Wang --- .../optimizer/RemoveRedundantAggregates.scala | 8 +- .../plans/logical/DistinctKeyVisitor.scala | 140 ++ .../catalyst/plans/logical/LogicalPlan.scala | 1 + .../logical/LogicalPlanDistinctKeys.scala | 34 + .../apache/spark/sql/internal/SQLConf.scala | 9 + .../RemoveRedundantAggregatesSuite.scala | 141 +- .../logical/DistinctKeyVisitorSuite.scala | 182 +++ .../q14a.sf100/explain.txt | 941 +++++++------ .../q14a.sf100/simplified.txt | 231 ++-- .../approved-plans-v1_4/q14a/explain.txt | 474 +++---- .../approved-plans-v1_4/q14a/simplified.txt | 126 +- .../q14b.sf100/explain.txt | 807 ++++++----- .../q14b.sf100/simplified.txt | 217 ++- .../approved-plans-v1_4/q14b/explain.txt | 442 +++--- .../approved-plans-v1_4/q14b/simplified.txt | 126 +- .../approved-plans-v1_4/q38.sf100/explain.txt | 333 ++--- .../q38.sf100/simplified.txt | 237 ++-- .../approved-plans-v1_4/q38/explain.txt | 185 ++- .../approved-plans-v1_4/q38/simplified.txt | 123 +- .../approved-plans-v1_4/q87.sf100/explain.txt | 333 ++--- .../q87.sf100/simplified.txt | 237 ++-- .../approved-plans-v1_4/q87/explain.txt | 185 ++- .../approved-plans-v1_4/q87/simplified.txt | 123 +- .../approved-plans-v2_7/q14.sf100/explain.txt | 807 ++++++----- .../q14.sf100/simplified.txt | 217 ++- .../approved-plans-v2_7/q14/explain.txt | 442 +++--- .../approved-plans-v2_7/q14/simplified.txt | 126 +- .../q14a.sf100/explain.txt | 1225 ++++++++--------- .../q14a.sf100/simplified.txt | 261 ++-- .../approved-plans-v2_7/q14a/explain.txt | 574 ++++---- .../approved-plans-v2_7/q14a/simplified.txt | 126 +- 31 files changed, 4770 insertions(+), 4643 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitor.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanDistinctKeys.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitorSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAggregates.scala index beec90da2e56f..2104bce3711f0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAggregates.scala @@ -18,9 +18,9 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.analysis.PullOutNondeterministic -import org.apache.spark.sql.catalyst.expressions.{AliasHelper, AttributeSet} +import org.apache.spark.sql.catalyst.expressions.{AliasHelper, AttributeSet, ExpressionSet} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern.AGGREGATE @@ -47,6 +47,10 @@ object RemoveRedundantAggregates extends Rule[LogicalPlan] with AliasHelper { } else { newAggregate } + + case agg @ Aggregate(groupingExps, _, child) + if agg.groupOnly && child.distinctKeys.exists(_.subsetOf(ExpressionSet(groupingExps))) => + Project(agg.aggregateExpressions, child) } private def isLowerRedundant(upper: Aggregate, lower: Aggregate): Boolean = { diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitor.scala new file mode 100644 index 0000000000000..bb2bc4e3d2f93 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitor.scala @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, ExpressionSet, NamedExpression} +import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys +import org.apache.spark.sql.catalyst.plans.{Inner, LeftOuter, LeftSemiOrAnti, RightOuter} + +/** + * A visitor pattern for traversing a [[LogicalPlan]] tree and propagate the distinct attributes. + */ +object DistinctKeyVisitor extends LogicalPlanVisitor[Set[ExpressionSet]] { + + private def projectDistinctKeys( + keys: Set[ExpressionSet], projectList: Seq[NamedExpression]): Set[ExpressionSet] = { + val outputSet = ExpressionSet(projectList.map(_.toAttribute)) + val aliases = projectList.filter(_.isInstanceOf[Alias]) + if (aliases.isEmpty) { + keys.filter(_.subsetOf(outputSet)) + } else { + val aliasedDistinctKeys = keys.map { expressionSet => + expressionSet.map { expression => + expression transform { + case expr: Expression => + // TODO: Expand distinctKeys for redundant aliases on the same expression + aliases + .collectFirst { case a: Alias if a.child.semanticEquals(expr) => a.toAttribute } + .getOrElse(expr) + } + } + } + aliasedDistinctKeys.collect { + case es: ExpressionSet if es.subsetOf(outputSet) => ExpressionSet(es) + } ++ keys.filter(_.subsetOf(outputSet)) + }.filter(_.nonEmpty) + } + + override def default(p: LogicalPlan): Set[ExpressionSet] = Set.empty[ExpressionSet] + + override def visitAggregate(p: Aggregate): Set[ExpressionSet] = { + val groupingExps = ExpressionSet(p.groupingExpressions) // handle group by a, a + projectDistinctKeys(Set(groupingExps), p.aggregateExpressions) + } + + override def visitDistinct(p: Distinct): Set[ExpressionSet] = Set(ExpressionSet(p.output)) + + override def visitExcept(p: Except): Set[ExpressionSet] = + if (!p.isAll) Set(ExpressionSet(p.output)) else default(p) + + override def visitExpand(p: Expand): Set[ExpressionSet] = default(p) + + override def visitFilter(p: Filter): Set[ExpressionSet] = p.child.distinctKeys + + override def visitGenerate(p: Generate): Set[ExpressionSet] = default(p) + + override def visitGlobalLimit(p: GlobalLimit): Set[ExpressionSet] = { + p.maxRows match { + case Some(value) if value <= 1 => Set(ExpressionSet(p.output)) + case _ => p.child.distinctKeys + } + } + + override def visitIntersect(p: Intersect): 
Set[ExpressionSet] = { + if (!p.isAll) Set(ExpressionSet(p.output)) else default(p) + } + + override def visitJoin(p: Join): Set[ExpressionSet] = { + p match { + case Join(_, _, LeftSemiOrAnti(_), _, _) => + p.left.distinctKeys + case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, _, _, left, right, _) + if left.distinctKeys.nonEmpty || right.distinctKeys.nonEmpty => + val rightJoinKeySet = ExpressionSet(rightKeys) + val leftJoinKeySet = ExpressionSet(leftKeys) + joinType match { + case Inner if left.distinctKeys.exists(_.subsetOf(leftJoinKeySet)) && + right.distinctKeys.exists(_.subsetOf(rightJoinKeySet)) => + left.distinctKeys ++ right.distinctKeys + case Inner | LeftOuter if right.distinctKeys.exists(_.subsetOf(rightJoinKeySet)) => + p.left.distinctKeys + case Inner | RightOuter if left.distinctKeys.exists(_.subsetOf(leftJoinKeySet)) => + p.right.distinctKeys + case _ => + default(p) + } + case _ => default(p) + } + } + + override def visitLocalLimit(p: LocalLimit): Set[ExpressionSet] = p.child.distinctKeys + + override def visitPivot(p: Pivot): Set[ExpressionSet] = default(p) + + override def visitProject(p: Project): Set[ExpressionSet] = { + if (p.child.distinctKeys.nonEmpty) { + projectDistinctKeys(p.child.distinctKeys, p.projectList) + } else { + default(p) + } + } + + override def visitRepartition(p: Repartition): Set[ExpressionSet] = p.child.distinctKeys + + override def visitRepartitionByExpr(p: RepartitionByExpression): Set[ExpressionSet] = + p.child.distinctKeys + + override def visitSample(p: Sample): Set[ExpressionSet] = { + if (!p.withReplacement) p.child.distinctKeys else default(p) + } + + override def visitScriptTransform(p: ScriptTransformation): Set[ExpressionSet] = default(p) + + override def visitUnion(p: Union): Set[ExpressionSet] = default(p) + + override def visitWindow(p: Window): Set[ExpressionSet] = p.child.distinctKeys + + override def visitTail(p: Tail): Set[ExpressionSet] = p.child.distinctKeys + + override def visitSort(p: Sort): Set[ExpressionSet] = p.child.distinctKeys + + override def visitRebalancePartitions(p: RebalancePartitions): Set[ExpressionSet] = + p.child.distinctKeys + + override def visitWithCTE(p: WithCTE): Set[ExpressionSet] = p.plan.distinctKeys +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 36a3b110fda29..7640d9234c71f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -31,6 +31,7 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with AnalysisHelper with LogicalPlanStats + with LogicalPlanDistinctKeys with QueryPlanConstraints with Logging { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanDistinctKeys.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanDistinctKeys.scala new file mode 100644 index 0000000000000..1843c2da478ef --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanDistinctKeys.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.expressions.ExpressionSet +import org.apache.spark.sql.internal.SQLConf.PROPAGATE_DISTINCT_KEYS_ENABLED + +/** + * A trait to add distinct attributes to [[LogicalPlan]]. For example: + * {{{ + * SELECT a, b, SUM(c) FROM Tab1 GROUP BY a, b + * // returns a, b + * }}} + */ +trait LogicalPlanDistinctKeys { self: LogicalPlan => + lazy val distinctKeys: Set[ExpressionSet] = { + if (conf.getConf(PROPAGATE_DISTINCT_KEYS_ENABLED)) DistinctKeyVisitor.visit(self) else Set.empty + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 42e44cfe575fc..33919f3acaa0e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -744,6 +744,15 @@ object SQLConf { .booleanConf .createWithDefault(true) + val PROPAGATE_DISTINCT_KEYS_ENABLED = + buildConf("spark.sql.optimizer.propagateDistinctKeys.enabled") + .internal() + .doc("When true, the query optimizer will propagate a set of distinct attributes from the " + + "current node and use it to optimize query.") + .version("3.3.0") + .booleanConf + .createWithDefault(true) + val ESCAPED_STRING_LITERALS = buildConf("spark.sql.parser.escapedStringLiterals") .internal() .doc("When true, string literals (including regex patterns) remain escaped in our SQL " + diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAggregatesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAggregatesSuite.scala index d11ff16229b14..963332103b6cb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAggregatesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAggregatesSuite.scala @@ -21,8 +21,9 @@ import org.apache.spark.api.python.PythonEvalType import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.{Expression, PythonUDF} -import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral +import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi, PlanTest} +import org.apache.spark.sql.catalyst.plans.logical.{Distinct, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.types.IntegerType @@ -33,6 +34,10 @@ class RemoveRedundantAggregatesSuite extends PlanTest { RemoveRedundantAggregates) :: Nil } + private val relation = LocalRelation('a.int, 'b.int) + private val x = relation.subquery('x) + private val y = 
relation.subquery('y) + private def aggregates(e: Expression): Seq[Expression] = { Seq( count(e), @@ -42,7 +47,6 @@ class RemoveRedundantAggregatesSuite extends PlanTest { } test("Remove redundant aggregate") { - val relation = LocalRelation('a.int, 'b.int) for (agg <- aggregates('b)) { val query = relation .groupBy('a)('a, agg) @@ -57,7 +61,6 @@ class RemoveRedundantAggregatesSuite extends PlanTest { } test("Remove 2 redundant aggregates") { - val relation = LocalRelation('a.int, 'b.int) for (agg <- aggregates('b)) { val query = relation .groupBy('a)('a, agg) @@ -73,7 +76,6 @@ class RemoveRedundantAggregatesSuite extends PlanTest { } test("Remove redundant aggregate with different grouping") { - val relation = LocalRelation('a.int, 'b.int) val query = relation .groupBy('a, 'b)('a) .groupBy('a)('a) @@ -86,7 +88,6 @@ class RemoveRedundantAggregatesSuite extends PlanTest { } test("Remove redundant aggregate with aliases") { - val relation = LocalRelation('a.int, 'b.int) for (agg <- aggregates('b)) { val query = relation .groupBy('a + 'b)(('a + 'b) as 'c, agg) @@ -101,7 +102,6 @@ class RemoveRedundantAggregatesSuite extends PlanTest { } test("Remove redundant aggregate with non-deterministic upper") { - val relation = LocalRelation('a.int, 'b.int) val query = relation .groupBy('a)('a) .groupBy('a)('a, rand(0) as 'c) @@ -114,7 +114,6 @@ class RemoveRedundantAggregatesSuite extends PlanTest { } test("Remove redundant aggregate with non-deterministic lower") { - val relation = LocalRelation('a.int, 'b.int) val query = relation .groupBy('a, 'c)('a, rand(0) as 'c) .groupBy('a, 'c)('a, 'c) @@ -127,7 +126,6 @@ class RemoveRedundantAggregatesSuite extends PlanTest { } test("Keep non-redundant aggregate - upper has duplicate sensitive agg expression") { - val relation = LocalRelation('a.int, 'b.int) for (agg <- aggregates('b)) { val query = relation .groupBy('a, 'b)('a, 'b) @@ -140,7 +138,6 @@ class RemoveRedundantAggregatesSuite extends PlanTest { } test("Remove redundant aggregate - upper has duplicate agnostic agg expression") { - val relation = LocalRelation('a.int, 'b.int) val query = relation .groupBy('a, 'b)('a, 'b) // The max and countDistinct does not change if there are duplicate values @@ -153,8 +150,14 @@ class RemoveRedundantAggregatesSuite extends PlanTest { comparePlans(optimized, expected) } + test("Remove redundant aggregate - upper has contains foldable expressions") { + val originalQuery = x.groupBy('a, 'b)('a, 'b).groupBy('a)('a, TrueLiteral).analyze + val correctAnswer = x.groupBy('a)('a, TrueLiteral).analyze + val optimized = Optimize.execute(originalQuery) + comparePlans(optimized, correctAnswer) + } + test("Keep non-redundant aggregate - upper references agg expression") { - val relation = LocalRelation('a.int, 'b.int) for (agg <- aggregates('b)) { val query = relation .groupBy('a)('a, agg as 'c) @@ -165,13 +168,123 @@ class RemoveRedundantAggregatesSuite extends PlanTest { } } - test("Keep non-redundant aggregate - upper references non-deterministic non-grouping") { - val relation = LocalRelation('a.int, 'b.int) + test("Remove non-redundant aggregate - upper references non-deterministic non-grouping") { val query = relation .groupBy('a)('a, ('a + rand(0)) as 'c) .groupBy('a, 'c)('a, 'c) .analyze + val expected = relation + .groupBy('a)('a, ('a + rand(0)) as 'c) + .select('a, 'c) + .analyze val optimized = Optimize.execute(query) - comparePlans(optimized, query) + comparePlans(optimized, expected) + } + + test("SPARK-36194: Remove aggregation from left semi/anti join if 
aggregation the same") { + Seq(LeftSemi, LeftAnti).foreach { joinType => + val originalQuery = x.groupBy('a, 'b)('a, 'b) + .join(y, joinType, Some("x.a".attr === "y.a".attr && "x.b".attr === "y.b".attr)) + .groupBy("x.a".attr, "x.b".attr)("x.a".attr, "x.b".attr) + val correctAnswer = x.groupBy('a, 'b)('a, 'b) + .join(y, joinType, Some("x.a".attr === "y.a".attr && "x.b".attr === "y.b".attr)) + .select("x.a".attr, "x.b".attr) + + val optimized = Optimize.execute(originalQuery.analyze) + comparePlans(optimized, correctAnswer.analyze) + } + } + + test("SPARK-36194: Remove aggregation from left semi/anti join with alias") { + Seq(LeftSemi, LeftAnti).foreach { joinType => + val originalQuery = x.groupBy('a, 'b)('a, 'b.as("d")) + .join(y, joinType, Some("x.a".attr === "y.a".attr && "d".attr === "y.b".attr)) + .groupBy("x.a".attr, "d".attr)("x.a".attr, "d".attr) + val correctAnswer = x.groupBy('a, 'b)('a, 'b.as("d")) + .join(y, joinType, Some("x.a".attr === "y.a".attr && "d".attr === "y.b".attr)) + .select("x.a".attr, "d".attr) + + val optimized = Optimize.execute(originalQuery.analyze) + comparePlans(optimized, correctAnswer.analyze) + } + } + + test("SPARK-36194: Remove aggregation from left semi/anti join if it is the sub aggregateExprs") { + Seq(LeftSemi, LeftAnti).foreach { joinType => + val originalQuery = x.groupBy('a, 'b)('a, 'b) + .join(y, joinType, Some("x.a".attr === "y.a".attr && "x.b".attr === "y.b".attr)) + .groupBy("x.a".attr, "x.b".attr)("x.a".attr) + val correctAnswer = x.groupBy('a, 'b)('a, 'b) + .join(y, joinType, Some("x.a".attr === "y.a".attr && "x.b".attr === "y.b".attr)) + .select("x.a".attr) + + val optimized = Optimize.execute(originalQuery.analyze) + comparePlans(optimized, correctAnswer.analyze) + } + } + + test("SPARK-36194: Transform down to remove more aggregates") { + Seq(LeftSemi, LeftAnti).foreach { joinType => + val originalQuery = x.groupBy('a, 'b)('a, 'b) + .join(y, joinType, Some("x.a".attr === "y.a".attr && "x.b".attr === "y.b".attr)) + .groupBy("x.a".attr, "x.b".attr)("x.a".attr, "x.b".attr) + .join(y, joinType, Some("x.a".attr === "y.a".attr && "x.b".attr === "y.b".attr)) + .groupBy("x.a".attr, "x.b".attr)("x.a".attr) + val correctAnswer = x.groupBy('a, 'b)('a, 'b) + .join(y, joinType, Some("x.a".attr === "y.a".attr && "x.b".attr === "y.b".attr)) + .select("x.a".attr, "x.b".attr) + .join(y, joinType, Some("x.a".attr === "y.a".attr && "x.b".attr === "y.b".attr)) + .select("x.a".attr) + + val optimized = Optimize.execute(originalQuery.analyze) + comparePlans(optimized, correctAnswer.analyze) + } + } + + test("SPARK-36194: Child distinct keys is the subset of required keys") { + val originalQuery = relation + .groupBy('a)('a, count('b).as("cnt")) + .groupBy('a, 'cnt)('a, 'cnt) + .analyze + val correctAnswer = relation + .groupBy('a)('a, count('b).as("cnt")) + .select('a, 'cnt) + .analyze + val optimized = Optimize.execute(originalQuery) + comparePlans(optimized, correctAnswer) + } + + test("SPARK-36194: Child distinct keys are subsets and aggregateExpressions are foldable") { + val originalQuery = x.groupBy('a, 'b)('a, 'b) + .join(y, LeftSemi, Some("x.a".attr === "y.a".attr && "x.b".attr === "y.b".attr)) + .groupBy("x.a".attr, "x.b".attr)(TrueLiteral) + .analyze + val correctAnswer = x.groupBy('a, 'b)('a, 'b) + .join(y, LeftSemi, Some("x.a".attr === "y.a".attr && "x.b".attr === "y.b".attr)) + .select(TrueLiteral) + .analyze + val optimized = Optimize.execute(originalQuery) + comparePlans(optimized, correctAnswer) + } + + test("SPARK-36194: Negative case: child 
distinct keys is not the subset of required keys") { + Seq(LeftSemi, LeftAnti).foreach { joinType => + val originalQuery1 = x.groupBy('a, 'b)('a, 'b) + .join(y, joinType, Some("x.a".attr === "y.a".attr && "x.b".attr === "y.b".attr)) + .groupBy("x.a".attr)("x.a".attr) + .analyze + comparePlans(Optimize.execute(originalQuery1), originalQuery1) + + val originalQuery2 = x.groupBy('a, 'b)('a, 'b) + .join(y, joinType, Some("x.a".attr === "y.a".attr && "x.b".attr === "y.b".attr)) + .groupBy("x.a".attr)(count("x.b".attr)) + .analyze + comparePlans(Optimize.execute(originalQuery2), originalQuery2) + } + } + + test("SPARK-36194: Negative case: child distinct keys is empty") { + val originalQuery = Distinct(x.groupBy('a, 'b)('a, TrueLiteral)).analyze + comparePlans(Optimize.execute(originalQuery), originalQuery) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitorSuite.scala new file mode 100644 index 0000000000000..131155f8c04d1 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitorSuite.scala @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.plans.logical + +import scala.collection.mutable +import scala.reflect.runtime.universe.TypeTag + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, ExpressionSet, UnspecifiedFrame} +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.types.IntegerType + +class DistinctKeyVisitorSuite extends PlanTest { + + private val a = AttributeReference("a", IntegerType)() + private val b = AttributeReference("b", IntegerType)() + private val c = AttributeReference("c", IntegerType)() + private val d = a.as("aliased_a") + private val e = b.as("aliased_b") + private val f = Alias(a + 1, (a + 1).toString)() + private val x = AttributeReference("x", IntegerType)() + private val y = AttributeReference("y", IntegerType)() + private val z = AttributeReference("z", IntegerType)() + + + private val t1 = LocalRelation(a, b, c).as("t1") + private val t2 = LocalRelation(x, y, z).as("t2") + + private def checkDistinctAttributes(plan: LogicalPlan, distinctKeys: Set[ExpressionSet]) = { + assert(plan.analyze.distinctKeys === distinctKeys) + } + + implicit private def productEncoder[T <: Product : TypeTag] = ExpressionEncoder[T]() + + test("Aggregate's distinct attributes") { + checkDistinctAttributes(t1.groupBy('a, 'b)('a, 'b, 1), Set(ExpressionSet(Seq(a, b)))) + checkDistinctAttributes(t1.groupBy('a)('a), Set(ExpressionSet(Seq(a)))) + checkDistinctAttributes(t1.groupBy('a, 'b)('a, 'b), Set(ExpressionSet(Seq(a, b)))) + checkDistinctAttributes(t1.groupBy('a, 'b, 1)('a, 'b), Set(ExpressionSet(Seq(a, b)))) + checkDistinctAttributes(t1.groupBy('a, 'b)('a, 'b, 1), Set(ExpressionSet(Seq(a, b)))) + checkDistinctAttributes(t1.groupBy('a, 'b, 1)('a, 'b, 1), Set(ExpressionSet(Seq(a, b)))) + checkDistinctAttributes(t1.groupBy('a, 'b)('a, 'a), Set.empty) + checkDistinctAttributes(t1.groupBy('a, 'b)('a), Set.empty) + checkDistinctAttributes(t1.groupBy('a)('a, max('b)), Set(ExpressionSet(Seq(a)))) + checkDistinctAttributes(t1.groupBy('a, 'b)('a, 'b, d, e), + Set(ExpressionSet(Seq(a, b)), ExpressionSet(Seq(d.toAttribute, e.toAttribute)))) + checkDistinctAttributes(t1.groupBy()(sum('c)), Set.empty) + checkDistinctAttributes(t1.groupBy('a)('a, 'a % 10, d, sum('b)), + Set(ExpressionSet(Seq(a)), ExpressionSet(Seq(d.toAttribute)))) + checkDistinctAttributes(t1.groupBy(f.child, 'b)(f, 'b, sum('c)), + Set(ExpressionSet(Seq(f.toAttribute, b)))) + } + + test("Distinct's distinct attributes") { + checkDistinctAttributes(Distinct(t1), Set(ExpressionSet(Seq(a, b, c)))) + checkDistinctAttributes(Distinct(t1.select('a, 'c)), Set(ExpressionSet(Seq(a, c)))) + } + + test("Except's distinct attributes") { + checkDistinctAttributes(Except(t1, t2, false), Set(ExpressionSet(Seq(a, b, c)))) + checkDistinctAttributes(Except(t1, t2, true), Set.empty) + } + + test("Filter's distinct attributes") { + checkDistinctAttributes(Filter('a > 1, t1), Set.empty) + checkDistinctAttributes(Filter('a > 1, Distinct(t1)), Set(ExpressionSet(Seq(a, b, c)))) + } + + test("Limit's distinct attributes") { + checkDistinctAttributes(Distinct(t1).limit(10), Set(ExpressionSet(Seq(a, b, c)))) + checkDistinctAttributes(LocalLimit(10, Distinct(t1)), Set(ExpressionSet(Seq(a, b, c)))) + checkDistinctAttributes(t1.limit(1), Set(ExpressionSet(Seq(a, b, c)))) + } + + test("Intersect's distinct attributes") { + 
checkDistinctAttributes(Intersect(t1, t2, false), Set(ExpressionSet(Seq(a, b, c)))) + checkDistinctAttributes(Intersect(t1, t2, true), Set.empty) + } + + test("Join's distinct attributes") { + Seq(LeftSemi, LeftAnti).foreach { joinType => + checkDistinctAttributes( + Distinct(t1).join(t2, joinType, Some('a === 'x)), Set(ExpressionSet(Seq(a, b, c)))) + } + + checkDistinctAttributes( + Distinct(t1).join(Distinct(t2), Inner, Some('a === 'x && 'b === 'y && 'c === 'z)), + Set(ExpressionSet(Seq(a, b, c)), ExpressionSet(Seq(x, y, z)))) + + checkDistinctAttributes( + Distinct(t1).join(Distinct(t2), LeftOuter, Some('a === 'x && 'b === 'y && 'c === 'z)), + Set(ExpressionSet(Seq(a, b, c)))) + + checkDistinctAttributes( + Distinct(t1).join(Distinct(t2), RightOuter, Some('a === 'x && 'b === 'y && 'c === 'z)), + Set(ExpressionSet(Seq(x, y, z)))) + + Seq(Inner, Cross, LeftOuter, RightOuter).foreach { joinType => + checkDistinctAttributes(t1.join(t2, joinType, Some('a === 'x)), + Set.empty) + checkDistinctAttributes( + Distinct(t1).join(Distinct(t2), joinType, Some('a === 'x && 'b === 'y)), + Set.empty) + checkDistinctAttributes( + Distinct(t1).join(Distinct(t2), joinType, + Some('a === 'x && 'b === 'y && 'c % 5 === 'z % 5)), + Set.empty) + } + + checkDistinctAttributes( + Distinct(t1).join(Distinct(t2), Cross, Some('a === 'x && 'b === 'y && 'c === 'z)), + Set.empty) + } + + test("Project's distinct attributes") { + checkDistinctAttributes(t1.select('a, 'b), Set.empty) + checkDistinctAttributes(Distinct(t1).select('a), Set.empty) + checkDistinctAttributes(Distinct(t1).select('a, 'b, d, e), Set.empty) + checkDistinctAttributes(Distinct(t1).select('a, 'b, 'c, 1), Set(ExpressionSet(Seq(a, b, c)))) + checkDistinctAttributes(Distinct(t1).select('a, 'b, c, d), + Set(ExpressionSet(Seq(a, b, c)), ExpressionSet(Seq(b, c, d.toAttribute)))) + checkDistinctAttributes(t1.groupBy('a, 'b)('a, 'b, d).select('a, 'b, e), + Set(ExpressionSet(Seq(a, b)), ExpressionSet(Seq(a, e.toAttribute)))) + } + + test("Repartition's distinct attributes") { + checkDistinctAttributes(t1.repartition(8), Set.empty) + checkDistinctAttributes(Distinct(t1).repartition(8), Set(ExpressionSet(Seq(a, b, c)))) + checkDistinctAttributes(RepartitionByExpression(Seq(a), Distinct(t1), None), + Set(ExpressionSet(Seq(a, b, c)))) + } + + test("Sample's distinct attributes") { + checkDistinctAttributes(t1.sample(0, 0.2, false, 1), Set.empty) + checkDistinctAttributes(Distinct(t1).sample(0, 0.2, false, 1), Set(ExpressionSet(Seq(a, b, c)))) + } + + test("Window's distinct attributes") { + val winExpr = windowExpr(count('b), windowSpec('a :: Nil, 'b.asc :: Nil, UnspecifiedFrame)) + + checkDistinctAttributes( + Distinct(t1).select('a, 'b, 'c, winExpr.as('window)), Set(ExpressionSet(Seq(a, b, c)))) + checkDistinctAttributes( + Distinct(t1).select('a, 'b, winExpr.as('window)), Set()) + } + + test("Tail's distinct attributes") { + checkDistinctAttributes(Tail(10, Distinct(t1)), Set(ExpressionSet(Seq(a, b, c)))) + } + + test("Sort's distinct attributes") { + checkDistinctAttributes(t1.sortBy('a.asc), Set.empty) + checkDistinctAttributes(Distinct(t1).sortBy('a.asc), Set(ExpressionSet(Seq(a, b, c)))) + } + + test("RebalancePartitions's distinct attributes") { + checkDistinctAttributes(RebalancePartitions(Seq(a), Distinct(t1)), + Set(ExpressionSet(Seq(a, b, c)))) + } + + test("WithCTE's distinct attributes") { + checkDistinctAttributes(WithCTE(Distinct(t1), mutable.ArrayBuffer.empty[CTERelationDef].toSeq), + Set(ExpressionSet(Seq(a, b, c)))) + } +} diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt index e3eac82fee26b..4105a94131dda 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt @@ -1,130 +1,127 @@ == Physical Plan == -TakeOrderedAndProject (126) -+- * HashAggregate (125) - +- Exchange (124) - +- * HashAggregate (123) - +- * Expand (122) - +- Union (121) - :- * Project (82) - : +- * Filter (81) - : +- * HashAggregate (80) - : +- Exchange (79) - : +- * HashAggregate (78) - : +- * Project (77) - : +- * BroadcastHashJoin Inner BuildRight (76) - : :- * Project (66) - : : +- * BroadcastHashJoin Inner BuildRight (65) - : : :- * SortMergeJoin LeftSemi (63) +TakeOrderedAndProject (123) ++- * HashAggregate (122) + +- Exchange (121) + +- * HashAggregate (120) + +- * Expand (119) + +- Union (118) + :- * Project (79) + : +- * Filter (78) + : +- * HashAggregate (77) + : +- Exchange (76) + : +- * HashAggregate (75) + : +- * Project (74) + : +- * BroadcastHashJoin Inner BuildRight (73) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * SortMergeJoin LeftSemi (60) : : : :- * Sort (5) : : : : +- Exchange (4) : : : : +- * Filter (3) : : : : +- * ColumnarToRow (2) : : : : +- Scan parquet default.store_sales (1) - : : : +- * Sort (62) - : : : +- Exchange (61) - : : : +- * Project (60) - : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : +- * Sort (59) + : : : +- Exchange (58) + : : : +- * Project (57) + : : : +- * BroadcastHashJoin Inner BuildRight (56) : : : :- * Filter (8) : : : : +- * ColumnarToRow (7) : : : : +- Scan parquet default.item (6) - : : : +- BroadcastExchange (58) - : : : +- * HashAggregate (57) - : : : +- Exchange (56) - : : : +- * HashAggregate (55) - : : : +- * SortMergeJoin LeftSemi (54) - : : : :- * Sort (42) - : : : : +- Exchange (41) - : : : : +- * HashAggregate (40) - : : : : +- Exchange (39) - : : : : +- * HashAggregate (38) - : : : : +- * Project (37) - : : : : +- * BroadcastHashJoin Inner BuildRight (36) - : : : : :- * Project (14) - : : : : : +- * BroadcastHashJoin Inner BuildRight (13) - : : : : : :- * Filter (11) - : : : : : : +- * ColumnarToRow (10) - : : : : : : +- Scan parquet default.store_sales (9) - : : : : : +- ReusedExchange (12) - : : : : +- BroadcastExchange (35) - : : : : +- * SortMergeJoin LeftSemi (34) - : : : : :- * Sort (19) - : : : : : +- Exchange (18) - : : : : : +- * Filter (17) - : : : : : +- * ColumnarToRow (16) - : : : : : +- Scan parquet default.item (15) - : : : : +- * Sort (33) - : : : : +- Exchange (32) - : : : : +- * Project (31) - : : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : : :- * Project (25) - : : : : : +- * BroadcastHashJoin Inner BuildRight (24) - : : : : : :- * Filter (22) - : : : : : : +- * ColumnarToRow (21) - : : : : : : +- Scan parquet default.catalog_sales (20) - : : : : : +- ReusedExchange (23) - : : : : +- BroadcastExchange (29) - : : : : +- * Filter (28) - : : : : +- * ColumnarToRow (27) - : : : : +- Scan parquet default.item (26) - : : : +- * Sort (53) - : : : +- Exchange (52) - : : : +- * Project (51) - : : : +- * BroadcastHashJoin Inner BuildRight (50) - : : : :- * Project (48) - : : : : +- * BroadcastHashJoin Inner BuildRight (47) - : : : : :- * Filter (45) - : : : : : +- * ColumnarToRow (44) - : : : : : +- Scan parquet default.web_sales 
(43) - : : : : +- ReusedExchange (46) - : : : +- ReusedExchange (49) - : : +- ReusedExchange (64) - : +- BroadcastExchange (75) - : +- * SortMergeJoin LeftSemi (74) - : :- * Sort (71) - : : +- Exchange (70) - : : +- * Filter (69) - : : +- * ColumnarToRow (68) - : : +- Scan parquet default.item (67) - : +- * Sort (73) - : +- ReusedExchange (72) - :- * Project (101) - : +- * Filter (100) - : +- * HashAggregate (99) - : +- Exchange (98) - : +- * HashAggregate (97) - : +- * Project (96) - : +- * BroadcastHashJoin Inner BuildRight (95) - : :- * Project (93) - : : +- * BroadcastHashJoin Inner BuildRight (92) - : : :- * SortMergeJoin LeftSemi (90) - : : : :- * Sort (87) - : : : : +- Exchange (86) - : : : : +- * Filter (85) - : : : : +- * ColumnarToRow (84) - : : : : +- Scan parquet default.catalog_sales (83) - : : : +- * Sort (89) - : : : +- ReusedExchange (88) - : : +- ReusedExchange (91) - : +- ReusedExchange (94) - +- * Project (120) - +- * Filter (119) - +- * HashAggregate (118) - +- Exchange (117) - +- * HashAggregate (116) - +- * Project (115) - +- * BroadcastHashJoin Inner BuildRight (114) - :- * Project (112) - : +- * BroadcastHashJoin Inner BuildRight (111) - : :- * SortMergeJoin LeftSemi (109) - : : :- * Sort (106) - : : : +- Exchange (105) - : : : +- * Filter (104) - : : : +- * ColumnarToRow (103) - : : : +- Scan parquet default.web_sales (102) - : : +- * Sort (108) - : : +- ReusedExchange (107) - : +- ReusedExchange (110) - +- ReusedExchange (113) + : : : +- BroadcastExchange (55) + : : : +- * SortMergeJoin LeftSemi (54) + : : : :- * Sort (42) + : : : : +- Exchange (41) + : : : : +- * HashAggregate (40) + : : : : +- Exchange (39) + : : : : +- * HashAggregate (38) + : : : : +- * Project (37) + : : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : : : :- * Project (14) + : : : : : +- * BroadcastHashJoin Inner BuildRight (13) + : : : : : :- * Filter (11) + : : : : : : +- * ColumnarToRow (10) + : : : : : : +- Scan parquet default.store_sales (9) + : : : : : +- ReusedExchange (12) + : : : : +- BroadcastExchange (35) + : : : : +- * SortMergeJoin LeftSemi (34) + : : : : :- * Sort (19) + : : : : : +- Exchange (18) + : : : : : +- * Filter (17) + : : : : : +- * ColumnarToRow (16) + : : : : : +- Scan parquet default.item (15) + : : : : +- * Sort (33) + : : : : +- Exchange (32) + : : : : +- * Project (31) + : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : :- * Project (25) + : : : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : : : :- * Filter (22) + : : : : : : +- * ColumnarToRow (21) + : : : : : : +- Scan parquet default.catalog_sales (20) + : : : : : +- ReusedExchange (23) + : : : : +- BroadcastExchange (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet default.item (26) + : : : +- * Sort (53) + : : : +- Exchange (52) + : : : +- * Project (51) + : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : :- * Project (48) + : : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : : :- * Filter (45) + : : : : : +- * ColumnarToRow (44) + : : : : : +- Scan parquet default.web_sales (43) + : : : : +- ReusedExchange (46) + : : : +- ReusedExchange (49) + : : +- ReusedExchange (61) + : +- BroadcastExchange (72) + : +- * SortMergeJoin LeftSemi (71) + : :- * Sort (68) + : : +- Exchange (67) + : : +- * Filter (66) + : : +- * ColumnarToRow (65) + : : +- Scan parquet default.item (64) + : +- * Sort (70) + : +- ReusedExchange (69) + :- * Project (98) + : +- * Filter (97) + : +- * HashAggregate (96) + : +- Exchange (95) + : +- * 
HashAggregate (94) + : +- * Project (93) + : +- * BroadcastHashJoin Inner BuildRight (92) + : :- * Project (90) + : : +- * BroadcastHashJoin Inner BuildRight (89) + : : :- * SortMergeJoin LeftSemi (87) + : : : :- * Sort (84) + : : : : +- Exchange (83) + : : : : +- * Filter (82) + : : : : +- * ColumnarToRow (81) + : : : : +- Scan parquet default.catalog_sales (80) + : : : +- * Sort (86) + : : : +- ReusedExchange (85) + : : +- ReusedExchange (88) + : +- ReusedExchange (91) + +- * Project (117) + +- * Filter (116) + +- * HashAggregate (115) + +- Exchange (114) + +- * HashAggregate (113) + +- * Project (112) + +- * BroadcastHashJoin Inner BuildRight (111) + :- * Project (109) + : +- * BroadcastHashJoin Inner BuildRight (108) + : :- * SortMergeJoin LeftSemi (106) + : : :- * Sort (103) + : : : +- Exchange (102) + : : : +- * Filter (101) + : : : +- * ColumnarToRow (100) + : : : +- Scan parquet default.web_sales (99) + : : +- * Sort (105) + : : +- ReusedExchange (104) + : +- ReusedExchange (107) + +- ReusedExchange (110) (1) Scan parquet default.store_sales @@ -157,10 +154,10 @@ Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct -(7) ColumnarToRow [codegen id : 20] +(7) ColumnarToRow [codegen id : 19] Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] -(8) Filter [codegen id : 20] +(8) Filter [codegen id : 19] Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] Condition : ((isnotnull(i_brand_id#8) AND isnotnull(i_class_id#9)) AND isnotnull(i_category_id#10)) @@ -179,7 +176,7 @@ Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] Condition : isnotnull(ss_item_sk#11) -(12) ReusedExchange [Reuses operator id: 155] +(12) ReusedExchange [Reuses operator id: 152] Output [1]: [d_date_sk#14] (13) BroadcastHashJoin [codegen id : 11] @@ -228,7 +225,7 @@ Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] Condition : isnotnull(cs_item_sk#20) -(23) ReusedExchange [Reuses operator id: 155] +(23) ReusedExchange [Reuses operator id: 152] Output [1]: [d_date_sk#22] (24) BroadcastHashJoin [codegen id : 8] @@ -334,7 +331,7 @@ Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] Condition : isnotnull(ws_item_sk#35) -(46) ReusedExchange [Reuses operator id: 155] +(46) ReusedExchange [Reuses operator id: 152] Output [1]: [d_date_sk#37] (47) BroadcastHashJoin [codegen id : 16] @@ -371,519 +368,501 @@ Left keys [6]: [coalesce(brand_id#30, 0), isnull(brand_id#30), coalesce(class_id Right keys [6]: [coalesce(i_brand_id#39, 0), isnull(i_brand_id#39), coalesce(i_class_id#40, 0), isnull(i_class_id#40), coalesce(i_category_id#41, 0), isnull(i_category_id#41)] Join condition: None -(55) HashAggregate [codegen id : 18] +(55) BroadcastExchange Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] - -(56) Exchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: hashpartitioning(brand_id#30, class_id#31, category_id#32, 5), ENSURE_REQUIREMENTS, [id=#43] - -(57) HashAggregate [codegen id : 19] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, 
category_id#32] - -(58) BroadcastExchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#44] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#43] -(59) BroadcastHashJoin [codegen id : 20] +(56) BroadcastHashJoin [codegen id : 19] Left keys [3]: [i_brand_id#8, i_class_id#9, i_category_id#10] Right keys [3]: [brand_id#30, class_id#31, category_id#32] Join condition: None -(60) Project [codegen id : 20] -Output [1]: [i_item_sk#7 AS ss_item_sk#45] +(57) Project [codegen id : 19] +Output [1]: [i_item_sk#7 AS ss_item_sk#44] Input [7]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, brand_id#30, class_id#31, category_id#32] -(61) Exchange -Input [1]: [ss_item_sk#45] -Arguments: hashpartitioning(ss_item_sk#45, 5), ENSURE_REQUIREMENTS, [id=#46] +(58) Exchange +Input [1]: [ss_item_sk#44] +Arguments: hashpartitioning(ss_item_sk#44, 5), ENSURE_REQUIREMENTS, [id=#45] -(62) Sort [codegen id : 21] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS FIRST], false, 0 +(59) Sort [codegen id : 20] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(63) SortMergeJoin [codegen id : 45] +(60) SortMergeJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [ss_item_sk#45] +Right keys [1]: [ss_item_sk#44] Join condition: None -(64) ReusedExchange [Reuses operator id: 150] -Output [1]: [d_date_sk#47] +(61) ReusedExchange [Reuses operator id: 147] +Output [1]: [d_date_sk#46] -(65) BroadcastHashJoin [codegen id : 45] +(62) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#47] +Right keys [1]: [d_date_sk#46] Join condition: None -(66) Project [codegen id : 45] +(63) Project [codegen id : 43] Output [3]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3] -Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#47] +Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#46] -(67) Scan parquet default.item -Output [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] +(64) Scan parquet default.item +Output [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct -(68) ColumnarToRow [codegen id : 23] -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] +(65) ColumnarToRow [codegen id : 22] +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -(69) Filter [codegen id : 23] -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Condition : isnotnull(i_item_sk#48) +(66) Filter [codegen id : 22] +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Condition : isnotnull(i_item_sk#47) -(70) Exchange -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Arguments: hashpartitioning(i_item_sk#48, 5), ENSURE_REQUIREMENTS, [id=#52] +(67) Exchange +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Arguments: hashpartitioning(i_item_sk#47, 5), ENSURE_REQUIREMENTS, [id=#51] -(71) Sort [codegen id : 24] -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Arguments: [i_item_sk#48 ASC NULLS FIRST], false, 0 +(68) Sort [codegen id : 23] +Input [4]: 
[i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Arguments: [i_item_sk#47 ASC NULLS FIRST], false, 0 -(72) ReusedExchange [Reuses operator id: 61] -Output [1]: [ss_item_sk#45] +(69) ReusedExchange [Reuses operator id: 58] +Output [1]: [ss_item_sk#44] -(73) Sort [codegen id : 43] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS FIRST], false, 0 +(70) Sort [codegen id : 41] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(74) SortMergeJoin [codegen id : 44] -Left keys [1]: [i_item_sk#48] -Right keys [1]: [ss_item_sk#45] +(71) SortMergeJoin [codegen id : 42] +Left keys [1]: [i_item_sk#47] +Right keys [1]: [ss_item_sk#44] Join condition: None -(75) BroadcastExchange -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#53] +(72) BroadcastExchange +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#52] -(76) BroadcastHashJoin [codegen id : 45] +(73) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#48] +Right keys [1]: [i_item_sk#47] Join condition: None -(77) Project [codegen id : 45] -Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] -Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] +(74) Project [codegen id : 43] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -(78) HashAggregate [codegen id : 45] -Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] -Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] +(75) HashAggregate [codegen id : 43] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] -Results [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] +Aggregate Attributes [3]: [sum#53, isEmpty#54, count#55] +Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] -(79) Exchange -Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] -Arguments: hashpartitioning(i_brand_id#49, i_class_id#50, i_category_id#51, 5), ENSURE_REQUIREMENTS, [id=#60] +(76) Exchange +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] +Arguments: hashpartitioning(i_brand_id#48, i_class_id#49, i_category_id#50, 5), ENSURE_REQUIREMENTS, [id=#59] -(80) HashAggregate [codegen id : 46] -Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] -Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] +(77) HashAggregate [codegen id : 44] +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Functions [2]: 
[sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61, count(1)#62] -Results [5]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61 AS sales#63, count(1)#62 AS number_sales#64] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60, count(1)#61] +Results [5]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60 AS sales#62, count(1)#61 AS number_sales#63] -(81) Filter [codegen id : 46] -Input [5]: [i_brand_id#49, i_class_id#50, i_category_id#51, sales#63, number_sales#64] -Condition : (isnotnull(sales#63) AND (cast(sales#63 as decimal(32,6)) > cast(Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) +(78) Filter [codegen id : 44] +Input [5]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#62, number_sales#63] +Condition : (isnotnull(sales#62) AND (cast(sales#62 as decimal(32,6)) > cast(Subquery scalar-subquery#64, [id=#65] as decimal(32,6)))) -(82) Project [codegen id : 46] -Output [6]: [sales#63, number_sales#64, store AS channel#67, i_brand_id#49, i_class_id#50, i_category_id#51] -Input [5]: [i_brand_id#49, i_class_id#50, i_category_id#51, sales#63, number_sales#64] +(79) Project [codegen id : 44] +Output [6]: [sales#62, number_sales#63, store AS channel#66, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [5]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#62, number_sales#63] -(83) Scan parquet default.catalog_sales -Output [4]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, cs_sold_date_sk#71] +(80) Scan parquet default.catalog_sales +Output [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#71), dynamicpruningexpression(cs_sold_date_sk#71 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#70), dynamicpruningexpression(cs_sold_date_sk#70 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct -(84) ColumnarToRow [codegen id : 47] -Input [4]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, cs_sold_date_sk#71] +(81) ColumnarToRow [codegen id : 45] +Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] -(85) Filter [codegen id : 47] -Input [4]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, cs_sold_date_sk#71] -Condition : isnotnull(cs_item_sk#68) +(82) Filter [codegen id : 45] +Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] +Condition : isnotnull(cs_item_sk#67) -(86) Exchange -Input [4]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, cs_sold_date_sk#71] -Arguments: hashpartitioning(cs_item_sk#68, 5), ENSURE_REQUIREMENTS, [id=#72] +(83) Exchange +Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] +Arguments: hashpartitioning(cs_item_sk#67, 5), ENSURE_REQUIREMENTS, 
[id=#71] -(87) Sort [codegen id : 48] -Input [4]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, cs_sold_date_sk#71] -Arguments: [cs_item_sk#68 ASC NULLS FIRST], false, 0 +(84) Sort [codegen id : 46] +Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] +Arguments: [cs_item_sk#67 ASC NULLS FIRST], false, 0 -(88) ReusedExchange [Reuses operator id: 61] -Output [1]: [ss_item_sk#45] +(85) ReusedExchange [Reuses operator id: 58] +Output [1]: [ss_item_sk#44] -(89) Sort [codegen id : 67] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS FIRST], false, 0 +(86) Sort [codegen id : 64] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(90) SortMergeJoin [codegen id : 91] -Left keys [1]: [cs_item_sk#68] -Right keys [1]: [ss_item_sk#45] +(87) SortMergeJoin [codegen id : 87] +Left keys [1]: [cs_item_sk#67] +Right keys [1]: [ss_item_sk#44] Join condition: None -(91) ReusedExchange [Reuses operator id: 150] -Output [1]: [d_date_sk#73] +(88) ReusedExchange [Reuses operator id: 147] +Output [1]: [d_date_sk#72] -(92) BroadcastHashJoin [codegen id : 91] -Left keys [1]: [cs_sold_date_sk#71] -Right keys [1]: [d_date_sk#73] +(89) BroadcastHashJoin [codegen id : 87] +Left keys [1]: [cs_sold_date_sk#70] +Right keys [1]: [d_date_sk#72] Join condition: None -(93) Project [codegen id : 91] -Output [3]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70] -Input [5]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, cs_sold_date_sk#71, d_date_sk#73] +(90) Project [codegen id : 87] +Output [3]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69] +Input [5]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70, d_date_sk#72] -(94) ReusedExchange [Reuses operator id: 75] -Output [4]: [i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] +(91) ReusedExchange [Reuses operator id: 72] +Output [4]: [i_item_sk#73, i_brand_id#74, i_class_id#75, i_category_id#76] -(95) BroadcastHashJoin [codegen id : 91] -Left keys [1]: [cs_item_sk#68] -Right keys [1]: [i_item_sk#74] +(92) BroadcastHashJoin [codegen id : 87] +Left keys [1]: [cs_item_sk#67] +Right keys [1]: [i_item_sk#73] Join condition: None -(96) Project [codegen id : 91] -Output [5]: [cs_quantity#69, cs_list_price#70, i_brand_id#75, i_class_id#76, i_category_id#77] -Input [7]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] - -(97) HashAggregate [codegen id : 91] -Input [5]: [cs_quantity#69, cs_list_price#70, i_brand_id#75, i_class_id#76, i_category_id#77] -Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#78, isEmpty#79, count#80] -Results [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] - -(98) Exchange -Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] -Arguments: hashpartitioning(i_brand_id#75, i_class_id#76, i_category_id#77, 5), ENSURE_REQUIREMENTS, [id=#84] - -(99) HashAggregate [codegen id : 92] -Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] -Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as 
decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#85, count(1)#86] -Results [5]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#85 AS sales#87, count(1)#86 AS number_sales#88] - -(100) Filter [codegen id : 92] -Input [5]: [i_brand_id#75, i_class_id#76, i_category_id#77, sales#87, number_sales#88] -Condition : (isnotnull(sales#87) AND (cast(sales#87 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) - -(101) Project [codegen id : 92] -Output [6]: [sales#87, number_sales#88, catalog AS channel#89, i_brand_id#75, i_class_id#76, i_category_id#77] -Input [5]: [i_brand_id#75, i_class_id#76, i_category_id#77, sales#87, number_sales#88] - -(102) Scan parquet default.web_sales -Output [4]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] +(93) Project [codegen id : 87] +Output [5]: [cs_quantity#68, cs_list_price#69, i_brand_id#74, i_class_id#75, i_category_id#76] +Input [7]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, i_item_sk#73, i_brand_id#74, i_class_id#75, i_category_id#76] + +(94) HashAggregate [codegen id : 87] +Input [5]: [cs_quantity#68, cs_list_price#69, i_brand_id#74, i_class_id#75, i_category_id#76] +Keys [3]: [i_brand_id#74, i_class_id#75, i_category_id#76] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#77, isEmpty#78, count#79] +Results [6]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum#80, isEmpty#81, count#82] + +(95) Exchange +Input [6]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum#80, isEmpty#81, count#82] +Arguments: hashpartitioning(i_brand_id#74, i_class_id#75, i_category_id#76, 5), ENSURE_REQUIREMENTS, [id=#83] + +(96) HashAggregate [codegen id : 88] +Input [6]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum#80, isEmpty#81, count#82] +Keys [3]: [i_brand_id#74, i_class_id#75, i_category_id#76] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#84, count(1)#85] +Results [5]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#84 AS sales#86, count(1)#85 AS number_sales#87] + +(97) Filter [codegen id : 88] +Input [5]: [i_brand_id#74, i_class_id#75, i_category_id#76, sales#86, number_sales#87] +Condition : (isnotnull(sales#86) AND (cast(sales#86 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#64, [id=#65] as decimal(32,6)))) + +(98) Project [codegen id : 88] +Output [6]: [sales#86, number_sales#87, catalog AS channel#88, i_brand_id#74, i_class_id#75, i_category_id#76] +Input [5]: [i_brand_id#74, i_class_id#75, i_category_id#76, sales#86, number_sales#87] + +(99) Scan parquet 
default.web_sales +Output [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#93), dynamicpruningexpression(ws_sold_date_sk#93 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#92), dynamicpruningexpression(ws_sold_date_sk#92 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct -(103) ColumnarToRow [codegen id : 93] -Input [4]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] +(100) ColumnarToRow [codegen id : 89] +Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] -(104) Filter [codegen id : 93] -Input [4]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] -Condition : isnotnull(ws_item_sk#90) +(101) Filter [codegen id : 89] +Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] +Condition : isnotnull(ws_item_sk#89) -(105) Exchange -Input [4]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] -Arguments: hashpartitioning(ws_item_sk#90, 5), ENSURE_REQUIREMENTS, [id=#94] +(102) Exchange +Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] +Arguments: hashpartitioning(ws_item_sk#89, 5), ENSURE_REQUIREMENTS, [id=#93] -(106) Sort [codegen id : 94] -Input [4]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] -Arguments: [ws_item_sk#90 ASC NULLS FIRST], false, 0 +(103) Sort [codegen id : 90] +Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] +Arguments: [ws_item_sk#89 ASC NULLS FIRST], false, 0 -(107) ReusedExchange [Reuses operator id: 61] -Output [1]: [ss_item_sk#45] +(104) ReusedExchange [Reuses operator id: 58] +Output [1]: [ss_item_sk#44] -(108) Sort [codegen id : 113] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS FIRST], false, 0 +(105) Sort [codegen id : 108] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(109) SortMergeJoin [codegen id : 137] -Left keys [1]: [ws_item_sk#90] -Right keys [1]: [ss_item_sk#45] +(106) SortMergeJoin [codegen id : 131] +Left keys [1]: [ws_item_sk#89] +Right keys [1]: [ss_item_sk#44] Join condition: None -(110) ReusedExchange [Reuses operator id: 150] -Output [1]: [d_date_sk#95] +(107) ReusedExchange [Reuses operator id: 147] +Output [1]: [d_date_sk#94] -(111) BroadcastHashJoin [codegen id : 137] -Left keys [1]: [ws_sold_date_sk#93] -Right keys [1]: [d_date_sk#95] +(108) BroadcastHashJoin [codegen id : 131] +Left keys [1]: [ws_sold_date_sk#92] +Right keys [1]: [d_date_sk#94] Join condition: None -(112) Project [codegen id : 137] -Output [3]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92] -Input [5]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93, d_date_sk#95] +(109) Project [codegen id : 131] +Output [3]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91] +Input [5]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92, d_date_sk#94] -(113) ReusedExchange [Reuses operator id: 75] -Output [4]: [i_item_sk#96, i_brand_id#97, i_class_id#98, i_category_id#99] +(110) ReusedExchange [Reuses operator id: 72] +Output [4]: [i_item_sk#95, i_brand_id#96, i_class_id#97, i_category_id#98] -(114) BroadcastHashJoin [codegen id : 137] -Left keys [1]: [ws_item_sk#90] -Right keys [1]: [i_item_sk#96] +(111) BroadcastHashJoin [codegen id : 131] +Left keys [1]: [ws_item_sk#89] +Right keys [1]: [i_item_sk#95] Join condition: None -(115) 
Project [codegen id : 137] -Output [5]: [ws_quantity#91, ws_list_price#92, i_brand_id#97, i_class_id#98, i_category_id#99] -Input [7]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, i_item_sk#96, i_brand_id#97, i_class_id#98, i_category_id#99] - -(116) HashAggregate [codegen id : 137] -Input [5]: [ws_quantity#91, ws_list_price#92, i_brand_id#97, i_class_id#98, i_category_id#99] -Keys [3]: [i_brand_id#97, i_class_id#98, i_category_id#99] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#100, isEmpty#101, count#102] -Results [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] - -(117) Exchange -Input [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] -Arguments: hashpartitioning(i_brand_id#97, i_class_id#98, i_category_id#99, 5), ENSURE_REQUIREMENTS, [id=#106] - -(118) HashAggregate [codegen id : 138] -Input [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] -Keys [3]: [i_brand_id#97, i_class_id#98, i_category_id#99] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2)))#107, count(1)#108] -Results [5]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2)))#107 AS sales#109, count(1)#108 AS number_sales#110] - -(119) Filter [codegen id : 138] -Input [5]: [i_brand_id#97, i_class_id#98, i_category_id#99, sales#109, number_sales#110] -Condition : (isnotnull(sales#109) AND (cast(sales#109 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) - -(120) Project [codegen id : 138] -Output [6]: [sales#109, number_sales#110, web AS channel#111, i_brand_id#97, i_class_id#98, i_category_id#99] -Input [5]: [i_brand_id#97, i_class_id#98, i_category_id#99, sales#109, number_sales#110] - -(121) Union - -(122) Expand [codegen id : 139] -Input [6]: [sales#63, number_sales#64, channel#67, i_brand_id#49, i_class_id#50, i_category_id#51] -Arguments: [[sales#63, number_sales#64, channel#67, i_brand_id#49, i_class_id#50, i_category_id#51, 0], [sales#63, number_sales#64, channel#67, i_brand_id#49, i_class_id#50, null, 1], [sales#63, number_sales#64, channel#67, i_brand_id#49, null, null, 3], [sales#63, number_sales#64, channel#67, null, null, null, 7], [sales#63, number_sales#64, null, null, null, null, 15]], [sales#63, number_sales#64, channel#112, i_brand_id#113, i_class_id#114, i_category_id#115, spark_grouping_id#116] - -(123) HashAggregate [codegen id : 139] -Input [7]: [sales#63, number_sales#64, channel#112, i_brand_id#113, i_class_id#114, i_category_id#115, spark_grouping_id#116] -Keys [5]: [channel#112, i_brand_id#113, i_class_id#114, i_category_id#115, spark_grouping_id#116] -Functions [2]: [partial_sum(sales#63), partial_sum(number_sales#64)] -Aggregate Attributes [3]: [sum#117, isEmpty#118, sum#119] -Results [8]: [channel#112, i_brand_id#113, i_class_id#114, i_category_id#115, spark_grouping_id#116, sum#120, 
isEmpty#121, sum#122] - -(124) Exchange -Input [8]: [channel#112, i_brand_id#113, i_class_id#114, i_category_id#115, spark_grouping_id#116, sum#120, isEmpty#121, sum#122] -Arguments: hashpartitioning(channel#112, i_brand_id#113, i_class_id#114, i_category_id#115, spark_grouping_id#116, 5), ENSURE_REQUIREMENTS, [id=#123] - -(125) HashAggregate [codegen id : 140] -Input [8]: [channel#112, i_brand_id#113, i_class_id#114, i_category_id#115, spark_grouping_id#116, sum#120, isEmpty#121, sum#122] -Keys [5]: [channel#112, i_brand_id#113, i_class_id#114, i_category_id#115, spark_grouping_id#116] -Functions [2]: [sum(sales#63), sum(number_sales#64)] -Aggregate Attributes [2]: [sum(sales#63)#124, sum(number_sales#64)#125] -Results [6]: [channel#112, i_brand_id#113, i_class_id#114, i_category_id#115, sum(sales#63)#124 AS sum(sales)#126, sum(number_sales#64)#125 AS sum(number_sales)#127] - -(126) TakeOrderedAndProject -Input [6]: [channel#112, i_brand_id#113, i_class_id#114, i_category_id#115, sum(sales)#126, sum(number_sales)#127] -Arguments: 100, [channel#112 ASC NULLS FIRST, i_brand_id#113 ASC NULLS FIRST, i_class_id#114 ASC NULLS FIRST, i_category_id#115 ASC NULLS FIRST], [channel#112, i_brand_id#113, i_class_id#114, i_category_id#115, sum(sales)#126, sum(number_sales)#127] +(112) Project [codegen id : 131] +Output [5]: [ws_quantity#90, ws_list_price#91, i_brand_id#96, i_class_id#97, i_category_id#98] +Input [7]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, i_item_sk#95, i_brand_id#96, i_class_id#97, i_category_id#98] + +(113) HashAggregate [codegen id : 131] +Input [5]: [ws_quantity#90, ws_list_price#91, i_brand_id#96, i_class_id#97, i_category_id#98] +Keys [3]: [i_brand_id#96, i_class_id#97, i_category_id#98] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#99, isEmpty#100, count#101] +Results [6]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum#102, isEmpty#103, count#104] + +(114) Exchange +Input [6]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum#102, isEmpty#103, count#104] +Arguments: hashpartitioning(i_brand_id#96, i_class_id#97, i_category_id#98, 5), ENSURE_REQUIREMENTS, [id=#105] + +(115) HashAggregate [codegen id : 132] +Input [6]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum#102, isEmpty#103, count#104] +Keys [3]: [i_brand_id#96, i_class_id#97, i_category_id#98] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2)))#106, count(1)#107] +Results [5]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2)))#106 AS sales#108, count(1)#107 AS number_sales#109] + +(116) Filter [codegen id : 132] +Input [5]: [i_brand_id#96, i_class_id#97, i_category_id#98, sales#108, number_sales#109] +Condition : (isnotnull(sales#108) AND (cast(sales#108 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#64, [id=#65] as decimal(32,6)))) + +(117) Project [codegen id : 132] +Output [6]: [sales#108, number_sales#109, web AS 
channel#110, i_brand_id#96, i_class_id#97, i_category_id#98] +Input [5]: [i_brand_id#96, i_class_id#97, i_category_id#98, sales#108, number_sales#109] + +(118) Union + +(119) Expand [codegen id : 133] +Input [6]: [sales#62, number_sales#63, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50] +Arguments: [[sales#62, number_sales#63, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, 0], [sales#62, number_sales#63, channel#66, i_brand_id#48, i_class_id#49, null, 1], [sales#62, number_sales#63, channel#66, i_brand_id#48, null, null, 3], [sales#62, number_sales#63, channel#66, null, null, null, 7], [sales#62, number_sales#63, null, null, null, null, 15]], [sales#62, number_sales#63, channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115] + +(120) HashAggregate [codegen id : 133] +Input [7]: [sales#62, number_sales#63, channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115] +Keys [5]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115] +Functions [2]: [partial_sum(sales#62), partial_sum(number_sales#63)] +Aggregate Attributes [3]: [sum#116, isEmpty#117, sum#118] +Results [8]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115, sum#119, isEmpty#120, sum#121] + +(121) Exchange +Input [8]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115, sum#119, isEmpty#120, sum#121] +Arguments: hashpartitioning(channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115, 5), ENSURE_REQUIREMENTS, [id=#122] + +(122) HashAggregate [codegen id : 134] +Input [8]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115, sum#119, isEmpty#120, sum#121] +Keys [5]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115] +Functions [2]: [sum(sales#62), sum(number_sales#63)] +Aggregate Attributes [2]: [sum(sales#62)#123, sum(number_sales#63)#124] +Results [6]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, sum(sales#62)#123 AS sum(sales)#125, sum(number_sales#63)#124 AS sum(number_sales)#126] + +(123) TakeOrderedAndProject +Input [6]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, sum(sales)#125, sum(number_sales)#126] +Arguments: 100, [channel#111 ASC NULLS FIRST, i_brand_id#112 ASC NULLS FIRST, i_class_id#113 ASC NULLS FIRST, i_category_id#114 ASC NULLS FIRST], [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, sum(sales)#125, sum(number_sales)#126] ===== Subqueries ===== -Subquery:1 Hosting operator id = 81 Hosting Expression = Subquery scalar-subquery#65, [id=#66] -* HashAggregate (145) -+- Exchange (144) - +- * HashAggregate (143) - +- Union (142) - :- * Project (131) - : +- * BroadcastHashJoin Inner BuildRight (130) - : :- * ColumnarToRow (128) - : : +- Scan parquet default.store_sales (127) - : +- ReusedExchange (129) - :- * Project (136) - : +- * BroadcastHashJoin Inner BuildRight (135) - : :- * ColumnarToRow (133) - : : +- Scan parquet default.catalog_sales (132) - : +- ReusedExchange (134) - +- * Project (141) - +- * BroadcastHashJoin Inner BuildRight (140) - :- * ColumnarToRow (138) - : +- Scan parquet default.web_sales (137) - +- ReusedExchange (139) - - -(127) Scan parquet default.store_sales -Output [3]: [ss_quantity#128, ss_list_price#129, ss_sold_date_sk#130] +Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquery#64, [id=#65] +* HashAggregate (142) ++- 
Exchange (141) + +- * HashAggregate (140) + +- Union (139) + :- * Project (128) + : +- * BroadcastHashJoin Inner BuildRight (127) + : :- * ColumnarToRow (125) + : : +- Scan parquet default.store_sales (124) + : +- ReusedExchange (126) + :- * Project (133) + : +- * BroadcastHashJoin Inner BuildRight (132) + : :- * ColumnarToRow (130) + : : +- Scan parquet default.catalog_sales (129) + : +- ReusedExchange (131) + +- * Project (138) + +- * BroadcastHashJoin Inner BuildRight (137) + :- * ColumnarToRow (135) + : +- Scan parquet default.web_sales (134) + +- ReusedExchange (136) + + +(124) Scan parquet default.store_sales +Output [3]: [ss_quantity#127, ss_list_price#128, ss_sold_date_sk#129] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#130), dynamicpruningexpression(ss_sold_date_sk#130 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ss_sold_date_sk#129), dynamicpruningexpression(ss_sold_date_sk#129 IN dynamicpruning#13)] ReadSchema: struct -(128) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#128, ss_list_price#129, ss_sold_date_sk#130] +(125) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#127, ss_list_price#128, ss_sold_date_sk#129] -(129) ReusedExchange [Reuses operator id: 155] -Output [1]: [d_date_sk#131] +(126) ReusedExchange [Reuses operator id: 152] +Output [1]: [d_date_sk#130] -(130) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#130] -Right keys [1]: [d_date_sk#131] +(127) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#129] +Right keys [1]: [d_date_sk#130] Join condition: None -(131) Project [codegen id : 2] -Output [2]: [ss_quantity#128 AS quantity#132, ss_list_price#129 AS list_price#133] -Input [4]: [ss_quantity#128, ss_list_price#129, ss_sold_date_sk#130, d_date_sk#131] +(128) Project [codegen id : 2] +Output [2]: [ss_quantity#127 AS quantity#131, ss_list_price#128 AS list_price#132] +Input [4]: [ss_quantity#127, ss_list_price#128, ss_sold_date_sk#129, d_date_sk#130] -(132) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#134, cs_list_price#135, cs_sold_date_sk#136] +(129) Scan parquet default.catalog_sales +Output [3]: [cs_quantity#133, cs_list_price#134, cs_sold_date_sk#135] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#136), dynamicpruningexpression(cs_sold_date_sk#136 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(cs_sold_date_sk#135), dynamicpruningexpression(cs_sold_date_sk#135 IN dynamicpruning#13)] ReadSchema: struct -(133) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#134, cs_list_price#135, cs_sold_date_sk#136] +(130) ColumnarToRow [codegen id : 4] +Input [3]: [cs_quantity#133, cs_list_price#134, cs_sold_date_sk#135] -(134) ReusedExchange [Reuses operator id: 155] -Output [1]: [d_date_sk#137] +(131) ReusedExchange [Reuses operator id: 152] +Output [1]: [d_date_sk#136] -(135) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#136] -Right keys [1]: [d_date_sk#137] +(132) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#135] +Right keys [1]: [d_date_sk#136] Join condition: None -(136) Project [codegen id : 4] -Output [2]: [cs_quantity#134 AS quantity#138, cs_list_price#135 AS list_price#139] -Input [4]: [cs_quantity#134, cs_list_price#135, cs_sold_date_sk#136, d_date_sk#137] +(133) Project [codegen id : 4] +Output [2]: [cs_quantity#133 AS quantity#137, cs_list_price#134 AS list_price#138] +Input [4]: [cs_quantity#133, cs_list_price#134, cs_sold_date_sk#135, 
d_date_sk#136] -(137) Scan parquet default.web_sales -Output [3]: [ws_quantity#140, ws_list_price#141, ws_sold_date_sk#142] +(134) Scan parquet default.web_sales +Output [3]: [ws_quantity#139, ws_list_price#140, ws_sold_date_sk#141] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#142), dynamicpruningexpression(ws_sold_date_sk#142 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ws_sold_date_sk#141), dynamicpruningexpression(ws_sold_date_sk#141 IN dynamicpruning#13)] ReadSchema: struct -(138) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#140, ws_list_price#141, ws_sold_date_sk#142] +(135) ColumnarToRow [codegen id : 6] +Input [3]: [ws_quantity#139, ws_list_price#140, ws_sold_date_sk#141] -(139) ReusedExchange [Reuses operator id: 155] -Output [1]: [d_date_sk#143] +(136) ReusedExchange [Reuses operator id: 152] +Output [1]: [d_date_sk#142] -(140) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#142] -Right keys [1]: [d_date_sk#143] +(137) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#141] +Right keys [1]: [d_date_sk#142] Join condition: None -(141) Project [codegen id : 6] -Output [2]: [ws_quantity#140 AS quantity#144, ws_list_price#141 AS list_price#145] -Input [4]: [ws_quantity#140, ws_list_price#141, ws_sold_date_sk#142, d_date_sk#143] +(138) Project [codegen id : 6] +Output [2]: [ws_quantity#139 AS quantity#143, ws_list_price#140 AS list_price#144] +Input [4]: [ws_quantity#139, ws_list_price#140, ws_sold_date_sk#141, d_date_sk#142] -(142) Union +(139) Union -(143) HashAggregate [codegen id : 7] -Input [2]: [quantity#132, list_price#133] +(140) HashAggregate [codegen id : 7] +Input [2]: [quantity#131, list_price#132] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#146, count#147] -Results [2]: [sum#148, count#149] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#131 as decimal(12,2))) * promote_precision(cast(list_price#132 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#145, count#146] +Results [2]: [sum#147, count#148] -(144) Exchange -Input [2]: [sum#148, count#149] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#150] +(141) Exchange +Input [2]: [sum#147, count#148] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#149] -(145) HashAggregate [codegen id : 8] -Input [2]: [sum#148, count#149] +(142) HashAggregate [codegen id : 8] +Input [2]: [sum#147, count#148] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2)))#151] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#132 as decimal(12,2))) * promote_precision(cast(list_price#133 as decimal(12,2)))), DecimalType(18,2)))#151 AS average_sales#152] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#131 as decimal(12,2))) * promote_precision(cast(list_price#132 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#131 as decimal(12,2))) * promote_precision(cast(list_price#132 as decimal(12,2)))), 
DecimalType(18,2)))#150] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#131 as decimal(12,2))) * promote_precision(cast(list_price#132 as decimal(12,2)))), DecimalType(18,2)))#150 AS average_sales#151] -Subquery:2 Hosting operator id = 127 Hosting Expression = ss_sold_date_sk#130 IN dynamicpruning#13 +Subquery:2 Hosting operator id = 124 Hosting Expression = ss_sold_date_sk#129 IN dynamicpruning#13 -Subquery:3 Hosting operator id = 132 Hosting Expression = cs_sold_date_sk#136 IN dynamicpruning#13 +Subquery:3 Hosting operator id = 129 Hosting Expression = cs_sold_date_sk#135 IN dynamicpruning#13 -Subquery:4 Hosting operator id = 137 Hosting Expression = ws_sold_date_sk#142 IN dynamicpruning#13 +Subquery:4 Hosting operator id = 134 Hosting Expression = ws_sold_date_sk#141 IN dynamicpruning#13 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 -BroadcastExchange (150) -+- * Project (149) - +- * Filter (148) - +- * ColumnarToRow (147) - +- Scan parquet default.date_dim (146) +BroadcastExchange (147) ++- * Project (146) + +- * Filter (145) + +- * ColumnarToRow (144) + +- Scan parquet default.date_dim (143) -(146) Scan parquet default.date_dim -Output [3]: [d_date_sk#47, d_year#153, d_moy#154] +(143) Scan parquet default.date_dim +Output [3]: [d_date_sk#46, d_year#152, d_moy#153] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct -(147) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#47, d_year#153, d_moy#154] +(144) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#46, d_year#152, d_moy#153] -(148) Filter [codegen id : 1] -Input [3]: [d_date_sk#47, d_year#153, d_moy#154] -Condition : ((((isnotnull(d_year#153) AND isnotnull(d_moy#154)) AND (d_year#153 = 2001)) AND (d_moy#154 = 11)) AND isnotnull(d_date_sk#47)) +(145) Filter [codegen id : 1] +Input [3]: [d_date_sk#46, d_year#152, d_moy#153] +Condition : ((((isnotnull(d_year#152) AND isnotnull(d_moy#153)) AND (d_year#152 = 2001)) AND (d_moy#153 = 11)) AND isnotnull(d_date_sk#46)) -(149) Project [codegen id : 1] -Output [1]: [d_date_sk#47] -Input [3]: [d_date_sk#47, d_year#153, d_moy#154] +(146) Project [codegen id : 1] +Output [1]: [d_date_sk#46] +Input [3]: [d_date_sk#46, d_year#152, d_moy#153] -(150) BroadcastExchange -Input [1]: [d_date_sk#47] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#155] +(147) BroadcastExchange +Input [1]: [d_date_sk#46] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#154] Subquery:6 Hosting operator id = 9 Hosting Expression = ss_sold_date_sk#12 IN dynamicpruning#13 -BroadcastExchange (155) -+- * Project (154) - +- * Filter (153) - +- * ColumnarToRow (152) - +- Scan parquet default.date_dim (151) +BroadcastExchange (152) ++- * Project (151) + +- * Filter (150) + +- * ColumnarToRow (149) + +- Scan parquet default.date_dim (148) -(151) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#156] +(148) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#155] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(152) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#156] +(149) ColumnarToRow [codegen 
id : 1] +Input [2]: [d_date_sk#14, d_year#155] -(153) Filter [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#156] -Condition : (((isnotnull(d_year#156) AND (d_year#156 >= 1999)) AND (d_year#156 <= 2001)) AND isnotnull(d_date_sk#14)) +(150) Filter [codegen id : 1] +Input [2]: [d_date_sk#14, d_year#155] +Condition : (((isnotnull(d_year#155) AND (d_year#155 >= 1999)) AND (d_year#155 <= 2001)) AND isnotnull(d_date_sk#14)) -(154) Project [codegen id : 1] +(151) Project [codegen id : 1] Output [1]: [d_date_sk#14] -Input [2]: [d_date_sk#14, d_year#156] +Input [2]: [d_date_sk#14, d_year#155] -(155) BroadcastExchange +(152) BroadcastExchange Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#157] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#156] Subquery:7 Hosting operator id = 20 Hosting Expression = cs_sold_date_sk#21 IN dynamicpruning#13 Subquery:8 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#36 IN dynamicpruning#13 -Subquery:9 Hosting operator id = 100 Hosting Expression = ReusedSubquery Subquery scalar-subquery#65, [id=#66] +Subquery:9 Hosting operator id = 97 Hosting Expression = ReusedSubquery Subquery scalar-subquery#64, [id=#65] -Subquery:10 Hosting operator id = 83 Hosting Expression = cs_sold_date_sk#71 IN dynamicpruning#5 +Subquery:10 Hosting operator id = 80 Hosting Expression = cs_sold_date_sk#70 IN dynamicpruning#5 -Subquery:11 Hosting operator id = 119 Hosting Expression = ReusedSubquery Subquery scalar-subquery#65, [id=#66] +Subquery:11 Hosting operator id = 116 Hosting Expression = ReusedSubquery Subquery scalar-subquery#64, [id=#65] -Subquery:12 Hosting operator id = 102 Hosting Expression = ws_sold_date_sk#93 IN dynamicpruning#5 +Subquery:12 Hosting operator id = 99 Hosting Expression = ws_sold_date_sk#92 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt index 5984e5165f78d..f445a370581af 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt @@ -1,21 +1,21 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),sum(number_sales)] - WholeStageCodegen (140) + WholeStageCodegen (134) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum(sales),sum(number_sales),sum,isEmpty,sum] InputAdapter Exchange [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id] #1 - WholeStageCodegen (139) + WholeStageCodegen (133) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] Expand [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] InputAdapter Union - WholeStageCodegen (46) + WholeStageCodegen (44) Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sales] Subquery #3 WholeStageCodegen (8) HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] InputAdapter - Exchange #18 + Exchange #17 WholeStageCodegen (7) HashAggregate [quantity,list_price] [sum,count,sum,count] InputAdapter 
@@ -28,7 +28,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Scan parquet default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] ReusedSubquery [d_date_sk] #2 InputAdapter - ReusedExchange [d_date_sk] #10 + ReusedExchange [d_date_sk] #9 WholeStageCodegen (4) Project [cs_quantity,cs_list_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] @@ -37,7 +37,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Scan parquet default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] ReusedSubquery [d_date_sk] #2 InputAdapter - ReusedExchange [d_date_sk] #10 + ReusedExchange [d_date_sk] #9 WholeStageCodegen (6) Project [ws_quantity,ws_list_price] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] @@ -46,11 +46,11 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Scan parquet default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] ReusedSubquery [d_date_sk] #2 InputAdapter - ReusedExchange [d_date_sk] #10 + ReusedExchange [d_date_sk] #9 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #2 - WholeStageCodegen (45) + WholeStageCodegen (43) HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ss_item_sk,i_item_sk] @@ -76,11 +76,11 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter - WholeStageCodegen (21) + WholeStageCodegen (20) Sort [ss_item_sk] InputAdapter Exchange [ss_item_sk] #5 - WholeStageCodegen (20) + WholeStageCodegen (19) Project [i_item_sk] BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] Filter [i_brand_id,i_class_id,i_category_id] @@ -89,128 +89,123 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #6 - WholeStageCodegen (19) - HashAggregate [brand_id,class_id,category_id] + WholeStageCodegen (18) + SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] InputAdapter - Exchange [brand_id,class_id,category_id] #7 - WholeStageCodegen (18) - HashAggregate [brand_id,class_id,category_id] - SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (13) - Sort [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #8 - WholeStageCodegen (12) - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #9 - WholeStageCodegen (11) - HashAggregate [brand_id,class_id,category_id] - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #2 - BroadcastExchange #10 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - 
InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (13) + Sort [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #7 + WholeStageCodegen (12) + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #8 + WholeStageCodegen (11) + HashAggregate [brand_id,class_id,category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #2 + BroadcastExchange #9 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk] #9 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (10) + SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + InputAdapter + WholeStageCodegen (5) + Sort [i_brand_id,i_class_id,i_category_id] InputAdapter - ReusedExchange [d_date_sk] #10 - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (10) - SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (5) - Sort [i_brand_id,i_class_id,i_category_id] + Exchange [i_brand_id,i_class_id,i_category_id] #11 + WholeStageCodegen (4) + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #12 - WholeStageCodegen (4) - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + WholeStageCodegen (9) + Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #12 + WholeStageCodegen (8) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + ReusedSubquery [d_date_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #9 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (7) + Filter [i_item_sk] ColumnarToRow InputAdapter Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (9) - Sort [i_brand_id,i_class_id,i_category_id] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #13 - WholeStageCodegen (8) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #2 - InputAdapter - ReusedExchange [d_date_sk] #10 - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (7) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (17) - Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + WholeStageCodegen (17) + Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #14 + WholeStageCodegen (16) + Project 
[i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + ReusedSubquery [d_date_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #9 InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #15 - WholeStageCodegen (16) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] - ReusedSubquery [d_date_sk] #2 - InputAdapter - ReusedExchange [d_date_sk] #10 - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #14 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #13 InputAdapter ReusedExchange [d_date_sk] #4 InputAdapter - BroadcastExchange #16 - WholeStageCodegen (44) + BroadcastExchange #15 + WholeStageCodegen (42) SortMergeJoin [i_item_sk,ss_item_sk] InputAdapter - WholeStageCodegen (24) + WholeStageCodegen (23) Sort [i_item_sk] InputAdapter - Exchange [i_item_sk] #17 - WholeStageCodegen (23) + Exchange [i_item_sk] #16 + WholeStageCodegen (22) Filter [i_item_sk] ColumnarToRow InputAdapter Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter - WholeStageCodegen (43) + WholeStageCodegen (41) Sort [ss_item_sk] InputAdapter ReusedExchange [ss_item_sk] #5 - WholeStageCodegen (92) + WholeStageCodegen (88) Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sales] ReusedSubquery [average_sales] #3 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #19 - WholeStageCodegen (91) + Exchange [i_brand_id,i_class_id,i_category_id] #18 + WholeStageCodegen (87) HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [cs_item_sk,i_item_sk] @@ -218,33 +213,33 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su BroadcastHashJoin [cs_sold_date_sk,d_date_sk] SortMergeJoin [cs_item_sk,ss_item_sk] InputAdapter - WholeStageCodegen (48) + WholeStageCodegen (46) Sort [cs_item_sk] InputAdapter - Exchange [cs_item_sk] #20 - WholeStageCodegen (47) + Exchange [cs_item_sk] #19 + WholeStageCodegen (45) Filter [cs_item_sk] ColumnarToRow InputAdapter Scan parquet default.catalog_sales [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] ReusedSubquery [d_date_sk] #1 InputAdapter - WholeStageCodegen (67) + WholeStageCodegen (64) Sort [ss_item_sk] InputAdapter ReusedExchange [ss_item_sk] #5 InputAdapter ReusedExchange [d_date_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #16 - WholeStageCodegen (138) + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen (132) Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sales] ReusedSubquery [average_sales] #3 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] 
[sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sales,number_sales,sum,isEmpty,count] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #21 - WholeStageCodegen (137) + Exchange [i_brand_id,i_class_id,i_category_id] #20 + WholeStageCodegen (131) HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ws_item_sk,i_item_sk] @@ -252,22 +247,22 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su BroadcastHashJoin [ws_sold_date_sk,d_date_sk] SortMergeJoin [ws_item_sk,ss_item_sk] InputAdapter - WholeStageCodegen (94) + WholeStageCodegen (90) Sort [ws_item_sk] InputAdapter - Exchange [ws_item_sk] #22 - WholeStageCodegen (93) + Exchange [ws_item_sk] #21 + WholeStageCodegen (89) Filter [ws_item_sk] ColumnarToRow InputAdapter Scan parquet default.web_sales [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk] ReusedSubquery [d_date_sk] #1 InputAdapter - WholeStageCodegen (113) + WholeStageCodegen (108) Sort [ss_item_sk] InputAdapter ReusedExchange [ss_item_sk] #5 InputAdapter ReusedExchange [d_date_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #16 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt index b263d0f642e45..300cfd7ccbb21 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt @@ -1,111 +1,109 @@ == Physical Plan == -TakeOrderedAndProject (107) -+- * HashAggregate (106) - +- Exchange (105) - +- * HashAggregate (104) - +- * Expand (103) - +- Union (102) - :- * Project (69) - : +- * Filter (68) - : +- * HashAggregate (67) - : +- Exchange (66) - : +- * HashAggregate (65) - : +- * Project (64) - : +- * BroadcastHashJoin Inner BuildRight (63) - : :- * Project (61) - : : +- * BroadcastHashJoin Inner BuildRight (60) - : : :- * BroadcastHashJoin LeftSemi BuildRight (53) +TakeOrderedAndProject (105) ++- * HashAggregate (104) + +- Exchange (103) + +- * HashAggregate (102) + +- * Expand (101) + +- Union (100) + :- * Project (67) + : +- * Filter (66) + : +- * HashAggregate (65) + : +- Exchange (64) + : +- * HashAggregate (63) + : +- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Project (59) + : : +- * BroadcastHashJoin Inner BuildRight (58) + : : :- * BroadcastHashJoin LeftSemi BuildRight (51) : : : :- * Filter (3) : : : : +- * ColumnarToRow (2) : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (52) - : : : +- * Project (51) - : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : +- BroadcastExchange (50) + : : : +- * Project (49) + : : : +- * BroadcastHashJoin Inner BuildRight (48) : : : :- * Filter (6) : : : : +- * ColumnarToRow (5) : : : : +- Scan parquet default.item (4) - : : : +- BroadcastExchange (49) - : : : +- * HashAggregate (48) - : : : +- * HashAggregate (47) - : : : +- * BroadcastHashJoin LeftSemi BuildRight (46) - : : : :- * HashAggregate (35) - : : : : +- Exchange (34) - : : : : +- * HashAggregate (33) - : : : : +- * Project (32) - : : : : +- * 
BroadcastHashJoin Inner BuildRight (31) - : : : : :- * Project (29) - : : : : : +- * BroadcastHashJoin Inner BuildRight (28) - : : : : : :- * Filter (9) - : : : : : : +- * ColumnarToRow (8) - : : : : : : +- Scan parquet default.store_sales (7) - : : : : : +- BroadcastExchange (27) - : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) - : : : : : :- * Filter (12) - : : : : : : +- * ColumnarToRow (11) - : : : : : : +- Scan parquet default.item (10) - : : : : : +- BroadcastExchange (25) - : : : : : +- * Project (24) - : : : : : +- * BroadcastHashJoin Inner BuildRight (23) - : : : : : :- * Project (21) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) - : : : : : : :- * Filter (15) - : : : : : : : +- * ColumnarToRow (14) - : : : : : : : +- Scan parquet default.catalog_sales (13) - : : : : : : +- BroadcastExchange (19) - : : : : : : +- * Filter (18) - : : : : : : +- * ColumnarToRow (17) - : : : : : : +- Scan parquet default.item (16) - : : : : : +- ReusedExchange (22) - : : : : +- ReusedExchange (30) - : : : +- BroadcastExchange (45) - : : : +- * Project (44) - : : : +- * BroadcastHashJoin Inner BuildRight (43) - : : : :- * Project (41) - : : : : +- * BroadcastHashJoin Inner BuildRight (40) - : : : : :- * Filter (38) - : : : : : +- * ColumnarToRow (37) - : : : : : +- Scan parquet default.web_sales (36) - : : : : +- ReusedExchange (39) - : : : +- ReusedExchange (42) - : : +- BroadcastExchange (59) - : : +- * BroadcastHashJoin LeftSemi BuildRight (58) - : : :- * Filter (56) - : : : +- * ColumnarToRow (55) - : : : +- Scan parquet default.item (54) - : : +- ReusedExchange (57) - : +- ReusedExchange (62) - :- * Project (85) - : +- * Filter (84) - : +- * HashAggregate (83) - : +- Exchange (82) - : +- * HashAggregate (81) - : +- * Project (80) - : +- * BroadcastHashJoin Inner BuildRight (79) - : :- * Project (77) - : : +- * BroadcastHashJoin Inner BuildRight (76) - : : :- * BroadcastHashJoin LeftSemi BuildRight (74) - : : : :- * Filter (72) - : : : : +- * ColumnarToRow (71) - : : : : +- Scan parquet default.catalog_sales (70) - : : : +- ReusedExchange (73) - : : +- ReusedExchange (75) - : +- ReusedExchange (78) - +- * Project (101) - +- * Filter (100) - +- * HashAggregate (99) - +- Exchange (98) - +- * HashAggregate (97) - +- * Project (96) - +- * BroadcastHashJoin Inner BuildRight (95) - :- * Project (93) - : +- * BroadcastHashJoin Inner BuildRight (92) - : :- * BroadcastHashJoin LeftSemi BuildRight (90) - : : :- * Filter (88) - : : : +- * ColumnarToRow (87) - : : : +- Scan parquet default.web_sales (86) - : : +- ReusedExchange (89) - : +- ReusedExchange (91) - +- ReusedExchange (94) + : : : +- BroadcastExchange (47) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (46) + : : : :- * HashAggregate (35) + : : : : +- Exchange (34) + : : : : +- * HashAggregate (33) + : : : : +- * Project (32) + : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : :- * Project (29) + : : : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : : : :- * Filter (9) + : : : : : : +- * ColumnarToRow (8) + : : : : : : +- Scan parquet default.store_sales (7) + : : : : : +- BroadcastExchange (27) + : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : : : : : :- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet default.item (10) + : : : : : +- BroadcastExchange (25) + : : : : : +- * Project (24) + : : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : : :- * Project (21) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : :- * 
Filter (15) + : : : : : : : +- * ColumnarToRow (14) + : : : : : : : +- Scan parquet default.catalog_sales (13) + : : : : : : +- BroadcastExchange (19) + : : : : : : +- * Filter (18) + : : : : : : +- * ColumnarToRow (17) + : : : : : : +- Scan parquet default.item (16) + : : : : : +- ReusedExchange (22) + : : : : +- ReusedExchange (30) + : : : +- BroadcastExchange (45) + : : : +- * Project (44) + : : : +- * BroadcastHashJoin Inner BuildRight (43) + : : : :- * Project (41) + : : : : +- * BroadcastHashJoin Inner BuildRight (40) + : : : : :- * Filter (38) + : : : : : +- * ColumnarToRow (37) + : : : : : +- Scan parquet default.web_sales (36) + : : : : +- ReusedExchange (39) + : : : +- ReusedExchange (42) + : : +- BroadcastExchange (57) + : : +- * BroadcastHashJoin LeftSemi BuildRight (56) + : : :- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.item (52) + : : +- ReusedExchange (55) + : +- ReusedExchange (60) + :- * Project (83) + : +- * Filter (82) + : +- * HashAggregate (81) + : +- Exchange (80) + : +- * HashAggregate (79) + : +- * Project (78) + : +- * BroadcastHashJoin Inner BuildRight (77) + : :- * Project (75) + : : +- * BroadcastHashJoin Inner BuildRight (74) + : : :- * BroadcastHashJoin LeftSemi BuildRight (72) + : : : :- * Filter (70) + : : : : +- * ColumnarToRow (69) + : : : : +- Scan parquet default.catalog_sales (68) + : : : +- ReusedExchange (71) + : : +- ReusedExchange (73) + : +- ReusedExchange (76) + +- * Project (99) + +- * Filter (98) + +- * HashAggregate (97) + +- Exchange (96) + +- * HashAggregate (95) + +- * Project (94) + +- * BroadcastHashJoin Inner BuildRight (93) + :- * Project (91) + : +- * BroadcastHashJoin Inner BuildRight (90) + : :- * BroadcastHashJoin LeftSemi BuildRight (88) + : : :- * Filter (86) + : : : +- * ColumnarToRow (85) + : : : +- Scan parquet default.web_sales (84) + : : +- ReusedExchange (87) + : +- ReusedExchange (89) + +- ReusedExchange (92) (1) Scan parquet default.store_sales @@ -208,7 +206,7 @@ Join condition: None Output [4]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22] Input [6]: [cs_item_sk#17, cs_sold_date_sk#18, i_item_sk#19, i_brand_id#20, i_class_id#21, i_category_id#22] -(22) ReusedExchange [Reuses operator id: 136] +(22) ReusedExchange [Reuses operator id: 134] Output [1]: [d_date_sk#24] (23) BroadcastHashJoin [codegen id : 3] @@ -242,7 +240,7 @@ Join condition: None Output [4]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16] Input [6]: [ss_item_sk#10, ss_sold_date_sk#11, i_item_sk#13, i_brand_id#14, i_class_id#15, i_category_id#16] -(30) ReusedExchange [Reuses operator id: 136] +(30) ReusedExchange [Reuses operator id: 134] Output [1]: [d_date_sk#27] (31) BroadcastHashJoin [codegen id : 6] @@ -299,7 +297,7 @@ Join condition: None Output [4]: [ws_sold_date_sk#33, i_brand_id#35, i_class_id#36, i_category_id#37] Input [6]: [ws_item_sk#32, ws_sold_date_sk#33, i_item_sk#34, i_brand_id#35, i_class_id#36, i_category_id#37] -(42) ReusedExchange [Reuses operator id: 136] +(42) ReusedExchange [Reuses operator id: 134] Output [1]: [d_date_sk#38] (43) BroadcastHashJoin [codegen id : 9] @@ -320,116 +318,102 @@ Left keys [6]: [coalesce(brand_id#28, 0), isnull(brand_id#28), coalesce(class_id Right keys [6]: [coalesce(i_brand_id#35, 0), isnull(i_brand_id#35), coalesce(i_class_id#36, 0), isnull(i_class_id#36), coalesce(i_category_id#37, 0), isnull(i_category_id#37)] Join condition: None -(47) HashAggregate [codegen id : 10] -Input [3]: [brand_id#28, class_id#29, category_id#30] 
-Keys [3]: [brand_id#28, class_id#29, category_id#30] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] - -(48) HashAggregate [codegen id : 10] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] - -(49) BroadcastExchange +(47) BroadcastExchange Input [3]: [brand_id#28, class_id#29, category_id#30] Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#40] -(50) BroadcastHashJoin [codegen id : 11] +(48) BroadcastHashJoin [codegen id : 11] Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Right keys [3]: [brand_id#28, class_id#29, category_id#30] Join condition: None -(51) Project [codegen id : 11] +(49) Project [codegen id : 11] Output [1]: [i_item_sk#6 AS ss_item_sk#41] Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#28, class_id#29, category_id#30] -(52) BroadcastExchange +(50) BroadcastExchange Input [1]: [ss_item_sk#41] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] -(53) BroadcastHashJoin [codegen id : 25] +(51) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] Right keys [1]: [ss_item_sk#41] Join condition: None -(54) Scan parquet default.item +(52) Scan parquet default.item Output [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct -(55) ColumnarToRow [codegen id : 23] +(53) ColumnarToRow [codegen id : 23] Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -(56) Filter [codegen id : 23] +(54) Filter [codegen id : 23] Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] Condition : isnotnull(i_item_sk#43) -(57) ReusedExchange [Reuses operator id: 52] +(55) ReusedExchange [Reuses operator id: 50] Output [1]: [ss_item_sk#41] -(58) BroadcastHashJoin [codegen id : 23] +(56) BroadcastHashJoin [codegen id : 23] Left keys [1]: [i_item_sk#43] Right keys [1]: [ss_item_sk#41] Join condition: None -(59) BroadcastExchange +(57) BroadcastExchange Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#47] -(60) BroadcastHashJoin [codegen id : 25] +(58) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] Right keys [1]: [i_item_sk#43] Join condition: None -(61) Project [codegen id : 25] +(59) Project [codegen id : 25] Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46] Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -(62) ReusedExchange [Reuses operator id: 131] +(60) ReusedExchange [Reuses operator id: 129] Output [1]: [d_date_sk#48] -(63) BroadcastHashJoin [codegen id : 25] +(61) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_sold_date_sk#4] Right keys [1]: [d_date_sk#48] Join condition: None -(64) Project [codegen id : 25] +(62) Project [codegen id : 25] Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46, 
d_date_sk#48] -(65) HashAggregate [codegen id : 25] +(63) HashAggregate [codegen id : 25] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] -(66) Exchange +(64) Exchange Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), ENSURE_REQUIREMENTS, [id=#55] -(67) HashAggregate [codegen id : 26] +(65) HashAggregate [codegen id : 26] Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56, count(1)#57] Results [5]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56 AS sales#58, count(1)#57 AS number_sales#59] -(68) Filter [codegen id : 26] +(66) Filter [codegen id : 26] Input [5]: [i_brand_id#44, i_class_id#45, i_category_id#46, sales#58, number_sales#59] Condition : (isnotnull(sales#58) AND (cast(sales#58 as decimal(32,6)) > cast(Subquery scalar-subquery#60, [id=#61] as decimal(32,6)))) -(69) Project [codegen id : 26] +(67) Project [codegen id : 26] Output [6]: [sales#58, number_sales#59, store AS channel#62, i_brand_id#44, i_class_id#45, i_category_id#46] Input [5]: [i_brand_id#44, i_class_id#45, i_category_id#46, sales#58, number_sales#59] -(70) Scan parquet default.catalog_sales +(68) Scan parquet default.catalog_sales Output [4]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66] Batched: true Location: InMemoryFileIndex [] @@ -437,72 +421,72 @@ PartitionFilters: [isnotnull(cs_sold_date_sk#66), dynamicpruningexpression(cs_so PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct -(71) ColumnarToRow [codegen id : 51] +(69) ColumnarToRow [codegen id : 51] Input [4]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66] -(72) Filter [codegen id : 51] +(70) Filter [codegen id : 51] Input [4]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66] Condition : isnotnull(cs_item_sk#63) -(73) ReusedExchange [Reuses operator id: 52] +(71) ReusedExchange [Reuses operator id: 50] Output [1]: [ss_item_sk#41] -(74) BroadcastHashJoin [codegen id : 51] +(72) BroadcastHashJoin [codegen id : 51] Left keys [1]: [cs_item_sk#63] Right keys [1]: [ss_item_sk#41] Join condition: None -(75) ReusedExchange [Reuses operator id: 59] +(73) ReusedExchange [Reuses operator id: 57] Output [4]: [i_item_sk#67, i_brand_id#68, i_class_id#69, i_category_id#70] -(76) BroadcastHashJoin [codegen id : 51] +(74) BroadcastHashJoin [codegen id : 51] Left keys [1]: [cs_item_sk#63] Right keys [1]: [i_item_sk#67] Join 
condition: None -(77) Project [codegen id : 51] +(75) Project [codegen id : 51] Output [6]: [cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_brand_id#68, i_class_id#69, i_category_id#70] Input [8]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_item_sk#67, i_brand_id#68, i_class_id#69, i_category_id#70] -(78) ReusedExchange [Reuses operator id: 131] +(76) ReusedExchange [Reuses operator id: 129] Output [1]: [d_date_sk#71] -(79) BroadcastHashJoin [codegen id : 51] +(77) BroadcastHashJoin [codegen id : 51] Left keys [1]: [cs_sold_date_sk#66] Right keys [1]: [d_date_sk#71] Join condition: None -(80) Project [codegen id : 51] +(78) Project [codegen id : 51] Output [5]: [cs_quantity#64, cs_list_price#65, i_brand_id#68, i_class_id#69, i_category_id#70] Input [7]: [cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_brand_id#68, i_class_id#69, i_category_id#70, d_date_sk#71] -(81) HashAggregate [codegen id : 51] +(79) HashAggregate [codegen id : 51] Input [5]: [cs_quantity#64, cs_list_price#65, i_brand_id#68, i_class_id#69, i_category_id#70] Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#72, isEmpty#73, count#74] Results [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] -(82) Exchange +(80) Exchange Input [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] Arguments: hashpartitioning(i_brand_id#68, i_class_id#69, i_category_id#70, 5), ENSURE_REQUIREMENTS, [id=#78] -(83) HashAggregate [codegen id : 52] +(81) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2))), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#79, count(1)#80] Results [5]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#79 AS sales#81, count(1)#80 AS number_sales#82] -(84) Filter [codegen id : 52] +(82) Filter [codegen id : 52] Input [5]: [i_brand_id#68, i_class_id#69, i_category_id#70, sales#81, number_sales#82] Condition : (isnotnull(sales#81) AND (cast(sales#81 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#60, [id=#61] as decimal(32,6)))) -(85) Project [codegen id : 52] +(83) Project [codegen id : 52] Output [6]: [sales#81, number_sales#82, catalog AS channel#83, i_brand_id#68, i_class_id#69, i_category_id#70] Input [5]: [i_brand_id#68, i_class_id#69, i_category_id#70, sales#81, number_sales#82] -(86) Scan parquet default.web_sales +(84) Scan parquet default.web_sales Output [4]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87] Batched: true Location: InMemoryFileIndex [] @@ -510,272 +494,272 @@ PartitionFilters: [isnotnull(ws_sold_date_sk#87), dynamicpruningexpression(ws_so PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct -(87) 
ColumnarToRow [codegen id : 77] +(85) ColumnarToRow [codegen id : 77] Input [4]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87] -(88) Filter [codegen id : 77] +(86) Filter [codegen id : 77] Input [4]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87] Condition : isnotnull(ws_item_sk#84) -(89) ReusedExchange [Reuses operator id: 52] +(87) ReusedExchange [Reuses operator id: 50] Output [1]: [ss_item_sk#41] -(90) BroadcastHashJoin [codegen id : 77] +(88) BroadcastHashJoin [codegen id : 77] Left keys [1]: [ws_item_sk#84] Right keys [1]: [ss_item_sk#41] Join condition: None -(91) ReusedExchange [Reuses operator id: 59] +(89) ReusedExchange [Reuses operator id: 57] Output [4]: [i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91] -(92) BroadcastHashJoin [codegen id : 77] +(90) BroadcastHashJoin [codegen id : 77] Left keys [1]: [ws_item_sk#84] Right keys [1]: [i_item_sk#88] Join condition: None -(93) Project [codegen id : 77] +(91) Project [codegen id : 77] Output [6]: [ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91] Input [8]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91] -(94) ReusedExchange [Reuses operator id: 131] +(92) ReusedExchange [Reuses operator id: 129] Output [1]: [d_date_sk#92] -(95) BroadcastHashJoin [codegen id : 77] +(93) BroadcastHashJoin [codegen id : 77] Left keys [1]: [ws_sold_date_sk#87] Right keys [1]: [d_date_sk#92] Join condition: None -(96) Project [codegen id : 77] +(94) Project [codegen id : 77] Output [5]: [ws_quantity#85, ws_list_price#86, i_brand_id#89, i_class_id#90, i_category_id#91] Input [7]: [ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91, d_date_sk#92] -(97) HashAggregate [codegen id : 77] +(95) HashAggregate [codegen id : 77] Input [5]: [ws_quantity#85, ws_list_price#86, i_brand_id#89, i_class_id#90, i_category_id#91] Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#93, isEmpty#94, count#95] Results [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] -(98) Exchange +(96) Exchange Input [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] Arguments: hashpartitioning(i_brand_id#89, i_class_id#90, i_category_id#91, 5), ENSURE_REQUIREMENTS, [id=#99] -(99) HashAggregate [codegen id : 78] +(97) HashAggregate [codegen id : 78] Input [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2))), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2)))#100, count(1)#101] Results [5]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2)))#100 AS sales#102, count(1)#101 AS number_sales#103] 
-(100) Filter [codegen id : 78] +(98) Filter [codegen id : 78] Input [5]: [i_brand_id#89, i_class_id#90, i_category_id#91, sales#102, number_sales#103] Condition : (isnotnull(sales#102) AND (cast(sales#102 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#60, [id=#61] as decimal(32,6)))) -(101) Project [codegen id : 78] +(99) Project [codegen id : 78] Output [6]: [sales#102, number_sales#103, web AS channel#104, i_brand_id#89, i_class_id#90, i_category_id#91] Input [5]: [i_brand_id#89, i_class_id#90, i_category_id#91, sales#102, number_sales#103] -(102) Union +(100) Union -(103) Expand [codegen id : 79] +(101) Expand [codegen id : 79] Input [6]: [sales#58, number_sales#59, channel#62, i_brand_id#44, i_class_id#45, i_category_id#46] Arguments: [[sales#58, number_sales#59, channel#62, i_brand_id#44, i_class_id#45, i_category_id#46, 0], [sales#58, number_sales#59, channel#62, i_brand_id#44, i_class_id#45, null, 1], [sales#58, number_sales#59, channel#62, i_brand_id#44, null, null, 3], [sales#58, number_sales#59, channel#62, null, null, null, 7], [sales#58, number_sales#59, null, null, null, null, 15]], [sales#58, number_sales#59, channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109] -(104) HashAggregate [codegen id : 79] +(102) HashAggregate [codegen id : 79] Input [7]: [sales#58, number_sales#59, channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109] Keys [5]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109] Functions [2]: [partial_sum(sales#58), partial_sum(number_sales#59)] Aggregate Attributes [3]: [sum#110, isEmpty#111, sum#112] Results [8]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109, sum#113, isEmpty#114, sum#115] -(105) Exchange +(103) Exchange Input [8]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109, sum#113, isEmpty#114, sum#115] Arguments: hashpartitioning(channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109, 5), ENSURE_REQUIREMENTS, [id=#116] -(106) HashAggregate [codegen id : 80] +(104) HashAggregate [codegen id : 80] Input [8]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109, sum#113, isEmpty#114, sum#115] Keys [5]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109] Functions [2]: [sum(sales#58), sum(number_sales#59)] Aggregate Attributes [2]: [sum(sales#58)#117, sum(number_sales#59)#118] Results [6]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, sum(sales#58)#117 AS sum(sales)#119, sum(number_sales#59)#118 AS sum(number_sales)#120] -(107) TakeOrderedAndProject +(105) TakeOrderedAndProject Input [6]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, sum(sales)#119, sum(number_sales)#120] Arguments: 100, [channel#105 ASC NULLS FIRST, i_brand_id#106 ASC NULLS FIRST, i_class_id#107 ASC NULLS FIRST, i_category_id#108 ASC NULLS FIRST], [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, sum(sales)#119, sum(number_sales)#120] ===== Subqueries ===== -Subquery:1 Hosting operator id = 68 Hosting Expression = Subquery scalar-subquery#60, [id=#61] -* HashAggregate (126) -+- Exchange (125) - +- * HashAggregate (124) - +- Union (123) - :- * Project (112) - : +- * BroadcastHashJoin Inner BuildRight (111) - : :- * ColumnarToRow (109) - : : +- Scan parquet default.store_sales (108) - : +- ReusedExchange (110) - :- * Project (117) - : +- * 
BroadcastHashJoin Inner BuildRight (116) - : :- * ColumnarToRow (114) - : : +- Scan parquet default.catalog_sales (113) - : +- ReusedExchange (115) - +- * Project (122) - +- * BroadcastHashJoin Inner BuildRight (121) - :- * ColumnarToRow (119) - : +- Scan parquet default.web_sales (118) - +- ReusedExchange (120) - - -(108) Scan parquet default.store_sales +Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#60, [id=#61] +* HashAggregate (124) ++- Exchange (123) + +- * HashAggregate (122) + +- Union (121) + :- * Project (110) + : +- * BroadcastHashJoin Inner BuildRight (109) + : :- * ColumnarToRow (107) + : : +- Scan parquet default.store_sales (106) + : +- ReusedExchange (108) + :- * Project (115) + : +- * BroadcastHashJoin Inner BuildRight (114) + : :- * ColumnarToRow (112) + : : +- Scan parquet default.catalog_sales (111) + : +- ReusedExchange (113) + +- * Project (120) + +- * BroadcastHashJoin Inner BuildRight (119) + :- * ColumnarToRow (117) + : +- Scan parquet default.web_sales (116) + +- ReusedExchange (118) + + +(106) Scan parquet default.store_sales Output [3]: [ss_quantity#121, ss_list_price#122, ss_sold_date_sk#123] Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ss_sold_date_sk#123), dynamicpruningexpression(ss_sold_date_sk#123 IN dynamicpruning#12)] ReadSchema: struct -(109) ColumnarToRow [codegen id : 2] +(107) ColumnarToRow [codegen id : 2] Input [3]: [ss_quantity#121, ss_list_price#122, ss_sold_date_sk#123] -(110) ReusedExchange [Reuses operator id: 136] +(108) ReusedExchange [Reuses operator id: 134] Output [1]: [d_date_sk#124] -(111) BroadcastHashJoin [codegen id : 2] +(109) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#123] Right keys [1]: [d_date_sk#124] Join condition: None -(112) Project [codegen id : 2] +(110) Project [codegen id : 2] Output [2]: [ss_quantity#121 AS quantity#125, ss_list_price#122 AS list_price#126] Input [4]: [ss_quantity#121, ss_list_price#122, ss_sold_date_sk#123, d_date_sk#124] -(113) Scan parquet default.catalog_sales +(111) Scan parquet default.catalog_sales Output [3]: [cs_quantity#127, cs_list_price#128, cs_sold_date_sk#129] Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(cs_sold_date_sk#129), dynamicpruningexpression(cs_sold_date_sk#129 IN dynamicpruning#12)] ReadSchema: struct -(114) ColumnarToRow [codegen id : 4] +(112) ColumnarToRow [codegen id : 4] Input [3]: [cs_quantity#127, cs_list_price#128, cs_sold_date_sk#129] -(115) ReusedExchange [Reuses operator id: 136] +(113) ReusedExchange [Reuses operator id: 134] Output [1]: [d_date_sk#130] -(116) BroadcastHashJoin [codegen id : 4] +(114) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#129] Right keys [1]: [d_date_sk#130] Join condition: None -(117) Project [codegen id : 4] +(115) Project [codegen id : 4] Output [2]: [cs_quantity#127 AS quantity#131, cs_list_price#128 AS list_price#132] Input [4]: [cs_quantity#127, cs_list_price#128, cs_sold_date_sk#129, d_date_sk#130] -(118) Scan parquet default.web_sales +(116) Scan parquet default.web_sales Output [3]: [ws_quantity#133, ws_list_price#134, ws_sold_date_sk#135] Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ws_sold_date_sk#135), dynamicpruningexpression(ws_sold_date_sk#135 IN dynamicpruning#12)] ReadSchema: struct -(119) ColumnarToRow [codegen id : 6] +(117) ColumnarToRow [codegen id : 6] Input [3]: [ws_quantity#133, ws_list_price#134, ws_sold_date_sk#135] -(120) ReusedExchange [Reuses operator 
id: 136] +(118) ReusedExchange [Reuses operator id: 134] Output [1]: [d_date_sk#136] -(121) BroadcastHashJoin [codegen id : 6] +(119) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_sold_date_sk#135] Right keys [1]: [d_date_sk#136] Join condition: None -(122) Project [codegen id : 6] +(120) Project [codegen id : 6] Output [2]: [ws_quantity#133 AS quantity#137, ws_list_price#134 AS list_price#138] Input [4]: [ws_quantity#133, ws_list_price#134, ws_sold_date_sk#135, d_date_sk#136] -(123) Union +(121) Union -(124) HashAggregate [codegen id : 7] +(122) HashAggregate [codegen id : 7] Input [2]: [quantity#125, list_price#126] Keys: [] Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#139, count#140] Results [2]: [sum#141, count#142] -(125) Exchange +(123) Exchange Input [2]: [sum#141, count#142] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#143] -(126) HashAggregate [codegen id : 8] +(124) HashAggregate [codegen id : 8] Input [2]: [sum#141, count#142] Keys: [] Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2)))#144] Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2)))#144 AS average_sales#145] -Subquery:2 Hosting operator id = 108 Hosting Expression = ss_sold_date_sk#123 IN dynamicpruning#12 +Subquery:2 Hosting operator id = 106 Hosting Expression = ss_sold_date_sk#123 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 113 Hosting Expression = cs_sold_date_sk#129 IN dynamicpruning#12 +Subquery:3 Hosting operator id = 111 Hosting Expression = cs_sold_date_sk#129 IN dynamicpruning#12 -Subquery:4 Hosting operator id = 118 Hosting Expression = ws_sold_date_sk#135 IN dynamicpruning#12 +Subquery:4 Hosting operator id = 116 Hosting Expression = ws_sold_date_sk#135 IN dynamicpruning#12 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 -BroadcastExchange (131) -+- * Project (130) - +- * Filter (129) - +- * ColumnarToRow (128) - +- Scan parquet default.date_dim (127) +BroadcastExchange (129) ++- * Project (128) + +- * Filter (127) + +- * ColumnarToRow (126) + +- Scan parquet default.date_dim (125) -(127) Scan parquet default.date_dim +(125) Scan parquet default.date_dim Output [3]: [d_date_sk#48, d_year#146, d_moy#147] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct -(128) ColumnarToRow [codegen id : 1] +(126) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#48, d_year#146, d_moy#147] -(129) Filter [codegen id : 1] +(127) Filter [codegen id : 1] Input [3]: [d_date_sk#48, d_year#146, d_moy#147] Condition : ((((isnotnull(d_year#146) AND isnotnull(d_moy#147)) AND (d_year#146 = 2001)) AND (d_moy#147 = 11)) AND isnotnull(d_date_sk#48)) -(130) Project [codegen id : 1] +(128) Project [codegen id : 1] Output [1]: [d_date_sk#48] Input [3]: [d_date_sk#48, d_year#146, d_moy#147] -(131) BroadcastExchange +(129) 
BroadcastExchange Input [1]: [d_date_sk#48] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#148] Subquery:6 Hosting operator id = 7 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#12 -BroadcastExchange (136) -+- * Project (135) - +- * Filter (134) - +- * ColumnarToRow (133) - +- Scan parquet default.date_dim (132) +BroadcastExchange (134) ++- * Project (133) + +- * Filter (132) + +- * ColumnarToRow (131) + +- Scan parquet default.date_dim (130) -(132) Scan parquet default.date_dim +(130) Scan parquet default.date_dim Output [2]: [d_date_sk#27, d_year#149] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(133) ColumnarToRow [codegen id : 1] +(131) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#27, d_year#149] -(134) Filter [codegen id : 1] +(132) Filter [codegen id : 1] Input [2]: [d_date_sk#27, d_year#149] Condition : (((isnotnull(d_year#149) AND (d_year#149 >= 1999)) AND (d_year#149 <= 2001)) AND isnotnull(d_date_sk#27)) -(135) Project [codegen id : 1] +(133) Project [codegen id : 1] Output [1]: [d_date_sk#27] Input [2]: [d_date_sk#27, d_year#149] -(136) BroadcastExchange +(134) BroadcastExchange Input [1]: [d_date_sk#27] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#150] @@ -783,12 +767,12 @@ Subquery:7 Hosting operator id = 13 Hosting Expression = cs_sold_date_sk#18 IN d Subquery:8 Hosting operator id = 36 Hosting Expression = ws_sold_date_sk#33 IN dynamicpruning#12 -Subquery:9 Hosting operator id = 84 Hosting Expression = ReusedSubquery Subquery scalar-subquery#60, [id=#61] +Subquery:9 Hosting operator id = 82 Hosting Expression = ReusedSubquery Subquery scalar-subquery#60, [id=#61] -Subquery:10 Hosting operator id = 70 Hosting Expression = cs_sold_date_sk#66 IN dynamicpruning#5 +Subquery:10 Hosting operator id = 68 Hosting Expression = cs_sold_date_sk#66 IN dynamicpruning#5 -Subquery:11 Hosting operator id = 100 Hosting Expression = ReusedSubquery Subquery scalar-subquery#60, [id=#61] +Subquery:11 Hosting operator id = 98 Hosting Expression = ReusedSubquery Subquery scalar-subquery#60, [id=#61] -Subquery:12 Hosting operator id = 86 Hosting Expression = ws_sold_date_sk#87 IN dynamicpruning#5 +Subquery:12 Hosting operator id = 84 Hosting Expression = ws_sold_date_sk#87 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt index 653e3e6564e41..b8125b2af8e92 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt @@ -81,77 +81,75 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter BroadcastExchange #5 WholeStageCodegen (10) - HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #6 - WholeStageCodegen (6) - HashAggregate [brand_id,class_id,category_id] - Project 
[i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #2 - BroadcastExchange #7 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (4) - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen (3) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #2 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (1) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - ReusedExchange [d_date_sk] #7 - InputAdapter - ReusedExchange [d_date_sk] #7 - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (9) + InputAdapter + Exchange [brand_id,class_id,category_id] #6 + WholeStageCodegen (6) + HashAggregate [brand_id,class_id,category_id] Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Filter [ws_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] - ReusedSubquery [d_date_sk] #2 + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #2 + BroadcastExchange #7 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + BroadcastExchange #8 + WholeStageCodegen (4) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (3) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + ReusedSubquery [d_date_sk] #2 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + 
ReusedExchange [d_date_sk] #7 InputAdapter ReusedExchange [d_date_sk] #7 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + ReusedSubquery [d_date_sk] #2 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + InputAdapter + ReusedExchange [d_date_sk] #7 InputAdapter BroadcastExchange #12 WholeStageCodegen (23) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt index 78133a44c9a69..3f0acc0ea73be 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt @@ -1,106 +1,103 @@ == Physical Plan == -TakeOrderedAndProject (102) -+- * BroadcastHashJoin Inner BuildRight (101) - :- * Filter (81) - : +- * HashAggregate (80) - : +- Exchange (79) - : +- * HashAggregate (78) - : +- * Project (77) - : +- * BroadcastHashJoin Inner BuildRight (76) - : :- * Project (66) - : : +- * BroadcastHashJoin Inner BuildRight (65) - : : :- * SortMergeJoin LeftSemi (63) +TakeOrderedAndProject (99) ++- * BroadcastHashJoin Inner BuildRight (98) + :- * Filter (78) + : +- * HashAggregate (77) + : +- Exchange (76) + : +- * HashAggregate (75) + : +- * Project (74) + : +- * BroadcastHashJoin Inner BuildRight (73) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * SortMergeJoin LeftSemi (60) : : : :- * Sort (5) : : : : +- Exchange (4) : : : : +- * Filter (3) : : : : +- * ColumnarToRow (2) : : : : +- Scan parquet default.store_sales (1) - : : : +- * Sort (62) - : : : +- Exchange (61) - : : : +- * Project (60) - : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : +- * Sort (59) + : : : +- Exchange (58) + : : : +- * Project (57) + : : : +- * BroadcastHashJoin Inner BuildRight (56) : : : :- * Filter (8) : : : : +- * ColumnarToRow (7) : : : : +- Scan parquet default.item (6) - : : : +- BroadcastExchange (58) - : : : +- * HashAggregate (57) - : : : +- Exchange (56) - : : : +- * HashAggregate (55) - : : : +- * SortMergeJoin LeftSemi (54) - : : : :- * Sort (42) - : : : : +- Exchange (41) - : : : : +- * HashAggregate (40) - : : : : +- Exchange (39) - : : : : +- * HashAggregate (38) - : : : : +- * Project (37) - : : : : +- * BroadcastHashJoin Inner BuildRight (36) - : : : : :- * Project (14) - : : : : : +- * BroadcastHashJoin Inner BuildRight (13) - : : : : : :- * Filter (11) - : : : : : : +- * ColumnarToRow (10) - : : : : : : +- Scan parquet default.store_sales (9) - : : : : : +- ReusedExchange (12) - : : : : +- BroadcastExchange (35) - : : : : +- * SortMergeJoin LeftSemi (34) - : : : : :- * Sort (19) - : : : : : +- Exchange (18) - : : : : : +- * Filter (17) - : : : : : +- * ColumnarToRow (16) - : : : : : +- Scan parquet default.item (15) - : : : : +- * Sort (33) - : : : : +- Exchange (32) - : : : : +- * Project (31) - : : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : : :- * Project (25) - : : : : : +- * BroadcastHashJoin Inner BuildRight (24) - : : : : : :- * Filter (22) - : : : : : : +- * ColumnarToRow (21) - : : : : : : +- Scan parquet 
default.catalog_sales (20) - : : : : : +- ReusedExchange (23) - : : : : +- BroadcastExchange (29) - : : : : +- * Filter (28) - : : : : +- * ColumnarToRow (27) - : : : : +- Scan parquet default.item (26) - : : : +- * Sort (53) - : : : +- Exchange (52) - : : : +- * Project (51) - : : : +- * BroadcastHashJoin Inner BuildRight (50) - : : : :- * Project (48) - : : : : +- * BroadcastHashJoin Inner BuildRight (47) - : : : : :- * Filter (45) - : : : : : +- * ColumnarToRow (44) - : : : : : +- Scan parquet default.web_sales (43) - : : : : +- ReusedExchange (46) - : : : +- ReusedExchange (49) - : : +- ReusedExchange (64) - : +- BroadcastExchange (75) - : +- * SortMergeJoin LeftSemi (74) - : :- * Sort (71) - : : +- Exchange (70) - : : +- * Filter (69) - : : +- * ColumnarToRow (68) - : : +- Scan parquet default.item (67) - : +- * Sort (73) - : +- ReusedExchange (72) - +- BroadcastExchange (100) - +- * Filter (99) - +- * HashAggregate (98) - +- Exchange (97) - +- * HashAggregate (96) - +- * Project (95) - +- * BroadcastHashJoin Inner BuildRight (94) - :- * Project (92) - : +- * BroadcastHashJoin Inner BuildRight (91) - : :- * SortMergeJoin LeftSemi (89) - : : :- * Sort (86) - : : : +- Exchange (85) - : : : +- * Filter (84) - : : : +- * ColumnarToRow (83) - : : : +- Scan parquet default.store_sales (82) - : : +- * Sort (88) - : : +- ReusedExchange (87) - : +- ReusedExchange (90) - +- ReusedExchange (93) + : : : +- BroadcastExchange (55) + : : : +- * SortMergeJoin LeftSemi (54) + : : : :- * Sort (42) + : : : : +- Exchange (41) + : : : : +- * HashAggregate (40) + : : : : +- Exchange (39) + : : : : +- * HashAggregate (38) + : : : : +- * Project (37) + : : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : : : :- * Project (14) + : : : : : +- * BroadcastHashJoin Inner BuildRight (13) + : : : : : :- * Filter (11) + : : : : : : +- * ColumnarToRow (10) + : : : : : : +- Scan parquet default.store_sales (9) + : : : : : +- ReusedExchange (12) + : : : : +- BroadcastExchange (35) + : : : : +- * SortMergeJoin LeftSemi (34) + : : : : :- * Sort (19) + : : : : : +- Exchange (18) + : : : : : +- * Filter (17) + : : : : : +- * ColumnarToRow (16) + : : : : : +- Scan parquet default.item (15) + : : : : +- * Sort (33) + : : : : +- Exchange (32) + : : : : +- * Project (31) + : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : :- * Project (25) + : : : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : : : :- * Filter (22) + : : : : : : +- * ColumnarToRow (21) + : : : : : : +- Scan parquet default.catalog_sales (20) + : : : : : +- ReusedExchange (23) + : : : : +- BroadcastExchange (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet default.item (26) + : : : +- * Sort (53) + : : : +- Exchange (52) + : : : +- * Project (51) + : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : :- * Project (48) + : : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : : :- * Filter (45) + : : : : : +- * ColumnarToRow (44) + : : : : : +- Scan parquet default.web_sales (43) + : : : : +- ReusedExchange (46) + : : : +- ReusedExchange (49) + : : +- ReusedExchange (61) + : +- BroadcastExchange (72) + : +- * SortMergeJoin LeftSemi (71) + : :- * Sort (68) + : : +- Exchange (67) + : : +- * Filter (66) + : : +- * ColumnarToRow (65) + : : +- Scan parquet default.item (64) + : +- * Sort (70) + : +- ReusedExchange (69) + +- BroadcastExchange (97) + +- * Filter (96) + +- * HashAggregate (95) + +- Exchange (94) + +- * HashAggregate (93) + +- * Project (92) + +- * BroadcastHashJoin Inner 
BuildRight (91) + :- * Project (89) + : +- * BroadcastHashJoin Inner BuildRight (88) + : :- * SortMergeJoin LeftSemi (86) + : : :- * Sort (83) + : : : +- Exchange (82) + : : : +- * Filter (81) + : : : +- * ColumnarToRow (80) + : : : +- Scan parquet default.store_sales (79) + : : +- * Sort (85) + : : +- ReusedExchange (84) + : +- ReusedExchange (87) + +- ReusedExchange (90) (1) Scan parquet default.store_sales @@ -133,10 +130,10 @@ Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct -(7) ColumnarToRow [codegen id : 20] +(7) ColumnarToRow [codegen id : 19] Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] -(8) Filter [codegen id : 20] +(8) Filter [codegen id : 19] Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] Condition : ((isnotnull(i_brand_id#8) AND isnotnull(i_class_id#9)) AND isnotnull(i_category_id#10)) @@ -155,7 +152,7 @@ Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] Condition : isnotnull(ss_item_sk#11) -(12) ReusedExchange [Reuses operator id: 135] +(12) ReusedExchange [Reuses operator id: 132] Output [1]: [d_date_sk#14] (13) BroadcastHashJoin [codegen id : 11] @@ -204,7 +201,7 @@ Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] Condition : isnotnull(cs_item_sk#20) -(23) ReusedExchange [Reuses operator id: 135] +(23) ReusedExchange [Reuses operator id: 132] Output [1]: [d_date_sk#22] (24) BroadcastHashJoin [codegen id : 8] @@ -310,7 +307,7 @@ Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] Condition : isnotnull(ws_item_sk#35) -(46) ReusedExchange [Reuses operator id: 135] +(46) ReusedExchange [Reuses operator id: 132] Output [1]: [d_date_sk#37] (47) BroadcastHashJoin [codegen id : 16] @@ -347,485 +344,467 @@ Left keys [6]: [coalesce(brand_id#30, 0), isnull(brand_id#30), coalesce(class_id Right keys [6]: [coalesce(i_brand_id#39, 0), isnull(i_brand_id#39), coalesce(i_class_id#40, 0), isnull(i_class_id#40), coalesce(i_category_id#41, 0), isnull(i_category_id#41)] Join condition: None -(55) HashAggregate [codegen id : 18] +(55) BroadcastExchange Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] - -(56) Exchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: hashpartitioning(brand_id#30, class_id#31, category_id#32, 5), ENSURE_REQUIREMENTS, [id=#43] - -(57) HashAggregate [codegen id : 19] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] - -(58) BroadcastExchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#44] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#43] -(59) BroadcastHashJoin [codegen id : 20] +(56) BroadcastHashJoin [codegen id : 19] Left keys [3]: [i_brand_id#8, i_class_id#9, i_category_id#10] Right keys [3]: [brand_id#30, class_id#31, category_id#32] Join condition: None -(60) Project [codegen id : 20] -Output [1]: [i_item_sk#7 AS ss_item_sk#45] +(57) 
Project [codegen id : 19] +Output [1]: [i_item_sk#7 AS ss_item_sk#44] Input [7]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, brand_id#30, class_id#31, category_id#32] -(61) Exchange -Input [1]: [ss_item_sk#45] -Arguments: hashpartitioning(ss_item_sk#45, 5), ENSURE_REQUIREMENTS, [id=#46] +(58) Exchange +Input [1]: [ss_item_sk#44] +Arguments: hashpartitioning(ss_item_sk#44, 5), ENSURE_REQUIREMENTS, [id=#45] -(62) Sort [codegen id : 21] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS FIRST], false, 0 +(59) Sort [codegen id : 20] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(63) SortMergeJoin [codegen id : 45] +(60) SortMergeJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [ss_item_sk#45] +Right keys [1]: [ss_item_sk#44] Join condition: None -(64) ReusedExchange [Reuses operator id: 126] -Output [1]: [d_date_sk#47] +(61) ReusedExchange [Reuses operator id: 123] +Output [1]: [d_date_sk#46] -(65) BroadcastHashJoin [codegen id : 45] +(62) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#47] +Right keys [1]: [d_date_sk#46] Join condition: None -(66) Project [codegen id : 45] +(63) Project [codegen id : 43] Output [3]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3] -Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#47] +Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#46] -(67) Scan parquet default.item -Output [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] +(64) Scan parquet default.item +Output [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct -(68) ColumnarToRow [codegen id : 23] -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] +(65) ColumnarToRow [codegen id : 22] +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -(69) Filter [codegen id : 23] -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Condition : (((isnotnull(i_item_sk#48) AND isnotnull(i_brand_id#49)) AND isnotnull(i_class_id#50)) AND isnotnull(i_category_id#51)) +(66) Filter [codegen id : 22] +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Condition : (((isnotnull(i_item_sk#47) AND isnotnull(i_brand_id#48)) AND isnotnull(i_class_id#49)) AND isnotnull(i_category_id#50)) -(70) Exchange -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Arguments: hashpartitioning(i_item_sk#48, 5), ENSURE_REQUIREMENTS, [id=#52] +(67) Exchange +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Arguments: hashpartitioning(i_item_sk#47, 5), ENSURE_REQUIREMENTS, [id=#51] -(71) Sort [codegen id : 24] -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Arguments: [i_item_sk#48 ASC NULLS FIRST], false, 0 +(68) Sort [codegen id : 23] +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Arguments: [i_item_sk#47 ASC NULLS FIRST], false, 0 -(72) ReusedExchange [Reuses operator id: 61] -Output [1]: [ss_item_sk#45] +(69) ReusedExchange [Reuses operator id: 58] +Output [1]: [ss_item_sk#44] -(73) Sort [codegen id : 43] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS FIRST], false, 0 +(70) Sort [codegen 
id : 41] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(74) SortMergeJoin [codegen id : 44] -Left keys [1]: [i_item_sk#48] -Right keys [1]: [ss_item_sk#45] +(71) SortMergeJoin [codegen id : 42] +Left keys [1]: [i_item_sk#47] +Right keys [1]: [ss_item_sk#44] Join condition: None -(75) BroadcastExchange -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#53] +(72) BroadcastExchange +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#52] -(76) BroadcastHashJoin [codegen id : 45] +(73) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#48] +Right keys [1]: [i_item_sk#47] Join condition: None -(77) Project [codegen id : 45] -Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] -Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] +(74) Project [codegen id : 43] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -(78) HashAggregate [codegen id : 45] -Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] -Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] +(75) HashAggregate [codegen id : 43] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] -Results [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] +Aggregate Attributes [3]: [sum#53, isEmpty#54, count#55] +Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] -(79) Exchange -Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] -Arguments: hashpartitioning(i_brand_id#49, i_class_id#50, i_category_id#51, 5), ENSURE_REQUIREMENTS, [id=#60] +(76) Exchange +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] +Arguments: hashpartitioning(i_brand_id#48, i_class_id#49, i_category_id#50, 5), ENSURE_REQUIREMENTS, [id=#59] -(80) HashAggregate [codegen id : 92] -Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] -Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] +(77) HashAggregate [codegen id : 88] +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61, count(1)#62] -Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, 
i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61 AS sales#64, count(1)#62 AS number_sales#65] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60, count(1)#61] +Results [6]: [store AS channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60 AS sales#63, count(1)#61 AS number_sales#64] -(81) Filter [codegen id : 92] -Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65] -Condition : (isnotnull(sales#64) AND (cast(sales#64 as decimal(32,6)) > cast(Subquery scalar-subquery#66, [id=#67] as decimal(32,6)))) +(78) Filter [codegen id : 88] +Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Condition : (isnotnull(sales#63) AND (cast(sales#63 as decimal(32,6)) > cast(Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) -(82) Scan parquet default.store_sales -Output [4]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, ss_sold_date_sk#71] +(79) Scan parquet default.store_sales +Output [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#71), dynamicpruningexpression(ss_sold_date_sk#71 IN dynamicpruning#72)] +PartitionFilters: [isnotnull(ss_sold_date_sk#70), dynamicpruningexpression(ss_sold_date_sk#70 IN dynamicpruning#71)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct -(83) ColumnarToRow [codegen id : 46] -Input [4]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, ss_sold_date_sk#71] +(80) ColumnarToRow [codegen id : 44] +Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] -(84) Filter [codegen id : 46] -Input [4]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, ss_sold_date_sk#71] -Condition : isnotnull(ss_item_sk#68) +(81) Filter [codegen id : 44] +Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] +Condition : isnotnull(ss_item_sk#67) -(85) Exchange -Input [4]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, ss_sold_date_sk#71] -Arguments: hashpartitioning(ss_item_sk#68, 5), ENSURE_REQUIREMENTS, [id=#73] +(82) Exchange +Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] +Arguments: hashpartitioning(ss_item_sk#67, 5), ENSURE_REQUIREMENTS, [id=#72] -(86) Sort [codegen id : 47] -Input [4]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, ss_sold_date_sk#71] -Arguments: [ss_item_sk#68 ASC NULLS FIRST], false, 0 +(83) Sort [codegen id : 45] +Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] +Arguments: [ss_item_sk#67 ASC NULLS FIRST], false, 0 -(87) ReusedExchange [Reuses operator id: 61] -Output [1]: [ss_item_sk#45] +(84) ReusedExchange [Reuses operator id: 58] +Output [1]: [ss_item_sk#44] -(88) Sort [codegen id : 66] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS FIRST], false, 0 +(85) Sort [codegen id : 63] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(89) SortMergeJoin [codegen id : 90] -Left keys [1]: [ss_item_sk#68] -Right keys [1]: [ss_item_sk#45] +(86) SortMergeJoin 
[codegen id : 86] +Left keys [1]: [ss_item_sk#67] +Right keys [1]: [ss_item_sk#44] Join condition: None -(90) ReusedExchange [Reuses operator id: 140] -Output [1]: [d_date_sk#74] +(87) ReusedExchange [Reuses operator id: 137] +Output [1]: [d_date_sk#73] -(91) BroadcastHashJoin [codegen id : 90] -Left keys [1]: [ss_sold_date_sk#71] -Right keys [1]: [d_date_sk#74] +(88) BroadcastHashJoin [codegen id : 86] +Left keys [1]: [ss_sold_date_sk#70] +Right keys [1]: [d_date_sk#73] Join condition: None -(92) Project [codegen id : 90] -Output [3]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70] -Input [5]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, ss_sold_date_sk#71, d_date_sk#74] +(89) Project [codegen id : 86] +Output [3]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69] +Input [5]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70, d_date_sk#73] -(93) ReusedExchange [Reuses operator id: 75] -Output [4]: [i_item_sk#75, i_brand_id#76, i_class_id#77, i_category_id#78] +(90) ReusedExchange [Reuses operator id: 72] +Output [4]: [i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] -(94) BroadcastHashJoin [codegen id : 90] -Left keys [1]: [ss_item_sk#68] -Right keys [1]: [i_item_sk#75] +(91) BroadcastHashJoin [codegen id : 86] +Left keys [1]: [ss_item_sk#67] +Right keys [1]: [i_item_sk#74] Join condition: None -(95) Project [codegen id : 90] -Output [5]: [ss_quantity#69, ss_list_price#70, i_brand_id#76, i_class_id#77, i_category_id#78] -Input [7]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, i_item_sk#75, i_brand_id#76, i_class_id#77, i_category_id#78] - -(96) HashAggregate [codegen id : 90] -Input [5]: [ss_quantity#69, ss_list_price#70, i_brand_id#76, i_class_id#77, i_category_id#78] -Keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#79, isEmpty#80, count#81] -Results [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] - -(97) Exchange -Input [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] -Arguments: hashpartitioning(i_brand_id#76, i_class_id#77, i_category_id#78, 5), ENSURE_REQUIREMENTS, [id=#85] - -(98) HashAggregate [codegen id : 91] -Input [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] -Keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#86, count(1)#87] -Results [6]: [store AS channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#86 AS sales#89, count(1)#87 AS number_sales#90] - -(99) Filter [codegen id : 91] -Input [6]: [channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sales#89, number_sales#90] -Condition : (isnotnull(sales#89) AND (cast(sales#89 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#66, [id=#67] as decimal(32,6)))) - -(100) 
BroadcastExchange -Input [6]: [channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sales#89, number_sales#90] -Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#91] - -(101) BroadcastHashJoin [codegen id : 92] -Left keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Right keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] +(92) Project [codegen id : 86] +Output [5]: [ss_quantity#68, ss_list_price#69, i_brand_id#75, i_class_id#76, i_category_id#77] +Input [7]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] + +(93) HashAggregate [codegen id : 86] +Input [5]: [ss_quantity#68, ss_list_price#69, i_brand_id#75, i_class_id#76, i_category_id#77] +Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#78, isEmpty#79, count#80] +Results [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] + +(94) Exchange +Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] +Arguments: hashpartitioning(i_brand_id#75, i_class_id#76, i_category_id#77, 5), ENSURE_REQUIREMENTS, [id=#84] + +(95) HashAggregate [codegen id : 87] +Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] +Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#85, count(1)#86] +Results [6]: [store AS channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#85 AS sales#88, count(1)#86 AS number_sales#89] + +(96) Filter [codegen id : 87] +Input [6]: [channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] +Condition : (isnotnull(sales#88) AND (cast(sales#88 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) + +(97) BroadcastExchange +Input [6]: [channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#90] + +(98) BroadcastHashJoin [codegen id : 88] +Left keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Right keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] Join condition: None -(102) TakeOrderedAndProject -Input [12]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65, channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sales#89, number_sales#90] -Arguments: 100, [i_brand_id#49 ASC NULLS FIRST, i_class_id#50 ASC NULLS FIRST, i_category_id#51 ASC NULLS FIRST], [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65, channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sales#89, number_sales#90] 
+(99) TakeOrderedAndProject +Input [12]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] +Arguments: 100, [i_brand_id#48 ASC NULLS FIRST, i_class_id#49 ASC NULLS FIRST, i_category_id#50 ASC NULLS FIRST], [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] ===== Subqueries ===== -Subquery:1 Hosting operator id = 81 Hosting Expression = Subquery scalar-subquery#66, [id=#67] -* HashAggregate (121) -+- Exchange (120) - +- * HashAggregate (119) - +- Union (118) - :- * Project (107) - : +- * BroadcastHashJoin Inner BuildRight (106) - : :- * ColumnarToRow (104) - : : +- Scan parquet default.store_sales (103) - : +- ReusedExchange (105) - :- * Project (112) - : +- * BroadcastHashJoin Inner BuildRight (111) - : :- * ColumnarToRow (109) - : : +- Scan parquet default.catalog_sales (108) - : +- ReusedExchange (110) - +- * Project (117) - +- * BroadcastHashJoin Inner BuildRight (116) - :- * ColumnarToRow (114) - : +- Scan parquet default.web_sales (113) - +- ReusedExchange (115) - - -(103) Scan parquet default.store_sales -Output [3]: [ss_quantity#92, ss_list_price#93, ss_sold_date_sk#94] +Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquery#65, [id=#66] +* HashAggregate (118) ++- Exchange (117) + +- * HashAggregate (116) + +- Union (115) + :- * Project (104) + : +- * BroadcastHashJoin Inner BuildRight (103) + : :- * ColumnarToRow (101) + : : +- Scan parquet default.store_sales (100) + : +- ReusedExchange (102) + :- * Project (109) + : +- * BroadcastHashJoin Inner BuildRight (108) + : :- * ColumnarToRow (106) + : : +- Scan parquet default.catalog_sales (105) + : +- ReusedExchange (107) + +- * Project (114) + +- * BroadcastHashJoin Inner BuildRight (113) + :- * ColumnarToRow (111) + : +- Scan parquet default.web_sales (110) + +- ReusedExchange (112) + + +(100) Scan parquet default.store_sales +Output [3]: [ss_quantity#91, ss_list_price#92, ss_sold_date_sk#93] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#94), dynamicpruningexpression(ss_sold_date_sk#94 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ss_sold_date_sk#93), dynamicpruningexpression(ss_sold_date_sk#93 IN dynamicpruning#13)] ReadSchema: struct -(104) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#92, ss_list_price#93, ss_sold_date_sk#94] +(101) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#91, ss_list_price#92, ss_sold_date_sk#93] -(105) ReusedExchange [Reuses operator id: 135] -Output [1]: [d_date_sk#95] +(102) ReusedExchange [Reuses operator id: 132] +Output [1]: [d_date_sk#94] -(106) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#94] -Right keys [1]: [d_date_sk#95] +(103) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#93] +Right keys [1]: [d_date_sk#94] Join condition: None -(107) Project [codegen id : 2] -Output [2]: [ss_quantity#92 AS quantity#96, ss_list_price#93 AS list_price#97] -Input [4]: [ss_quantity#92, ss_list_price#93, ss_sold_date_sk#94, d_date_sk#95] +(104) Project [codegen id : 2] +Output [2]: [ss_quantity#91 AS quantity#95, ss_list_price#92 AS list_price#96] +Input [4]: [ss_quantity#91, ss_list_price#92, ss_sold_date_sk#93, d_date_sk#94] -(108) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#98, cs_list_price#99, 
cs_sold_date_sk#100] +(105) Scan parquet default.catalog_sales +Output [3]: [cs_quantity#97, cs_list_price#98, cs_sold_date_sk#99] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#100), dynamicpruningexpression(cs_sold_date_sk#100 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(cs_sold_date_sk#99), dynamicpruningexpression(cs_sold_date_sk#99 IN dynamicpruning#13)] ReadSchema: struct -(109) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#98, cs_list_price#99, cs_sold_date_sk#100] +(106) ColumnarToRow [codegen id : 4] +Input [3]: [cs_quantity#97, cs_list_price#98, cs_sold_date_sk#99] -(110) ReusedExchange [Reuses operator id: 135] -Output [1]: [d_date_sk#101] +(107) ReusedExchange [Reuses operator id: 132] +Output [1]: [d_date_sk#100] -(111) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#100] -Right keys [1]: [d_date_sk#101] +(108) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#99] +Right keys [1]: [d_date_sk#100] Join condition: None -(112) Project [codegen id : 4] -Output [2]: [cs_quantity#98 AS quantity#102, cs_list_price#99 AS list_price#103] -Input [4]: [cs_quantity#98, cs_list_price#99, cs_sold_date_sk#100, d_date_sk#101] +(109) Project [codegen id : 4] +Output [2]: [cs_quantity#97 AS quantity#101, cs_list_price#98 AS list_price#102] +Input [4]: [cs_quantity#97, cs_list_price#98, cs_sold_date_sk#99, d_date_sk#100] -(113) Scan parquet default.web_sales -Output [3]: [ws_quantity#104, ws_list_price#105, ws_sold_date_sk#106] +(110) Scan parquet default.web_sales +Output [3]: [ws_quantity#103, ws_list_price#104, ws_sold_date_sk#105] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#106), dynamicpruningexpression(ws_sold_date_sk#106 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ws_sold_date_sk#105), dynamicpruningexpression(ws_sold_date_sk#105 IN dynamicpruning#13)] ReadSchema: struct -(114) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#104, ws_list_price#105, ws_sold_date_sk#106] +(111) ColumnarToRow [codegen id : 6] +Input [3]: [ws_quantity#103, ws_list_price#104, ws_sold_date_sk#105] -(115) ReusedExchange [Reuses operator id: 135] -Output [1]: [d_date_sk#107] +(112) ReusedExchange [Reuses operator id: 132] +Output [1]: [d_date_sk#106] -(116) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#106] -Right keys [1]: [d_date_sk#107] +(113) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#105] +Right keys [1]: [d_date_sk#106] Join condition: None -(117) Project [codegen id : 6] -Output [2]: [ws_quantity#104 AS quantity#108, ws_list_price#105 AS list_price#109] -Input [4]: [ws_quantity#104, ws_list_price#105, ws_sold_date_sk#106, d_date_sk#107] +(114) Project [codegen id : 6] +Output [2]: [ws_quantity#103 AS quantity#107, ws_list_price#104 AS list_price#108] +Input [4]: [ws_quantity#103, ws_list_price#104, ws_sold_date_sk#105, d_date_sk#106] -(118) Union +(115) Union -(119) HashAggregate [codegen id : 7] -Input [2]: [quantity#96, list_price#97] +(116) HashAggregate [codegen id : 7] +Input [2]: [quantity#95, list_price#96] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#110, count#111] -Results [2]: [sum#112, count#113] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * 
promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#109, count#110] +Results [2]: [sum#111, count#112] -(120) Exchange -Input [2]: [sum#112, count#113] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#114] +(117) Exchange +Input [2]: [sum#111, count#112] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#113] -(121) HashAggregate [codegen id : 8] -Input [2]: [sum#112, count#113] +(118) HashAggregate [codegen id : 8] +Input [2]: [sum#111, count#112] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))#115] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))#115 AS average_sales#116] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))#114] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))#114 AS average_sales#115] -Subquery:2 Hosting operator id = 103 Hosting Expression = ss_sold_date_sk#94 IN dynamicpruning#13 +Subquery:2 Hosting operator id = 100 Hosting Expression = ss_sold_date_sk#93 IN dynamicpruning#13 -Subquery:3 Hosting operator id = 108 Hosting Expression = cs_sold_date_sk#100 IN dynamicpruning#13 +Subquery:3 Hosting operator id = 105 Hosting Expression = cs_sold_date_sk#99 IN dynamicpruning#13 -Subquery:4 Hosting operator id = 113 Hosting Expression = ws_sold_date_sk#106 IN dynamicpruning#13 +Subquery:4 Hosting operator id = 110 Hosting Expression = ws_sold_date_sk#105 IN dynamicpruning#13 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 -BroadcastExchange (126) -+- * Project (125) - +- * Filter (124) - +- * ColumnarToRow (123) - +- Scan parquet default.date_dim (122) +BroadcastExchange (123) ++- * Project (122) + +- * Filter (121) + +- * ColumnarToRow (120) + +- Scan parquet default.date_dim (119) -(122) Scan parquet default.date_dim -Output [2]: [d_date_sk#47, d_week_seq#117] +(119) Scan parquet default.date_dim +Output [2]: [d_date_sk#46, d_week_seq#116] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(123) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#47, d_week_seq#117] +(120) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#46, d_week_seq#116] -(124) Filter [codegen id : 1] -Input [2]: [d_date_sk#47, d_week_seq#117] -Condition : ((isnotnull(d_week_seq#117) AND (d_week_seq#117 = Subquery scalar-subquery#118, [id=#119])) AND isnotnull(d_date_sk#47)) +(121) Filter [codegen id : 1] +Input [2]: [d_date_sk#46, d_week_seq#116] +Condition : ((isnotnull(d_week_seq#116) AND (d_week_seq#116 = Subquery scalar-subquery#117, [id=#118])) AND isnotnull(d_date_sk#46)) -(125) Project [codegen id : 1] -Output [1]: [d_date_sk#47] -Input 
[2]: [d_date_sk#47, d_week_seq#117] +(122) Project [codegen id : 1] +Output [1]: [d_date_sk#46] +Input [2]: [d_date_sk#46, d_week_seq#116] -(126) BroadcastExchange -Input [1]: [d_date_sk#47] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#120] +(123) BroadcastExchange +Input [1]: [d_date_sk#46] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#119] -Subquery:6 Hosting operator id = 124 Hosting Expression = Subquery scalar-subquery#118, [id=#119] -* Project (130) -+- * Filter (129) - +- * ColumnarToRow (128) - +- Scan parquet default.date_dim (127) +Subquery:6 Hosting operator id = 121 Hosting Expression = Subquery scalar-subquery#117, [id=#118] +* Project (127) ++- * Filter (126) + +- * ColumnarToRow (125) + +- Scan parquet default.date_dim (124) -(127) Scan parquet default.date_dim -Output [4]: [d_week_seq#121, d_year#122, d_moy#123, d_dom#124] +(124) Scan parquet default.date_dim +Output [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,2000), EqualTo(d_moy,12), EqualTo(d_dom,11)] ReadSchema: struct -(128) ColumnarToRow [codegen id : 1] -Input [4]: [d_week_seq#121, d_year#122, d_moy#123, d_dom#124] +(125) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] -(129) Filter [codegen id : 1] -Input [4]: [d_week_seq#121, d_year#122, d_moy#123, d_dom#124] -Condition : (((((isnotnull(d_year#122) AND isnotnull(d_moy#123)) AND isnotnull(d_dom#124)) AND (d_year#122 = 2000)) AND (d_moy#123 = 12)) AND (d_dom#124 = 11)) +(126) Filter [codegen id : 1] +Input [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] +Condition : (((((isnotnull(d_year#121) AND isnotnull(d_moy#122)) AND isnotnull(d_dom#123)) AND (d_year#121 = 2000)) AND (d_moy#122 = 12)) AND (d_dom#123 = 11)) -(130) Project [codegen id : 1] -Output [1]: [d_week_seq#121] -Input [4]: [d_week_seq#121, d_year#122, d_moy#123, d_dom#124] +(127) Project [codegen id : 1] +Output [1]: [d_week_seq#120] +Input [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] Subquery:7 Hosting operator id = 9 Hosting Expression = ss_sold_date_sk#12 IN dynamicpruning#13 -BroadcastExchange (135) -+- * Project (134) - +- * Filter (133) - +- * ColumnarToRow (132) - +- Scan parquet default.date_dim (131) +BroadcastExchange (132) ++- * Project (131) + +- * Filter (130) + +- * ColumnarToRow (129) + +- Scan parquet default.date_dim (128) -(131) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#125] +(128) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#124] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(132) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#125] +(129) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#14, d_year#124] -(133) Filter [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#125] -Condition : (((isnotnull(d_year#125) AND (d_year#125 >= 1999)) AND (d_year#125 <= 2001)) AND isnotnull(d_date_sk#14)) +(130) Filter [codegen id : 1] +Input [2]: [d_date_sk#14, d_year#124] +Condition : (((isnotnull(d_year#124) AND (d_year#124 >= 1999)) AND (d_year#124 <= 2001)) AND isnotnull(d_date_sk#14)) -(134) Project [codegen id : 1] +(131) Project 
[codegen id : 1] Output [1]: [d_date_sk#14] -Input [2]: [d_date_sk#14, d_year#125] +Input [2]: [d_date_sk#14, d_year#124] -(135) BroadcastExchange +(132) BroadcastExchange Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#126] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#125] Subquery:8 Hosting operator id = 20 Hosting Expression = cs_sold_date_sk#21 IN dynamicpruning#13 Subquery:9 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#36 IN dynamicpruning#13 -Subquery:10 Hosting operator id = 99 Hosting Expression = ReusedSubquery Subquery scalar-subquery#66, [id=#67] +Subquery:10 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#65, [id=#66] -Subquery:11 Hosting operator id = 82 Hosting Expression = ss_sold_date_sk#71 IN dynamicpruning#72 -BroadcastExchange (140) -+- * Project (139) - +- * Filter (138) - +- * ColumnarToRow (137) - +- Scan parquet default.date_dim (136) +Subquery:11 Hosting operator id = 79 Hosting Expression = ss_sold_date_sk#70 IN dynamicpruning#71 +BroadcastExchange (137) ++- * Project (136) + +- * Filter (135) + +- * ColumnarToRow (134) + +- Scan parquet default.date_dim (133) -(136) Scan parquet default.date_dim -Output [2]: [d_date_sk#74, d_week_seq#127] +(133) Scan parquet default.date_dim +Output [2]: [d_date_sk#73, d_week_seq#126] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(137) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#74, d_week_seq#127] +(134) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#73, d_week_seq#126] -(138) Filter [codegen id : 1] -Input [2]: [d_date_sk#74, d_week_seq#127] -Condition : ((isnotnull(d_week_seq#127) AND (d_week_seq#127 = Subquery scalar-subquery#128, [id=#129])) AND isnotnull(d_date_sk#74)) +(135) Filter [codegen id : 1] +Input [2]: [d_date_sk#73, d_week_seq#126] +Condition : ((isnotnull(d_week_seq#126) AND (d_week_seq#126 = Subquery scalar-subquery#127, [id=#128])) AND isnotnull(d_date_sk#73)) -(139) Project [codegen id : 1] -Output [1]: [d_date_sk#74] -Input [2]: [d_date_sk#74, d_week_seq#127] +(136) Project [codegen id : 1] +Output [1]: [d_date_sk#73] +Input [2]: [d_date_sk#73, d_week_seq#126] -(140) BroadcastExchange -Input [1]: [d_date_sk#74] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#130] +(137) BroadcastExchange +Input [1]: [d_date_sk#73] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#129] -Subquery:12 Hosting operator id = 138 Hosting Expression = Subquery scalar-subquery#128, [id=#129] -* Project (144) -+- * Filter (143) - +- * ColumnarToRow (142) - +- Scan parquet default.date_dim (141) +Subquery:12 Hosting operator id = 135 Hosting Expression = Subquery scalar-subquery#127, [id=#128] +* Project (141) ++- * Filter (140) + +- * ColumnarToRow (139) + +- Scan parquet default.date_dim (138) -(141) Scan parquet default.date_dim -Output [4]: [d_week_seq#131, d_year#132, d_moy#133, d_dom#134] +(138) Scan parquet default.date_dim +Output [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,11)] ReadSchema: struct -(142) ColumnarToRow 
[codegen id : 1] -Input [4]: [d_week_seq#131, d_year#132, d_moy#133, d_dom#134] +(139) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] -(143) Filter [codegen id : 1] -Input [4]: [d_week_seq#131, d_year#132, d_moy#133, d_dom#134] -Condition : (((((isnotnull(d_year#132) AND isnotnull(d_moy#133)) AND isnotnull(d_dom#134)) AND (d_year#132 = 1999)) AND (d_moy#133 = 12)) AND (d_dom#134 = 11)) +(140) Filter [codegen id : 1] +Input [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] +Condition : (((((isnotnull(d_year#131) AND isnotnull(d_moy#132)) AND isnotnull(d_dom#133)) AND (d_year#131 = 1999)) AND (d_moy#132 = 12)) AND (d_dom#133 = 11)) -(144) Project [codegen id : 1] -Output [1]: [d_week_seq#131] -Input [4]: [d_week_seq#131, d_year#132, d_moy#133, d_dom#134] +(141) Project [codegen id : 1] +Output [1]: [d_week_seq#130] +Input [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt index e7d3f84db0c72..82e338515f431 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt @@ -1,12 +1,12 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] - WholeStageCodegen (92) + WholeStageCodegen (88) BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] Filter [sales] Subquery #4 WholeStageCodegen (8) HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] InputAdapter - Exchange #17 + Exchange #16 WholeStageCodegen (7) HashAggregate [quantity,list_price] [sum,count,sum,count] InputAdapter @@ -19,7 +19,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Scan parquet default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] ReusedSubquery [d_date_sk] #3 InputAdapter - ReusedExchange [d_date_sk] #9 + ReusedExchange [d_date_sk] #8 WholeStageCodegen (4) Project [cs_quantity,cs_list_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] @@ -28,7 +28,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Scan parquet default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] ReusedSubquery [d_date_sk] #3 InputAdapter - ReusedExchange [d_date_sk] #9 + ReusedExchange [d_date_sk] #8 WholeStageCodegen (6) Project [ws_quantity,ws_list_price] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] @@ -37,11 +37,11 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Scan parquet default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] ReusedSubquery [d_date_sk] #3 InputAdapter - ReusedExchange [d_date_sk] #9 + ReusedExchange [d_date_sk] #8 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 - WholeStageCodegen (45) + WholeStageCodegen (43) HashAggregate 
[i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ss_item_sk,i_item_sk] @@ -74,11 +74,11 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ InputAdapter Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter - WholeStageCodegen (21) + WholeStageCodegen (20) Sort [ss_item_sk] InputAdapter Exchange [ss_item_sk] #4 - WholeStageCodegen (20) + WholeStageCodegen (19) Project [i_item_sk] BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] Filter [i_brand_id,i_class_id,i_category_id] @@ -87,129 +87,124 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #5 - WholeStageCodegen (19) - HashAggregate [brand_id,class_id,category_id] + WholeStageCodegen (18) + SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] InputAdapter - Exchange [brand_id,class_id,category_id] #6 - WholeStageCodegen (18) - HashAggregate [brand_id,class_id,category_id] - SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (13) - Sort [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #7 - WholeStageCodegen (12) - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #8 - WholeStageCodegen (11) - HashAggregate [brand_id,class_id,category_id] - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #3 - BroadcastExchange #9 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (13) + Sort [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #6 + WholeStageCodegen (12) + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #7 + WholeStageCodegen (11) + HashAggregate [brand_id,class_id,category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #3 + BroadcastExchange #8 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (10) + SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + InputAdapter + WholeStageCodegen (5) + Sort [i_brand_id,i_class_id,i_category_id] InputAdapter - ReusedExchange [d_date_sk] #9 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (10) - SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (5) - Sort [i_brand_id,i_class_id,i_category_id] + Exchange 
[i_brand_id,i_class_id,i_category_id] #10 + WholeStageCodegen (4) + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #11 - WholeStageCodegen (4) - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + WholeStageCodegen (9) + Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #11 + WholeStageCodegen (8) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + ReusedSubquery [d_date_sk] #3 + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (7) + Filter [i_item_sk] ColumnarToRow InputAdapter Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (9) - Sort [i_brand_id,i_class_id,i_category_id] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #12 - WholeStageCodegen (8) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #3 - InputAdapter - ReusedExchange [d_date_sk] #9 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (7) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (17) - Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + WholeStageCodegen (17) + Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #13 + WholeStageCodegen (16) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + ReusedSubquery [d_date_sk] #3 + InputAdapter + ReusedExchange [d_date_sk] #8 InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #14 - WholeStageCodegen (16) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] - ReusedSubquery [d_date_sk] #3 - InputAdapter - ReusedExchange [d_date_sk] #9 - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #13 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 InputAdapter ReusedExchange [d_date_sk] #3 InputAdapter - BroadcastExchange #15 - WholeStageCodegen (44) + BroadcastExchange #14 + WholeStageCodegen (42) SortMergeJoin [i_item_sk,ss_item_sk] InputAdapter - WholeStageCodegen (24) + WholeStageCodegen (23) Sort [i_item_sk] InputAdapter - Exchange [i_item_sk] #16 - WholeStageCodegen (23) + Exchange [i_item_sk] #15 + WholeStageCodegen (22) Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] ColumnarToRow InputAdapter Scan parquet default.item 
[i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter - WholeStageCodegen (43) + WholeStageCodegen (41) Sort [ss_item_sk] InputAdapter ReusedExchange [ss_item_sk] #4 InputAdapter - BroadcastExchange #18 - WholeStageCodegen (91) + BroadcastExchange #17 + WholeStageCodegen (87) Filter [sales] ReusedSubquery [average_sales] #4 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #19 - WholeStageCodegen (90) + Exchange [i_brand_id,i_class_id,i_category_id] #18 + WholeStageCodegen (86) HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ss_item_sk,i_item_sk] @@ -217,17 +212,17 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ BroadcastHashJoin [ss_sold_date_sk,d_date_sk] SortMergeJoin [ss_item_sk,ss_item_sk] InputAdapter - WholeStageCodegen (47) + WholeStageCodegen (45) Sort [ss_item_sk] InputAdapter - Exchange [ss_item_sk] #20 - WholeStageCodegen (46) + Exchange [ss_item_sk] #19 + WholeStageCodegen (44) Filter [ss_item_sk] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] SubqueryBroadcast [d_date_sk] #5 - BroadcastExchange #21 + BroadcastExchange #20 WholeStageCodegen (1) Project [d_date_sk] Filter [d_week_seq,d_date_sk] @@ -242,11 +237,11 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ InputAdapter Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter - WholeStageCodegen (66) + WholeStageCodegen (63) Sort [ss_item_sk] InputAdapter ReusedExchange [ss_item_sk] #4 InputAdapter - ReusedExchange [d_date_sk] #21 + ReusedExchange [d_date_sk] #20 InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #14 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt index b0fe619430132..69be776d2ac28 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt @@ -1,90 +1,88 @@ == Physical Plan == -TakeOrderedAndProject (86) -+- * BroadcastHashJoin Inner BuildRight (85) - :- * Filter (68) - : +- * HashAggregate (67) - : +- Exchange (66) - : +- * HashAggregate (65) - : +- * Project (64) - : +- * BroadcastHashJoin Inner BuildRight (63) - : :- * Project (61) - : : +- * BroadcastHashJoin Inner BuildRight (60) - : : :- * BroadcastHashJoin LeftSemi BuildRight (53) +TakeOrderedAndProject (84) ++- * BroadcastHashJoin Inner BuildRight (83) + :- * Filter (66) + : +- * HashAggregate (65) + : +- Exchange (64) + : +- * HashAggregate (63) + : +- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Project (59) + : : +- * BroadcastHashJoin Inner BuildRight (58) + : : :- * BroadcastHashJoin LeftSemi BuildRight (51) : : : :- * Filter (3) : : : : +- * ColumnarToRow (2) : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (52) - : : : +- * Project 
(51) - : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : +- BroadcastExchange (50) + : : : +- * Project (49) + : : : +- * BroadcastHashJoin Inner BuildRight (48) : : : :- * Filter (6) : : : : +- * ColumnarToRow (5) : : : : +- Scan parquet default.item (4) - : : : +- BroadcastExchange (49) - : : : +- * HashAggregate (48) - : : : +- * HashAggregate (47) - : : : +- * BroadcastHashJoin LeftSemi BuildRight (46) - : : : :- * HashAggregate (35) - : : : : +- Exchange (34) - : : : : +- * HashAggregate (33) - : : : : +- * Project (32) - : : : : +- * BroadcastHashJoin Inner BuildRight (31) - : : : : :- * Project (29) - : : : : : +- * BroadcastHashJoin Inner BuildRight (28) - : : : : : :- * Filter (9) - : : : : : : +- * ColumnarToRow (8) - : : : : : : +- Scan parquet default.store_sales (7) - : : : : : +- BroadcastExchange (27) - : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) - : : : : : :- * Filter (12) - : : : : : : +- * ColumnarToRow (11) - : : : : : : +- Scan parquet default.item (10) - : : : : : +- BroadcastExchange (25) - : : : : : +- * Project (24) - : : : : : +- * BroadcastHashJoin Inner BuildRight (23) - : : : : : :- * Project (21) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) - : : : : : : :- * Filter (15) - : : : : : : : +- * ColumnarToRow (14) - : : : : : : : +- Scan parquet default.catalog_sales (13) - : : : : : : +- BroadcastExchange (19) - : : : : : : +- * Filter (18) - : : : : : : +- * ColumnarToRow (17) - : : : : : : +- Scan parquet default.item (16) - : : : : : +- ReusedExchange (22) - : : : : +- ReusedExchange (30) - : : : +- BroadcastExchange (45) - : : : +- * Project (44) - : : : +- * BroadcastHashJoin Inner BuildRight (43) - : : : :- * Project (41) - : : : : +- * BroadcastHashJoin Inner BuildRight (40) - : : : : :- * Filter (38) - : : : : : +- * ColumnarToRow (37) - : : : : : +- Scan parquet default.web_sales (36) - : : : : +- ReusedExchange (39) - : : : +- ReusedExchange (42) - : : +- BroadcastExchange (59) - : : +- * BroadcastHashJoin LeftSemi BuildRight (58) - : : :- * Filter (56) - : : : +- * ColumnarToRow (55) - : : : +- Scan parquet default.item (54) - : : +- ReusedExchange (57) - : +- ReusedExchange (62) - +- BroadcastExchange (84) - +- * Filter (83) - +- * HashAggregate (82) - +- Exchange (81) - +- * HashAggregate (80) - +- * Project (79) - +- * BroadcastHashJoin Inner BuildRight (78) - :- * Project (76) - : +- * BroadcastHashJoin Inner BuildRight (75) - : :- * BroadcastHashJoin LeftSemi BuildRight (73) - : : :- * Filter (71) - : : : +- * ColumnarToRow (70) - : : : +- Scan parquet default.store_sales (69) - : : +- ReusedExchange (72) - : +- ReusedExchange (74) - +- ReusedExchange (77) + : : : +- BroadcastExchange (47) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (46) + : : : :- * HashAggregate (35) + : : : : +- Exchange (34) + : : : : +- * HashAggregate (33) + : : : : +- * Project (32) + : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : :- * Project (29) + : : : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : : : :- * Filter (9) + : : : : : : +- * ColumnarToRow (8) + : : : : : : +- Scan parquet default.store_sales (7) + : : : : : +- BroadcastExchange (27) + : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : : : : : :- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet default.item (10) + : : : : : +- BroadcastExchange (25) + : : : : : +- * Project (24) + : : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : : :- * Project (21) + : : : : : : +- * 
BroadcastHashJoin Inner BuildRight (20) + : : : : : : :- * Filter (15) + : : : : : : : +- * ColumnarToRow (14) + : : : : : : : +- Scan parquet default.catalog_sales (13) + : : : : : : +- BroadcastExchange (19) + : : : : : : +- * Filter (18) + : : : : : : +- * ColumnarToRow (17) + : : : : : : +- Scan parquet default.item (16) + : : : : : +- ReusedExchange (22) + : : : : +- ReusedExchange (30) + : : : +- BroadcastExchange (45) + : : : +- * Project (44) + : : : +- * BroadcastHashJoin Inner BuildRight (43) + : : : :- * Project (41) + : : : : +- * BroadcastHashJoin Inner BuildRight (40) + : : : : :- * Filter (38) + : : : : : +- * ColumnarToRow (37) + : : : : : +- Scan parquet default.web_sales (36) + : : : : +- ReusedExchange (39) + : : : +- ReusedExchange (42) + : : +- BroadcastExchange (57) + : : +- * BroadcastHashJoin LeftSemi BuildRight (56) + : : :- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.item (52) + : : +- ReusedExchange (55) + : +- ReusedExchange (60) + +- BroadcastExchange (82) + +- * Filter (81) + +- * HashAggregate (80) + +- Exchange (79) + +- * HashAggregate (78) + +- * Project (77) + +- * BroadcastHashJoin Inner BuildRight (76) + :- * Project (74) + : +- * BroadcastHashJoin Inner BuildRight (73) + : :- * BroadcastHashJoin LeftSemi BuildRight (71) + : : :- * Filter (69) + : : : +- * ColumnarToRow (68) + : : : +- Scan parquet default.store_sales (67) + : : +- ReusedExchange (70) + : +- ReusedExchange (72) + +- ReusedExchange (75) (1) Scan parquet default.store_sales @@ -187,7 +185,7 @@ Join condition: None Output [4]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22] Input [6]: [cs_item_sk#17, cs_sold_date_sk#18, i_item_sk#19, i_brand_id#20, i_class_id#21, i_category_id#22] -(22) ReusedExchange [Reuses operator id: 119] +(22) ReusedExchange [Reuses operator id: 117] Output [1]: [d_date_sk#24] (23) BroadcastHashJoin [codegen id : 3] @@ -221,7 +219,7 @@ Join condition: None Output [4]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16] Input [6]: [ss_item_sk#10, ss_sold_date_sk#11, i_item_sk#13, i_brand_id#14, i_class_id#15, i_category_id#16] -(30) ReusedExchange [Reuses operator id: 119] +(30) ReusedExchange [Reuses operator id: 117] Output [1]: [d_date_sk#27] (31) BroadcastHashJoin [codegen id : 6] @@ -278,7 +276,7 @@ Join condition: None Output [4]: [ws_sold_date_sk#33, i_brand_id#35, i_class_id#36, i_category_id#37] Input [6]: [ws_item_sk#32, ws_sold_date_sk#33, i_item_sk#34, i_brand_id#35, i_class_id#36, i_category_id#37] -(42) ReusedExchange [Reuses operator id: 119] +(42) ReusedExchange [Reuses operator id: 117] Output [1]: [d_date_sk#38] (43) BroadcastHashJoin [codegen id : 9] @@ -299,112 +297,98 @@ Left keys [6]: [coalesce(brand_id#28, 0), isnull(brand_id#28), coalesce(class_id Right keys [6]: [coalesce(i_brand_id#35, 0), isnull(i_brand_id#35), coalesce(i_class_id#36, 0), isnull(i_class_id#36), coalesce(i_category_id#37, 0), isnull(i_category_id#37)] Join condition: None -(47) HashAggregate [codegen id : 10] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] - -(48) HashAggregate [codegen id : 10] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] - -(49) BroadcastExchange +(47) BroadcastExchange 
Input [3]: [brand_id#28, class_id#29, category_id#30] Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#40] -(50) BroadcastHashJoin [codegen id : 11] +(48) BroadcastHashJoin [codegen id : 11] Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Right keys [3]: [brand_id#28, class_id#29, category_id#30] Join condition: None -(51) Project [codegen id : 11] +(49) Project [codegen id : 11] Output [1]: [i_item_sk#6 AS ss_item_sk#41] Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#28, class_id#29, category_id#30] -(52) BroadcastExchange +(50) BroadcastExchange Input [1]: [ss_item_sk#41] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] -(53) BroadcastHashJoin [codegen id : 25] +(51) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] Right keys [1]: [ss_item_sk#41] Join condition: None -(54) Scan parquet default.item +(52) Scan parquet default.item Output [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct -(55) ColumnarToRow [codegen id : 23] +(53) ColumnarToRow [codegen id : 23] Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -(56) Filter [codegen id : 23] +(54) Filter [codegen id : 23] Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] Condition : (((isnotnull(i_item_sk#43) AND isnotnull(i_brand_id#44)) AND isnotnull(i_class_id#45)) AND isnotnull(i_category_id#46)) -(57) ReusedExchange [Reuses operator id: 52] +(55) ReusedExchange [Reuses operator id: 50] Output [1]: [ss_item_sk#41] -(58) BroadcastHashJoin [codegen id : 23] +(56) BroadcastHashJoin [codegen id : 23] Left keys [1]: [i_item_sk#43] Right keys [1]: [ss_item_sk#41] Join condition: None -(59) BroadcastExchange +(57) BroadcastExchange Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#47] -(60) BroadcastHashJoin [codegen id : 25] +(58) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] Right keys [1]: [i_item_sk#43] Join condition: None -(61) Project [codegen id : 25] +(59) Project [codegen id : 25] Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46] Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -(62) ReusedExchange [Reuses operator id: 110] +(60) ReusedExchange [Reuses operator id: 108] Output [1]: [d_date_sk#48] -(63) BroadcastHashJoin [codegen id : 25] +(61) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_sold_date_sk#4] Right keys [1]: [d_date_sk#48] Join condition: None -(64) Project [codegen id : 25] +(62) Project [codegen id : 25] Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46, d_date_sk#48] -(65) HashAggregate [codegen id : 25] +(63) HashAggregate [codegen id : 25] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: 
[partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] -(66) Exchange +(64) Exchange Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), ENSURE_REQUIREMENTS, [id=#55] -(67) HashAggregate [codegen id : 52] +(65) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56, count(1)#57] Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56 AS sales#59, count(1)#57 AS number_sales#60] -(68) Filter [codegen id : 52] +(66) Filter [codegen id : 52] Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60] Condition : (isnotnull(sales#59) AND (cast(sales#59 as decimal(32,6)) > cast(Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) -(69) Scan parquet default.store_sales +(67) Scan parquet default.store_sales Output [4]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66] Batched: true Location: InMemoryFileIndex [] @@ -412,278 +396,278 @@ PartitionFilters: [isnotnull(ss_sold_date_sk#66), dynamicpruningexpression(ss_so PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct -(70) ColumnarToRow [codegen id : 50] +(68) ColumnarToRow [codegen id : 50] Input [4]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66] -(71) Filter [codegen id : 50] +(69) Filter [codegen id : 50] Input [4]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66] Condition : isnotnull(ss_item_sk#63) -(72) ReusedExchange [Reuses operator id: 52] +(70) ReusedExchange [Reuses operator id: 50] Output [1]: [ss_item_sk#41] -(73) BroadcastHashJoin [codegen id : 50] +(71) BroadcastHashJoin [codegen id : 50] Left keys [1]: [ss_item_sk#63] Right keys [1]: [ss_item_sk#41] Join condition: None -(74) ReusedExchange [Reuses operator id: 59] +(72) ReusedExchange [Reuses operator id: 57] Output [4]: [i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71] -(75) BroadcastHashJoin [codegen id : 50] +(73) BroadcastHashJoin [codegen id : 50] Left keys [1]: [ss_item_sk#63] Right keys [1]: [i_item_sk#68] Join condition: None -(76) Project [codegen id : 50] +(74) Project [codegen id : 50] Output [6]: [ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_brand_id#69, i_class_id#70, i_category_id#71] Input [8]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71] -(77) ReusedExchange [Reuses operator id: 124] +(75) ReusedExchange [Reuses operator id: 122] Output [1]: [d_date_sk#72] -(78) BroadcastHashJoin 
[codegen id : 50] +(76) BroadcastHashJoin [codegen id : 50] Left keys [1]: [ss_sold_date_sk#66] Right keys [1]: [d_date_sk#72] Join condition: None -(79) Project [codegen id : 50] +(77) Project [codegen id : 50] Output [5]: [ss_quantity#64, ss_list_price#65, i_brand_id#69, i_class_id#70, i_category_id#71] Input [7]: [ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_brand_id#69, i_class_id#70, i_category_id#71, d_date_sk#72] -(80) HashAggregate [codegen id : 50] +(78) HashAggregate [codegen id : 50] Input [5]: [ss_quantity#64, ss_list_price#65, i_brand_id#69, i_class_id#70, i_category_id#71] Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#73, isEmpty#74, count#75] Results [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] -(81) Exchange +(79) Exchange Input [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] Arguments: hashpartitioning(i_brand_id#69, i_class_id#70, i_category_id#71, 5), ENSURE_REQUIREMENTS, [id=#79] -(82) HashAggregate [codegen id : 51] +(80) HashAggregate [codegen id : 51] Input [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2))), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#80, count(1)#81] Results [6]: [store AS channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#80 AS sales#83, count(1)#81 AS number_sales#84] -(83) Filter [codegen id : 51] +(81) Filter [codegen id : 51] Input [6]: [channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] Condition : (isnotnull(sales#83) AND (cast(sales#83 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) -(84) BroadcastExchange +(82) BroadcastExchange Input [6]: [channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#85] -(85) BroadcastHashJoin [codegen id : 52] +(83) BroadcastHashJoin [codegen id : 52] Left keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] Right keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] Join condition: None -(86) TakeOrderedAndProject +(84) TakeOrderedAndProject Input [12]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60, channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] Arguments: 100, [i_brand_id#44 ASC NULLS FIRST, i_class_id#45 ASC NULLS FIRST, i_category_id#46 ASC NULLS FIRST], [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60, channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] ===== Subqueries ===== -Subquery:1 Hosting operator id = 
68 Hosting Expression = Subquery scalar-subquery#61, [id=#62] -* HashAggregate (105) -+- Exchange (104) - +- * HashAggregate (103) - +- Union (102) - :- * Project (91) - : +- * BroadcastHashJoin Inner BuildRight (90) - : :- * ColumnarToRow (88) - : : +- Scan parquet default.store_sales (87) - : +- ReusedExchange (89) - :- * Project (96) - : +- * BroadcastHashJoin Inner BuildRight (95) - : :- * ColumnarToRow (93) - : : +- Scan parquet default.catalog_sales (92) - : +- ReusedExchange (94) - +- * Project (101) - +- * BroadcastHashJoin Inner BuildRight (100) - :- * ColumnarToRow (98) - : +- Scan parquet default.web_sales (97) - +- ReusedExchange (99) - - -(87) Scan parquet default.store_sales +Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#61, [id=#62] +* HashAggregate (103) ++- Exchange (102) + +- * HashAggregate (101) + +- Union (100) + :- * Project (89) + : +- * BroadcastHashJoin Inner BuildRight (88) + : :- * ColumnarToRow (86) + : : +- Scan parquet default.store_sales (85) + : +- ReusedExchange (87) + :- * Project (94) + : +- * BroadcastHashJoin Inner BuildRight (93) + : :- * ColumnarToRow (91) + : : +- Scan parquet default.catalog_sales (90) + : +- ReusedExchange (92) + +- * Project (99) + +- * BroadcastHashJoin Inner BuildRight (98) + :- * ColumnarToRow (96) + : +- Scan parquet default.web_sales (95) + +- ReusedExchange (97) + + +(85) Scan parquet default.store_sales Output [3]: [ss_quantity#86, ss_list_price#87, ss_sold_date_sk#88] Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ss_sold_date_sk#88), dynamicpruningexpression(ss_sold_date_sk#88 IN dynamicpruning#12)] ReadSchema: struct -(88) ColumnarToRow [codegen id : 2] +(86) ColumnarToRow [codegen id : 2] Input [3]: [ss_quantity#86, ss_list_price#87, ss_sold_date_sk#88] -(89) ReusedExchange [Reuses operator id: 119] +(87) ReusedExchange [Reuses operator id: 117] Output [1]: [d_date_sk#89] -(90) BroadcastHashJoin [codegen id : 2] +(88) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#88] Right keys [1]: [d_date_sk#89] Join condition: None -(91) Project [codegen id : 2] +(89) Project [codegen id : 2] Output [2]: [ss_quantity#86 AS quantity#90, ss_list_price#87 AS list_price#91] Input [4]: [ss_quantity#86, ss_list_price#87, ss_sold_date_sk#88, d_date_sk#89] -(92) Scan parquet default.catalog_sales +(90) Scan parquet default.catalog_sales Output [3]: [cs_quantity#92, cs_list_price#93, cs_sold_date_sk#94] Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(cs_sold_date_sk#94), dynamicpruningexpression(cs_sold_date_sk#94 IN dynamicpruning#12)] ReadSchema: struct -(93) ColumnarToRow [codegen id : 4] +(91) ColumnarToRow [codegen id : 4] Input [3]: [cs_quantity#92, cs_list_price#93, cs_sold_date_sk#94] -(94) ReusedExchange [Reuses operator id: 119] +(92) ReusedExchange [Reuses operator id: 117] Output [1]: [d_date_sk#95] -(95) BroadcastHashJoin [codegen id : 4] +(93) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#94] Right keys [1]: [d_date_sk#95] Join condition: None -(96) Project [codegen id : 4] +(94) Project [codegen id : 4] Output [2]: [cs_quantity#92 AS quantity#96, cs_list_price#93 AS list_price#97] Input [4]: [cs_quantity#92, cs_list_price#93, cs_sold_date_sk#94, d_date_sk#95] -(97) Scan parquet default.web_sales +(95) Scan parquet default.web_sales Output [3]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100] Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ws_sold_date_sk#100), 
dynamicpruningexpression(ws_sold_date_sk#100 IN dynamicpruning#12)] ReadSchema: struct -(98) ColumnarToRow [codegen id : 6] +(96) ColumnarToRow [codegen id : 6] Input [3]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100] -(99) ReusedExchange [Reuses operator id: 119] +(97) ReusedExchange [Reuses operator id: 117] Output [1]: [d_date_sk#101] -(100) BroadcastHashJoin [codegen id : 6] +(98) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_sold_date_sk#100] Right keys [1]: [d_date_sk#101] Join condition: None -(101) Project [codegen id : 6] +(99) Project [codegen id : 6] Output [2]: [ws_quantity#98 AS quantity#102, ws_list_price#99 AS list_price#103] Input [4]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100, d_date_sk#101] -(102) Union +(100) Union -(103) HashAggregate [codegen id : 7] +(101) HashAggregate [codegen id : 7] Input [2]: [quantity#90, list_price#91] Keys: [] Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#104, count#105] Results [2]: [sum#106, count#107] -(104) Exchange +(102) Exchange Input [2]: [sum#106, count#107] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#108] -(105) HashAggregate [codegen id : 8] +(103) HashAggregate [codegen id : 8] Input [2]: [sum#106, count#107] Keys: [] Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))#109] Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))#109 AS average_sales#110] -Subquery:2 Hosting operator id = 87 Hosting Expression = ss_sold_date_sk#88 IN dynamicpruning#12 +Subquery:2 Hosting operator id = 85 Hosting Expression = ss_sold_date_sk#88 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 92 Hosting Expression = cs_sold_date_sk#94 IN dynamicpruning#12 +Subquery:3 Hosting operator id = 90 Hosting Expression = cs_sold_date_sk#94 IN dynamicpruning#12 -Subquery:4 Hosting operator id = 97 Hosting Expression = ws_sold_date_sk#100 IN dynamicpruning#12 +Subquery:4 Hosting operator id = 95 Hosting Expression = ws_sold_date_sk#100 IN dynamicpruning#12 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 -BroadcastExchange (110) -+- * Project (109) - +- * Filter (108) - +- * ColumnarToRow (107) - +- Scan parquet default.date_dim (106) +BroadcastExchange (108) ++- * Project (107) + +- * Filter (106) + +- * ColumnarToRow (105) + +- Scan parquet default.date_dim (104) -(106) Scan parquet default.date_dim +(104) Scan parquet default.date_dim Output [2]: [d_date_sk#48, d_week_seq#111] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(107) ColumnarToRow [codegen id : 1] +(105) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#48, d_week_seq#111] -(108) Filter [codegen id : 1] +(106) Filter [codegen id : 1] Input [2]: [d_date_sk#48, d_week_seq#111] Condition : ((isnotnull(d_week_seq#111) AND (d_week_seq#111 = Subquery scalar-subquery#112, [id=#113])) AND isnotnull(d_date_sk#48)) -(109) Project [codegen id 
: 1] +(107) Project [codegen id : 1] Output [1]: [d_date_sk#48] Input [2]: [d_date_sk#48, d_week_seq#111] -(110) BroadcastExchange +(108) BroadcastExchange Input [1]: [d_date_sk#48] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#114] -Subquery:6 Hosting operator id = 108 Hosting Expression = Subquery scalar-subquery#112, [id=#113] -* Project (114) -+- * Filter (113) - +- * ColumnarToRow (112) - +- Scan parquet default.date_dim (111) +Subquery:6 Hosting operator id = 106 Hosting Expression = Subquery scalar-subquery#112, [id=#113] +* Project (112) ++- * Filter (111) + +- * ColumnarToRow (110) + +- Scan parquet default.date_dim (109) -(111) Scan parquet default.date_dim +(109) Scan parquet default.date_dim Output [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,2000), EqualTo(d_moy,12), EqualTo(d_dom,11)] ReadSchema: struct -(112) ColumnarToRow [codegen id : 1] +(110) ColumnarToRow [codegen id : 1] Input [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] -(113) Filter [codegen id : 1] +(111) Filter [codegen id : 1] Input [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] Condition : (((((isnotnull(d_year#116) AND isnotnull(d_moy#117)) AND isnotnull(d_dom#118)) AND (d_year#116 = 2000)) AND (d_moy#117 = 12)) AND (d_dom#118 = 11)) -(114) Project [codegen id : 1] +(112) Project [codegen id : 1] Output [1]: [d_week_seq#115] Input [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] Subquery:7 Hosting operator id = 7 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#12 -BroadcastExchange (119) -+- * Project (118) - +- * Filter (117) - +- * ColumnarToRow (116) - +- Scan parquet default.date_dim (115) +BroadcastExchange (117) ++- * Project (116) + +- * Filter (115) + +- * ColumnarToRow (114) + +- Scan parquet default.date_dim (113) -(115) Scan parquet default.date_dim +(113) Scan parquet default.date_dim Output [2]: [d_date_sk#27, d_year#119] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(116) ColumnarToRow [codegen id : 1] +(114) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#27, d_year#119] -(117) Filter [codegen id : 1] +(115) Filter [codegen id : 1] Input [2]: [d_date_sk#27, d_year#119] Condition : (((isnotnull(d_year#119) AND (d_year#119 >= 1999)) AND (d_year#119 <= 2001)) AND isnotnull(d_date_sk#27)) -(118) Project [codegen id : 1] +(116) Project [codegen id : 1] Output [1]: [d_date_sk#27] Input [2]: [d_date_sk#27, d_year#119] -(119) BroadcastExchange +(117) BroadcastExchange Input [1]: [d_date_sk#27] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#120] @@ -691,60 +675,60 @@ Subquery:8 Hosting operator id = 13 Hosting Expression = cs_sold_date_sk#18 IN d Subquery:9 Hosting operator id = 36 Hosting Expression = ws_sold_date_sk#33 IN dynamicpruning#12 -Subquery:10 Hosting operator id = 83 Hosting Expression = ReusedSubquery Subquery scalar-subquery#61, [id=#62] +Subquery:10 Hosting operator id = 81 Hosting Expression = ReusedSubquery Subquery scalar-subquery#61, [id=#62] -Subquery:11 Hosting operator id = 69 Hosting Expression = ss_sold_date_sk#66 IN dynamicpruning#67 -BroadcastExchange (124) -+- * Project (123) - +- * Filter (122) - +- * 
ColumnarToRow (121) - +- Scan parquet default.date_dim (120) +Subquery:11 Hosting operator id = 67 Hosting Expression = ss_sold_date_sk#66 IN dynamicpruning#67 +BroadcastExchange (122) ++- * Project (121) + +- * Filter (120) + +- * ColumnarToRow (119) + +- Scan parquet default.date_dim (118) -(120) Scan parquet default.date_dim +(118) Scan parquet default.date_dim Output [2]: [d_date_sk#72, d_week_seq#121] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(121) ColumnarToRow [codegen id : 1] +(119) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#72, d_week_seq#121] -(122) Filter [codegen id : 1] +(120) Filter [codegen id : 1] Input [2]: [d_date_sk#72, d_week_seq#121] Condition : ((isnotnull(d_week_seq#121) AND (d_week_seq#121 = Subquery scalar-subquery#122, [id=#123])) AND isnotnull(d_date_sk#72)) -(123) Project [codegen id : 1] +(121) Project [codegen id : 1] Output [1]: [d_date_sk#72] Input [2]: [d_date_sk#72, d_week_seq#121] -(124) BroadcastExchange +(122) BroadcastExchange Input [1]: [d_date_sk#72] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#124] -Subquery:12 Hosting operator id = 122 Hosting Expression = Subquery scalar-subquery#122, [id=#123] -* Project (128) -+- * Filter (127) - +- * ColumnarToRow (126) - +- Scan parquet default.date_dim (125) +Subquery:12 Hosting operator id = 120 Hosting Expression = Subquery scalar-subquery#122, [id=#123] +* Project (126) ++- * Filter (125) + +- * ColumnarToRow (124) + +- Scan parquet default.date_dim (123) -(125) Scan parquet default.date_dim +(123) Scan parquet default.date_dim Output [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,11)] ReadSchema: struct -(126) ColumnarToRow [codegen id : 1] +(124) ColumnarToRow [codegen id : 1] Input [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] -(127) Filter [codegen id : 1] +(125) Filter [codegen id : 1] Input [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] Condition : (((((isnotnull(d_year#126) AND isnotnull(d_moy#127)) AND isnotnull(d_dom#128)) AND (d_year#126 = 1999)) AND (d_moy#127 = 12)) AND (d_dom#128 = 11)) -(128) Project [codegen id : 1] +(126) Project [codegen id : 1] Output [1]: [d_week_seq#125] Input [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt index 8f722e735172f..259178d0e432f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt @@ -79,77 +79,75 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ InputAdapter BroadcastExchange #4 WholeStageCodegen (10) - HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #5 - 
WholeStageCodegen (6) - HashAggregate [brand_id,class_id,category_id] - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #3 - BroadcastExchange #6 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (4) - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (3) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #3 - InputAdapter - BroadcastExchange #9 - WholeStageCodegen (1) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - ReusedExchange [d_date_sk] #6 - InputAdapter - ReusedExchange [d_date_sk] #6 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (9) + InputAdapter + Exchange [brand_id,class_id,category_id] #5 + WholeStageCodegen (6) + HashAggregate [brand_id,class_id,category_id] Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Filter [ws_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] - ReusedSubquery [d_date_sk] #3 + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #3 + BroadcastExchange #6 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + BroadcastExchange #7 + WholeStageCodegen (4) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (3) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + ReusedSubquery [d_date_sk] #3 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [d_date_sk] #6 InputAdapter ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + ReusedSubquery [d_date_sk] #3 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + InputAdapter + ReusedExchange [d_date_sk] #6 InputAdapter BroadcastExchange #11 WholeStageCodegen (23) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt index 6011410caced0..3d266ee2c01c7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt @@ -1,71 +1,64 @@ == Physical Plan == -* HashAggregate (67) -+- Exchange (66) - +- * HashAggregate (65) - +- * HashAggregate (64) - +- Exchange (63) - +- * HashAggregate (62) - +- * SortMergeJoin LeftSemi (61) - :- * Sort (43) - : +- Exchange (42) - : +- * HashAggregate (41) - : +- Exchange (40) - : +- * HashAggregate (39) - : +- * SortMergeJoin LeftSemi (38) - : :- * Sort (20) - : : +- Exchange (19) - : : +- * HashAggregate (18) - : : +- Exchange (17) - : : +- * HashAggregate (16) - : : +- * Project (15) - : : +- * SortMergeJoin Inner (14) - : : :- * Sort (8) - : : : +- Exchange (7) - : : : +- * Project (6) - : : : +- * BroadcastHashJoin Inner BuildRight (5) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- ReusedExchange (4) - : : +- * Sort (13) - : : +- Exchange (12) - : : +- * Filter (11) - : : +- * ColumnarToRow (10) - : : +- Scan parquet default.customer (9) - : +- * Sort (37) - : +- Exchange (36) - : +- * HashAggregate (35) - : +- Exchange (34) - : +- * HashAggregate (33) - : +- * Project (32) - : +- * SortMergeJoin Inner (31) - : :- * Sort (28) - : : +- Exchange (27) - : : +- * Project (26) - : : +- * BroadcastHashJoin Inner BuildRight (25) - : : :- * Filter (23) - : : : +- * ColumnarToRow (22) - : : : +- Scan parquet default.catalog_sales (21) - : : +- ReusedExchange (24) - : +- * Sort (30) - : +- ReusedExchange (29) - +- * Sort (60) - +- Exchange (59) - +- * HashAggregate (58) - +- Exchange (57) - +- * HashAggregate (56) - +- * Project (55) - +- * SortMergeJoin Inner (54) - :- * Sort (51) - : +- Exchange (50) - : +- * Project (49) - : +- * BroadcastHashJoin Inner BuildRight (48) - : :- * Filter (46) - : : +- * ColumnarToRow (45) - : : +- Scan parquet default.web_sales (44) - : +- ReusedExchange (47) - +- * Sort (53) - +- ReusedExchange (52) +* HashAggregate (60) ++- Exchange (59) + +- * HashAggregate (58) + +- * Project (57) + +- * SortMergeJoin LeftSemi (56) + :- * SortMergeJoin LeftSemi (38) + : :- * Sort (20) + : : +- Exchange (19) + : : +- * HashAggregate (18) + : : +- Exchange (17) + : : +- * HashAggregate (16) + : : +- * Project (15) + : : +- * SortMergeJoin Inner (14) + : : :- * Sort (8) + : : : +- Exchange (7) + : : : +- * Project (6) + : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + 
: : : : +- Scan parquet default.store_sales (1) + : : : +- ReusedExchange (4) + : : +- * Sort (13) + : : +- Exchange (12) + : : +- * Filter (11) + : : +- * ColumnarToRow (10) + : : +- Scan parquet default.customer (9) + : +- * Sort (37) + : +- Exchange (36) + : +- * HashAggregate (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- * Project (32) + : +- * SortMergeJoin Inner (31) + : :- * Sort (28) + : : +- Exchange (27) + : : +- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * Filter (23) + : : : +- * ColumnarToRow (22) + : : : +- Scan parquet default.catalog_sales (21) + : : +- ReusedExchange (24) + : +- * Sort (30) + : +- ReusedExchange (29) + +- * Sort (55) + +- Exchange (54) + +- * HashAggregate (53) + +- Exchange (52) + +- * HashAggregate (51) + +- * Project (50) + +- * SortMergeJoin Inner (49) + :- * Sort (46) + : +- Exchange (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Filter (41) + : : +- * ColumnarToRow (40) + : : +- Scan parquet default.web_sales (39) + : +- ReusedExchange (42) + +- * Sort (48) + +- ReusedExchange (47) (1) Scan parquet default.store_sales @@ -83,7 +76,7 @@ Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] Condition : isnotnull(ss_customer_sk#1) -(4) ReusedExchange [Reuses operator id: 72] +(4) ReusedExchange [Reuses operator id: 65] Output [2]: [d_date_sk#4, d_date#5] (5) BroadcastHashJoin [codegen id : 2] @@ -175,7 +168,7 @@ Input [2]: [cs_bill_customer_sk#13, cs_sold_date_sk#14] Input [2]: [cs_bill_customer_sk#13, cs_sold_date_sk#14] Condition : isnotnull(cs_bill_customer_sk#13) -(24) ReusedExchange [Reuses operator id: 72] +(24) ReusedExchange [Reuses operator id: 65] Output [2]: [d_date_sk#15, d_date#16] (25) BroadcastHashJoin [codegen id : 10] @@ -242,184 +235,144 @@ Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_fir Right keys [6]: [coalesce(c_last_name#20, ), isnull(c_last_name#20), coalesce(c_first_name#19, ), isnull(c_first_name#19), coalesce(d_date#16, 1970-01-01), isnull(d_date#16)] Join condition: None -(39) HashAggregate [codegen id : 17] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#5] - -(40) Exchange -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: hashpartitioning(c_last_name#9, c_first_name#8, d_date#5, 5), ENSURE_REQUIREMENTS, [id=#23] - -(41) HashAggregate [codegen id : 18] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#5] - -(42) Exchange -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: hashpartitioning(coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#5, 1970-01-01), isnull(d_date#5), 5), ENSURE_REQUIREMENTS, [id=#24] - -(43) Sort [codegen id : 19] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: [coalesce(c_last_name#9, ) ASC NULLS FIRST, isnull(c_last_name#9) ASC NULLS FIRST, coalesce(c_first_name#8, ) ASC NULLS FIRST, isnull(c_first_name#8) ASC NULLS FIRST, coalesce(d_date#5, 1970-01-01) ASC NULLS FIRST, isnull(d_date#5) ASC NULLS FIRST], false, 0 - -(44) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#25, ws_sold_date_sk#26] +(39) Scan parquet 
default.web_sales +Output [2]: [ws_bill_customer_sk#23, ws_sold_date_sk#24] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#26), dynamicpruningexpression(ws_sold_date_sk#26 IN dynamicpruning#3)] +PartitionFilters: [isnotnull(ws_sold_date_sk#24), dynamicpruningexpression(ws_sold_date_sk#24 IN dynamicpruning#3)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct -(45) ColumnarToRow [codegen id : 21] -Input [2]: [ws_bill_customer_sk#25, ws_sold_date_sk#26] +(40) ColumnarToRow [codegen id : 19] +Input [2]: [ws_bill_customer_sk#23, ws_sold_date_sk#24] -(46) Filter [codegen id : 21] -Input [2]: [ws_bill_customer_sk#25, ws_sold_date_sk#26] -Condition : isnotnull(ws_bill_customer_sk#25) +(41) Filter [codegen id : 19] +Input [2]: [ws_bill_customer_sk#23, ws_sold_date_sk#24] +Condition : isnotnull(ws_bill_customer_sk#23) -(47) ReusedExchange [Reuses operator id: 72] -Output [2]: [d_date_sk#27, d_date#28] +(42) ReusedExchange [Reuses operator id: 65] +Output [2]: [d_date_sk#25, d_date#26] -(48) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [ws_sold_date_sk#26] -Right keys [1]: [d_date_sk#27] +(43) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ws_sold_date_sk#24] +Right keys [1]: [d_date_sk#25] Join condition: None -(49) Project [codegen id : 21] -Output [2]: [ws_bill_customer_sk#25, d_date#28] -Input [4]: [ws_bill_customer_sk#25, ws_sold_date_sk#26, d_date_sk#27, d_date#28] +(44) Project [codegen id : 19] +Output [2]: [ws_bill_customer_sk#23, d_date#26] +Input [4]: [ws_bill_customer_sk#23, ws_sold_date_sk#24, d_date_sk#25, d_date#26] -(50) Exchange -Input [2]: [ws_bill_customer_sk#25, d_date#28] -Arguments: hashpartitioning(ws_bill_customer_sk#25, 5), ENSURE_REQUIREMENTS, [id=#29] +(45) Exchange +Input [2]: [ws_bill_customer_sk#23, d_date#26] +Arguments: hashpartitioning(ws_bill_customer_sk#23, 5), ENSURE_REQUIREMENTS, [id=#27] -(51) Sort [codegen id : 22] -Input [2]: [ws_bill_customer_sk#25, d_date#28] -Arguments: [ws_bill_customer_sk#25 ASC NULLS FIRST], false, 0 +(46) Sort [codegen id : 20] +Input [2]: [ws_bill_customer_sk#23, d_date#26] +Arguments: [ws_bill_customer_sk#23 ASC NULLS FIRST], false, 0 -(52) ReusedExchange [Reuses operator id: 12] -Output [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +(47) ReusedExchange [Reuses operator id: 12] +Output [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] -(53) Sort [codegen id : 24] -Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] -Arguments: [c_customer_sk#30 ASC NULLS FIRST], false, 0 +(48) Sort [codegen id : 22] +Input [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(54) SortMergeJoin [codegen id : 25] -Left keys [1]: [ws_bill_customer_sk#25] -Right keys [1]: [c_customer_sk#30] +(49) SortMergeJoin [codegen id : 23] +Left keys [1]: [ws_bill_customer_sk#23] +Right keys [1]: [c_customer_sk#28] Join condition: None -(55) Project [codegen id : 25] -Output [3]: [c_last_name#32, c_first_name#31, d_date#28] -Input [5]: [ws_bill_customer_sk#25, d_date#28, c_customer_sk#30, c_first_name#31, c_last_name#32] +(50) Project [codegen id : 23] +Output [3]: [c_last_name#30, c_first_name#29, d_date#26] +Input [5]: [ws_bill_customer_sk#23, d_date#26, c_customer_sk#28, c_first_name#29, c_last_name#30] -(56) HashAggregate [codegen id : 25] -Input [3]: [c_last_name#32, c_first_name#31, d_date#28] -Keys [3]: [c_last_name#32, c_first_name#31, d_date#28] +(51) HashAggregate [codegen id : 23] +Input [3]: 
[c_last_name#30, c_first_name#29, d_date#26] +Keys [3]: [c_last_name#30, c_first_name#29, d_date#26] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#32, c_first_name#31, d_date#28] +Results [3]: [c_last_name#30, c_first_name#29, d_date#26] -(57) Exchange -Input [3]: [c_last_name#32, c_first_name#31, d_date#28] -Arguments: hashpartitioning(c_last_name#32, c_first_name#31, d_date#28, 5), ENSURE_REQUIREMENTS, [id=#33] +(52) Exchange +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Arguments: hashpartitioning(c_last_name#30, c_first_name#29, d_date#26, 5), ENSURE_REQUIREMENTS, [id=#31] -(58) HashAggregate [codegen id : 26] -Input [3]: [c_last_name#32, c_first_name#31, d_date#28] -Keys [3]: [c_last_name#32, c_first_name#31, d_date#28] +(53) HashAggregate [codegen id : 24] +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Keys [3]: [c_last_name#30, c_first_name#29, d_date#26] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#32, c_first_name#31, d_date#28] +Results [3]: [c_last_name#30, c_first_name#29, d_date#26] -(59) Exchange -Input [3]: [c_last_name#32, c_first_name#31, d_date#28] -Arguments: hashpartitioning(coalesce(c_last_name#32, ), isnull(c_last_name#32), coalesce(c_first_name#31, ), isnull(c_first_name#31), coalesce(d_date#28, 1970-01-01), isnull(d_date#28), 5), ENSURE_REQUIREMENTS, [id=#34] +(54) Exchange +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Arguments: hashpartitioning(coalesce(c_last_name#30, ), isnull(c_last_name#30), coalesce(c_first_name#29, ), isnull(c_first_name#29), coalesce(d_date#26, 1970-01-01), isnull(d_date#26), 5), ENSURE_REQUIREMENTS, [id=#32] -(60) Sort [codegen id : 27] -Input [3]: [c_last_name#32, c_first_name#31, d_date#28] -Arguments: [coalesce(c_last_name#32, ) ASC NULLS FIRST, isnull(c_last_name#32) ASC NULLS FIRST, coalesce(c_first_name#31, ) ASC NULLS FIRST, isnull(c_first_name#31) ASC NULLS FIRST, coalesce(d_date#28, 1970-01-01) ASC NULLS FIRST, isnull(d_date#28) ASC NULLS FIRST], false, 0 +(55) Sort [codegen id : 25] +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Arguments: [coalesce(c_last_name#30, ) ASC NULLS FIRST, isnull(c_last_name#30) ASC NULLS FIRST, coalesce(c_first_name#29, ) ASC NULLS FIRST, isnull(c_first_name#29) ASC NULLS FIRST, coalesce(d_date#26, 1970-01-01) ASC NULLS FIRST, isnull(d_date#26) ASC NULLS FIRST], false, 0 -(61) SortMergeJoin [codegen id : 28] +(56) SortMergeJoin [codegen id : 26] Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] -Right keys [6]: [coalesce(c_last_name#32, ), isnull(c_last_name#32), coalesce(c_first_name#31, ), isnull(c_first_name#31), coalesce(d_date#28, 1970-01-01), isnull(d_date#28)] +Right keys [6]: [coalesce(c_last_name#30, ), isnull(c_last_name#30), coalesce(c_first_name#29, ), isnull(c_first_name#29), coalesce(d_date#26, 1970-01-01), isnull(d_date#26)] Join condition: None -(62) HashAggregate [codegen id : 28] +(57) Project [codegen id : 26] +Output: [] Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#5] -(63) Exchange -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: hashpartitioning(c_last_name#9, c_first_name#8, d_date#5, 5), ENSURE_REQUIREMENTS, [id=#35] - -(64) HashAggregate [codegen id : 29] -Input [3]: [c_last_name#9, c_first_name#8, 
d_date#5] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results: [] - -(65) HashAggregate [codegen id : 29] +(58) HashAggregate [codegen id : 26] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#36] -Results [1]: [count#37] +Aggregate Attributes [1]: [count#33] +Results [1]: [count#34] -(66) Exchange -Input [1]: [count#37] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#38] +(59) Exchange +Input [1]: [count#34] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#35] -(67) HashAggregate [codegen id : 30] -Input [1]: [count#37] +(60) HashAggregate [codegen id : 27] +Input [1]: [count#34] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#39] -Results [1]: [count(1)#39 AS count(1)#40] +Aggregate Attributes [1]: [count(1)#36] +Results [1]: [count(1)#36 AS count(1)#37] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#2 IN dynamicpruning#3 -BroadcastExchange (72) -+- * Project (71) - +- * Filter (70) - +- * ColumnarToRow (69) - +- Scan parquet default.date_dim (68) +BroadcastExchange (65) ++- * Project (64) + +- * Filter (63) + +- * ColumnarToRow (62) + +- Scan parquet default.date_dim (61) -(68) Scan parquet default.date_dim -Output [3]: [d_date_sk#4, d_date#5, d_month_seq#41] +(61) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct -(69) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#41] +(62) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#38] -(70) Filter [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#41] -Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= 1200)) AND (d_month_seq#41 <= 1211)) AND isnotnull(d_date_sk#4)) +(63) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#38] +Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1200)) AND (d_month_seq#38 <= 1211)) AND isnotnull(d_date_sk#4)) -(71) Project [codegen id : 1] +(64) Project [codegen id : 1] Output [2]: [d_date_sk#4, d_date#5] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#41] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#38] -(72) BroadcastExchange +(65) BroadcastExchange Input [2]: [d_date_sk#4, d_date#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] Subquery:2 Hosting operator id = 21 Hosting Expression = cs_sold_date_sk#14 IN dynamicpruning#3 -Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#26 IN dynamicpruning#3 +Subquery:3 Hosting operator id = 39 Hosting Expression = ws_sold_date_sk#24 IN dynamicpruning#3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt index eda0d4b03f483..cc66a0040ef9a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt @@ -1,135 +1,122 @@ -WholeStageCodegen (30) 
+WholeStageCodegen (27) HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 - WholeStageCodegen (29) + WholeStageCodegen (26) HashAggregate [count,count] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (28) - HashAggregate [c_last_name,c_first_name,d_date] - SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - InputAdapter - WholeStageCodegen (19) - Sort [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #3 - WholeStageCodegen (18) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #4 - WholeStageCodegen (17) - HashAggregate [c_last_name,c_first_name,d_date] - SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + Project + SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + InputAdapter + WholeStageCodegen (17) + SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + InputAdapter + WholeStageCodegen (8) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (7) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #3 + WholeStageCodegen (6) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter - WholeStageCodegen (8) - Sort [c_last_name,c_first_name,d_date] + WholeStageCodegen (3) + Sort [ss_customer_sk] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #5 - WholeStageCodegen (7) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #6 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (3) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #7 - WholeStageCodegen (2) - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #8 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - ReusedExchange [d_date_sk,d_date] #8 - InputAdapter - WholeStageCodegen (5) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #9 - WholeStageCodegen (4) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + Exchange [ss_customer_sk] #4 + WholeStageCodegen (2) + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + ReusedExchange [d_date_sk,d_date] #5 InputAdapter - WholeStageCodegen (16) - Sort 
[c_last_name,c_first_name,d_date] + WholeStageCodegen (5) + Sort [c_customer_sk] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #10 - WholeStageCodegen (15) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #11 - WholeStageCodegen (14) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (11) - Sort [cs_bill_customer_sk] - InputAdapter - Exchange [cs_bill_customer_sk] #12 - WholeStageCodegen (10) - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #1 - InputAdapter - ReusedExchange [d_date_sk,d_date] #8 - InputAdapter - WholeStageCodegen (13) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #9 - InputAdapter - WholeStageCodegen (27) - Sort [c_last_name,c_first_name,d_date] + Exchange [c_customer_sk] #6 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + WholeStageCodegen (16) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #7 + WholeStageCodegen (15) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (14) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (11) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #9 + WholeStageCodegen (10) + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [d_date_sk,d_date] #5 + InputAdapter + WholeStageCodegen (13) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 + InputAdapter + WholeStageCodegen (25) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #10 + WholeStageCodegen (24) + HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #13 - WholeStageCodegen (26) + Exchange [c_last_name,c_first_name,d_date] #11 + WholeStageCodegen (23) HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #14 - WholeStageCodegen (25) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - SortMergeJoin [ws_bill_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (22) - Sort [ws_bill_customer_sk] - InputAdapter - Exchange [ws_bill_customer_sk] #15 - WholeStageCodegen (21) - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] - ReusedSubquery [d_date_sk] #1 - InputAdapter - ReusedExchange [d_date_sk,d_date] #8 - InputAdapter - WholeStageCodegen (24) - Sort [c_customer_sk] - InputAdapter - 
ReusedExchange [c_customer_sk,c_first_name,c_last_name] #9 + Project [c_last_name,c_first_name,d_date] + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (20) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #12 + WholeStageCodegen (19) + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [d_date_sk,d_date] #5 + InputAdapter + WholeStageCodegen (22) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt index ca4a34d7b6087..60190c9f39e43 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt @@ -1,54 +1,51 @@ == Physical Plan == -* HashAggregate (50) -+- Exchange (49) - +- * HashAggregate (48) - +- * HashAggregate (47) - +- * HashAggregate (46) - +- * BroadcastHashJoin LeftSemi BuildRight (45) - :- * HashAggregate (31) - : +- * HashAggregate (30) - : +- * BroadcastHashJoin LeftSemi BuildRight (29) - : :- * HashAggregate (15) - : : +- Exchange (14) - : : +- * HashAggregate (13) - : : +- * Project (12) - : : +- * BroadcastHashJoin Inner BuildRight (11) - : : :- * Project (6) - : : : +- * BroadcastHashJoin Inner BuildRight (5) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- ReusedExchange (4) - : : +- BroadcastExchange (10) - : : +- * Filter (9) - : : +- * ColumnarToRow (8) - : : +- Scan parquet default.customer (7) - : +- BroadcastExchange (28) - : +- * HashAggregate (27) - : +- Exchange (26) - : +- * HashAggregate (25) - : +- * Project (24) - : +- * BroadcastHashJoin Inner BuildRight (23) - : :- * Project (21) - : : +- * BroadcastHashJoin Inner BuildRight (20) - : : :- * Filter (18) - : : : +- * ColumnarToRow (17) - : : : +- Scan parquet default.catalog_sales (16) - : : +- ReusedExchange (19) - : +- ReusedExchange (22) - +- BroadcastExchange (44) - +- * HashAggregate (43) - +- Exchange (42) - +- * HashAggregate (41) - +- * Project (40) - +- * BroadcastHashJoin Inner BuildRight (39) - :- * Project (37) - : +- * BroadcastHashJoin Inner BuildRight (36) - : :- * Filter (34) - : : +- * ColumnarToRow (33) - : : +- Scan parquet default.web_sales (32) - : +- ReusedExchange (35) - +- ReusedExchange (38) +* HashAggregate (47) ++- Exchange (46) + +- * HashAggregate (45) + +- * Project (44) + +- * BroadcastHashJoin LeftSemi BuildRight (43) + :- * BroadcastHashJoin LeftSemi BuildRight (29) + : :- * HashAggregate (15) + : : +- Exchange (14) + : : +- * HashAggregate (13) + : : +- * Project (12) + : : +- * BroadcastHashJoin Inner BuildRight (11) + : : :- * Project (6) + : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- ReusedExchange (4) + : : +- BroadcastExchange (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet default.customer (7) + : +- BroadcastExchange (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * 
Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet default.catalog_sales (16) + : : +- ReusedExchange (19) + : +- ReusedExchange (22) + +- BroadcastExchange (42) + +- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * Project (38) + +- * BroadcastHashJoin Inner BuildRight (37) + :- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet default.web_sales (30) + : +- ReusedExchange (33) + +- ReusedExchange (36) (1) Scan parquet default.store_sales @@ -66,7 +63,7 @@ Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] Condition : isnotnull(ss_customer_sk#1) -(4) ReusedExchange [Reuses operator id: 55] +(4) ReusedExchange [Reuses operator id: 52] Output [2]: [d_date_sk#4, d_date#5] (5) BroadcastHashJoin [codegen id : 3] @@ -138,7 +135,7 @@ Input [2]: [cs_bill_customer_sk#11, cs_sold_date_sk#12] Input [2]: [cs_bill_customer_sk#11, cs_sold_date_sk#12] Condition : isnotnull(cs_bill_customer_sk#11) -(19) ReusedExchange [Reuses operator id: 55] +(19) ReusedExchange [Reuses operator id: 52] Output [2]: [d_date_sk#13, d_date#14] (20) BroadcastHashJoin [codegen id : 6] @@ -189,21 +186,7 @@ Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_fir Right keys [6]: [coalesce(c_last_name#17, ), isnull(c_last_name#17), coalesce(c_first_name#16, ), isnull(c_first_name#16), coalesce(d_date#14, 1970-01-01), isnull(d_date#14)] Join condition: None -(30) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#8, c_first_name#7, d_date#5] -Keys [3]: [c_last_name#8, c_first_name#7, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#8, c_first_name#7, d_date#5] - -(31) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#8, c_first_name#7, d_date#5] -Keys [3]: [c_last_name#8, c_first_name#7, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#8, c_first_name#7, d_date#5] - -(32) Scan parquet default.web_sales +(30) Scan parquet default.web_sales Output [2]: [ws_bill_customer_sk#20, ws_sold_date_sk#21] Batched: true Location: InMemoryFileIndex [] @@ -211,90 +194,80 @@ PartitionFilters: [isnotnull(ws_sold_date_sk#21), dynamicpruningexpression(ws_so PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct -(33) ColumnarToRow [codegen id : 10] +(31) ColumnarToRow [codegen id : 10] Input [2]: [ws_bill_customer_sk#20, ws_sold_date_sk#21] -(34) Filter [codegen id : 10] +(32) Filter [codegen id : 10] Input [2]: [ws_bill_customer_sk#20, ws_sold_date_sk#21] Condition : isnotnull(ws_bill_customer_sk#20) -(35) ReusedExchange [Reuses operator id: 55] +(33) ReusedExchange [Reuses operator id: 52] Output [2]: [d_date_sk#22, d_date#23] -(36) BroadcastHashJoin [codegen id : 10] +(34) BroadcastHashJoin [codegen id : 10] Left keys [1]: [ws_sold_date_sk#21] Right keys [1]: [d_date_sk#22] Join condition: None -(37) Project [codegen id : 10] +(35) Project [codegen id : 10] Output [2]: [ws_bill_customer_sk#20, d_date#23] Input [4]: [ws_bill_customer_sk#20, ws_sold_date_sk#21, d_date_sk#22, d_date#23] -(38) ReusedExchange [Reuses operator id: 10] +(36) ReusedExchange [Reuses operator id: 10] Output [3]: [c_customer_sk#24, c_first_name#25, c_last_name#26] -(39) BroadcastHashJoin [codegen id : 10] +(37) BroadcastHashJoin [codegen id : 10] Left 
keys [1]: [ws_bill_customer_sk#20] Right keys [1]: [c_customer_sk#24] Join condition: None -(40) Project [codegen id : 10] +(38) Project [codegen id : 10] Output [3]: [c_last_name#26, c_first_name#25, d_date#23] Input [5]: [ws_bill_customer_sk#20, d_date#23, c_customer_sk#24, c_first_name#25, c_last_name#26] -(41) HashAggregate [codegen id : 10] +(39) HashAggregate [codegen id : 10] Input [3]: [c_last_name#26, c_first_name#25, d_date#23] Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] Functions: [] Aggregate Attributes: [] Results [3]: [c_last_name#26, c_first_name#25, d_date#23] -(42) Exchange +(40) Exchange Input [3]: [c_last_name#26, c_first_name#25, d_date#23] Arguments: hashpartitioning(c_last_name#26, c_first_name#25, d_date#23, 5), ENSURE_REQUIREMENTS, [id=#27] -(43) HashAggregate [codegen id : 11] +(41) HashAggregate [codegen id : 11] Input [3]: [c_last_name#26, c_first_name#25, d_date#23] Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] Functions: [] Aggregate Attributes: [] Results [3]: [c_last_name#26, c_first_name#25, d_date#23] -(44) BroadcastExchange +(42) BroadcastExchange Input [3]: [c_last_name#26, c_first_name#25, d_date#23] Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [id=#28] -(45) BroadcastHashJoin [codegen id : 12] +(43) BroadcastHashJoin [codegen id : 12] Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] Right keys [6]: [coalesce(c_last_name#26, ), isnull(c_last_name#26), coalesce(c_first_name#25, ), isnull(c_first_name#25), coalesce(d_date#23, 1970-01-01), isnull(d_date#23)] Join condition: None -(46) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#8, c_first_name#7, d_date#5] -Keys [3]: [c_last_name#8, c_first_name#7, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#8, c_first_name#7, d_date#5] - -(47) HashAggregate [codegen id : 12] +(44) Project [codegen id : 12] +Output: [] Input [3]: [c_last_name#8, c_first_name#7, d_date#5] -Keys [3]: [c_last_name#8, c_first_name#7, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results: [] -(48) HashAggregate [codegen id : 12] +(45) HashAggregate [codegen id : 12] Input: [] Keys: [] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#29] Results [1]: [count#30] -(49) Exchange +(46) Exchange Input [1]: [count#30] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#31] -(50) HashAggregate [codegen id : 13] +(47) HashAggregate [codegen id : 13] Input [1]: [count#30] Keys: [] Functions [1]: [count(1)] @@ -304,37 +277,37 @@ Results [1]: [count(1)#32 AS count(1)#33] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#2 IN dynamicpruning#3 -BroadcastExchange (55) -+- * Project (54) - +- * Filter (53) - +- * ColumnarToRow (52) - +- Scan parquet default.date_dim (51) +BroadcastExchange (52) ++- * Project (51) + +- * Filter (50) + +- * ColumnarToRow (49) + +- Scan parquet default.date_dim (48) -(51) Scan parquet default.date_dim +(48) Scan parquet default.date_dim Output [3]: [d_date_sk#4, d_date#5, d_month_seq#34] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), 
LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct -(52) ColumnarToRow [codegen id : 1] +(49) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#4, d_date#5, d_month_seq#34] -(53) Filter [codegen id : 1] +(50) Filter [codegen id : 1] Input [3]: [d_date_sk#4, d_date#5, d_month_seq#34] Condition : (((isnotnull(d_month_seq#34) AND (d_month_seq#34 >= 1200)) AND (d_month_seq#34 <= 1211)) AND isnotnull(d_date_sk#4)) -(54) Project [codegen id : 1] +(51) Project [codegen id : 1] Output [2]: [d_date_sk#4, d_date#5] Input [3]: [d_date_sk#4, d_date#5, d_month_seq#34] -(55) BroadcastExchange +(52) BroadcastExchange Input [2]: [d_date_sk#4, d_date#5] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] Subquery:2 Hosting operator id = 16 Hosting Expression = cs_sold_date_sk#12 IN dynamicpruning#3 -Subquery:3 Hosting operator id = 32 Hosting Expression = ws_sold_date_sk#21 IN dynamicpruning#3 +Subquery:3 Hosting operator id = 30 Hosting Expression = ws_sold_date_sk#21 IN dynamicpruning#3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt index 7f96f5657836a..34d46c5671774 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt @@ -4,81 +4,78 @@ WholeStageCodegen (13) Exchange #1 WholeStageCodegen (12) HashAggregate [count,count] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] + Project + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (3) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - ReusedExchange [d_date_sk,d_date] #3 - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (2) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (7) - HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales 
[ss_customer_sk,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 InputAdapter - Exchange [c_last_name,c_first_name,d_date] #6 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #1 - InputAdapter - ReusedExchange [d_date_sk,d_date] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (11) + BroadcastExchange #5 + WholeStageCodegen (7) HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (10) + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen (6) HashAggregate [c_last_name,c_first_name,d_date] Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_bill_customer_sk] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] ReusedSubquery [d_date_sk] #1 InputAdapter ReusedExchange [d_date_sk,d_date] #3 InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/explain.txt index 408b0defda53c..38ecc6f3ed822 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/explain.txt @@ -1,71 +1,64 @@ == Physical Plan == -* HashAggregate (67) -+- Exchange (66) - +- * HashAggregate (65) - +- * HashAggregate (64) - +- Exchange (63) - +- * HashAggregate (62) - +- * 
SortMergeJoin LeftAnti (61) - :- * Sort (43) - : +- Exchange (42) - : +- * HashAggregate (41) - : +- Exchange (40) - : +- * HashAggregate (39) - : +- * SortMergeJoin LeftAnti (38) - : :- * Sort (20) - : : +- Exchange (19) - : : +- * HashAggregate (18) - : : +- Exchange (17) - : : +- * HashAggregate (16) - : : +- * Project (15) - : : +- * SortMergeJoin Inner (14) - : : :- * Sort (8) - : : : +- Exchange (7) - : : : +- * Project (6) - : : : +- * BroadcastHashJoin Inner BuildRight (5) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- ReusedExchange (4) - : : +- * Sort (13) - : : +- Exchange (12) - : : +- * Filter (11) - : : +- * ColumnarToRow (10) - : : +- Scan parquet default.customer (9) - : +- * Sort (37) - : +- Exchange (36) - : +- * HashAggregate (35) - : +- Exchange (34) - : +- * HashAggregate (33) - : +- * Project (32) - : +- * SortMergeJoin Inner (31) - : :- * Sort (28) - : : +- Exchange (27) - : : +- * Project (26) - : : +- * BroadcastHashJoin Inner BuildRight (25) - : : :- * Filter (23) - : : : +- * ColumnarToRow (22) - : : : +- Scan parquet default.catalog_sales (21) - : : +- ReusedExchange (24) - : +- * Sort (30) - : +- ReusedExchange (29) - +- * Sort (60) - +- Exchange (59) - +- * HashAggregate (58) - +- Exchange (57) - +- * HashAggregate (56) - +- * Project (55) - +- * SortMergeJoin Inner (54) - :- * Sort (51) - : +- Exchange (50) - : +- * Project (49) - : +- * BroadcastHashJoin Inner BuildRight (48) - : :- * Filter (46) - : : +- * ColumnarToRow (45) - : : +- Scan parquet default.web_sales (44) - : +- ReusedExchange (47) - +- * Sort (53) - +- ReusedExchange (52) +* HashAggregate (60) ++- Exchange (59) + +- * HashAggregate (58) + +- * Project (57) + +- * SortMergeJoin LeftAnti (56) + :- * SortMergeJoin LeftAnti (38) + : :- * Sort (20) + : : +- Exchange (19) + : : +- * HashAggregate (18) + : : +- Exchange (17) + : : +- * HashAggregate (16) + : : +- * Project (15) + : : +- * SortMergeJoin Inner (14) + : : :- * Sort (8) + : : : +- Exchange (7) + : : : +- * Project (6) + : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- ReusedExchange (4) + : : +- * Sort (13) + : : +- Exchange (12) + : : +- * Filter (11) + : : +- * ColumnarToRow (10) + : : +- Scan parquet default.customer (9) + : +- * Sort (37) + : +- Exchange (36) + : +- * HashAggregate (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- * Project (32) + : +- * SortMergeJoin Inner (31) + : :- * Sort (28) + : : +- Exchange (27) + : : +- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * Filter (23) + : : : +- * ColumnarToRow (22) + : : : +- Scan parquet default.catalog_sales (21) + : : +- ReusedExchange (24) + : +- * Sort (30) + : +- ReusedExchange (29) + +- * Sort (55) + +- Exchange (54) + +- * HashAggregate (53) + +- Exchange (52) + +- * HashAggregate (51) + +- * Project (50) + +- * SortMergeJoin Inner (49) + :- * Sort (46) + : +- Exchange (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Filter (41) + : : +- * ColumnarToRow (40) + : : +- Scan parquet default.web_sales (39) + : +- ReusedExchange (42) + +- * Sort (48) + +- ReusedExchange (47) (1) Scan parquet default.store_sales @@ -83,7 +76,7 @@ Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] Condition : isnotnull(ss_customer_sk#1) -(4) ReusedExchange [Reuses operator id: 72] +(4) 
ReusedExchange [Reuses operator id: 65] Output [2]: [d_date_sk#4, d_date#5] (5) BroadcastHashJoin [codegen id : 2] @@ -175,7 +168,7 @@ Input [2]: [cs_bill_customer_sk#13, cs_sold_date_sk#14] Input [2]: [cs_bill_customer_sk#13, cs_sold_date_sk#14] Condition : isnotnull(cs_bill_customer_sk#13) -(24) ReusedExchange [Reuses operator id: 72] +(24) ReusedExchange [Reuses operator id: 65] Output [2]: [d_date_sk#15, d_date#16] (25) BroadcastHashJoin [codegen id : 10] @@ -242,184 +235,144 @@ Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_fir Right keys [6]: [coalesce(c_last_name#20, ), isnull(c_last_name#20), coalesce(c_first_name#19, ), isnull(c_first_name#19), coalesce(d_date#16, 1970-01-01), isnull(d_date#16)] Join condition: None -(39) HashAggregate [codegen id : 17] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#5] - -(40) Exchange -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: hashpartitioning(c_last_name#9, c_first_name#8, d_date#5, 5), ENSURE_REQUIREMENTS, [id=#23] - -(41) HashAggregate [codegen id : 18] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#5] - -(42) Exchange -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: hashpartitioning(coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#5, 1970-01-01), isnull(d_date#5), 5), ENSURE_REQUIREMENTS, [id=#24] - -(43) Sort [codegen id : 19] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: [coalesce(c_last_name#9, ) ASC NULLS FIRST, isnull(c_last_name#9) ASC NULLS FIRST, coalesce(c_first_name#8, ) ASC NULLS FIRST, isnull(c_first_name#8) ASC NULLS FIRST, coalesce(d_date#5, 1970-01-01) ASC NULLS FIRST, isnull(d_date#5) ASC NULLS FIRST], false, 0 - -(44) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#25, ws_sold_date_sk#26] +(39) Scan parquet default.web_sales +Output [2]: [ws_bill_customer_sk#23, ws_sold_date_sk#24] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#26), dynamicpruningexpression(ws_sold_date_sk#26 IN dynamicpruning#3)] +PartitionFilters: [isnotnull(ws_sold_date_sk#24), dynamicpruningexpression(ws_sold_date_sk#24 IN dynamicpruning#3)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct -(45) ColumnarToRow [codegen id : 21] -Input [2]: [ws_bill_customer_sk#25, ws_sold_date_sk#26] +(40) ColumnarToRow [codegen id : 19] +Input [2]: [ws_bill_customer_sk#23, ws_sold_date_sk#24] -(46) Filter [codegen id : 21] -Input [2]: [ws_bill_customer_sk#25, ws_sold_date_sk#26] -Condition : isnotnull(ws_bill_customer_sk#25) +(41) Filter [codegen id : 19] +Input [2]: [ws_bill_customer_sk#23, ws_sold_date_sk#24] +Condition : isnotnull(ws_bill_customer_sk#23) -(47) ReusedExchange [Reuses operator id: 72] -Output [2]: [d_date_sk#27, d_date#28] +(42) ReusedExchange [Reuses operator id: 65] +Output [2]: [d_date_sk#25, d_date#26] -(48) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [ws_sold_date_sk#26] -Right keys [1]: [d_date_sk#27] +(43) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ws_sold_date_sk#24] +Right keys [1]: [d_date_sk#25] Join condition: None -(49) Project [codegen id : 21] -Output [2]: [ws_bill_customer_sk#25, 
d_date#28] -Input [4]: [ws_bill_customer_sk#25, ws_sold_date_sk#26, d_date_sk#27, d_date#28] +(44) Project [codegen id : 19] +Output [2]: [ws_bill_customer_sk#23, d_date#26] +Input [4]: [ws_bill_customer_sk#23, ws_sold_date_sk#24, d_date_sk#25, d_date#26] -(50) Exchange -Input [2]: [ws_bill_customer_sk#25, d_date#28] -Arguments: hashpartitioning(ws_bill_customer_sk#25, 5), ENSURE_REQUIREMENTS, [id=#29] +(45) Exchange +Input [2]: [ws_bill_customer_sk#23, d_date#26] +Arguments: hashpartitioning(ws_bill_customer_sk#23, 5), ENSURE_REQUIREMENTS, [id=#27] -(51) Sort [codegen id : 22] -Input [2]: [ws_bill_customer_sk#25, d_date#28] -Arguments: [ws_bill_customer_sk#25 ASC NULLS FIRST], false, 0 +(46) Sort [codegen id : 20] +Input [2]: [ws_bill_customer_sk#23, d_date#26] +Arguments: [ws_bill_customer_sk#23 ASC NULLS FIRST], false, 0 -(52) ReusedExchange [Reuses operator id: 12] -Output [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +(47) ReusedExchange [Reuses operator id: 12] +Output [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] -(53) Sort [codegen id : 24] -Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] -Arguments: [c_customer_sk#30 ASC NULLS FIRST], false, 0 +(48) Sort [codegen id : 22] +Input [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(54) SortMergeJoin [codegen id : 25] -Left keys [1]: [ws_bill_customer_sk#25] -Right keys [1]: [c_customer_sk#30] +(49) SortMergeJoin [codegen id : 23] +Left keys [1]: [ws_bill_customer_sk#23] +Right keys [1]: [c_customer_sk#28] Join condition: None -(55) Project [codegen id : 25] -Output [3]: [c_last_name#32, c_first_name#31, d_date#28] -Input [5]: [ws_bill_customer_sk#25, d_date#28, c_customer_sk#30, c_first_name#31, c_last_name#32] +(50) Project [codegen id : 23] +Output [3]: [c_last_name#30, c_first_name#29, d_date#26] +Input [5]: [ws_bill_customer_sk#23, d_date#26, c_customer_sk#28, c_first_name#29, c_last_name#30] -(56) HashAggregate [codegen id : 25] -Input [3]: [c_last_name#32, c_first_name#31, d_date#28] -Keys [3]: [c_last_name#32, c_first_name#31, d_date#28] +(51) HashAggregate [codegen id : 23] +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Keys [3]: [c_last_name#30, c_first_name#29, d_date#26] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#32, c_first_name#31, d_date#28] +Results [3]: [c_last_name#30, c_first_name#29, d_date#26] -(57) Exchange -Input [3]: [c_last_name#32, c_first_name#31, d_date#28] -Arguments: hashpartitioning(c_last_name#32, c_first_name#31, d_date#28, 5), ENSURE_REQUIREMENTS, [id=#33] +(52) Exchange +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Arguments: hashpartitioning(c_last_name#30, c_first_name#29, d_date#26, 5), ENSURE_REQUIREMENTS, [id=#31] -(58) HashAggregate [codegen id : 26] -Input [3]: [c_last_name#32, c_first_name#31, d_date#28] -Keys [3]: [c_last_name#32, c_first_name#31, d_date#28] +(53) HashAggregate [codegen id : 24] +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Keys [3]: [c_last_name#30, c_first_name#29, d_date#26] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#32, c_first_name#31, d_date#28] +Results [3]: [c_last_name#30, c_first_name#29, d_date#26] -(59) Exchange -Input [3]: [c_last_name#32, c_first_name#31, d_date#28] -Arguments: hashpartitioning(coalesce(c_last_name#32, ), isnull(c_last_name#32), coalesce(c_first_name#31, ), isnull(c_first_name#31), coalesce(d_date#28, 1970-01-01), isnull(d_date#28), 5), ENSURE_REQUIREMENTS, 
[id=#34] +(54) Exchange +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Arguments: hashpartitioning(coalesce(c_last_name#30, ), isnull(c_last_name#30), coalesce(c_first_name#29, ), isnull(c_first_name#29), coalesce(d_date#26, 1970-01-01), isnull(d_date#26), 5), ENSURE_REQUIREMENTS, [id=#32] -(60) Sort [codegen id : 27] -Input [3]: [c_last_name#32, c_first_name#31, d_date#28] -Arguments: [coalesce(c_last_name#32, ) ASC NULLS FIRST, isnull(c_last_name#32) ASC NULLS FIRST, coalesce(c_first_name#31, ) ASC NULLS FIRST, isnull(c_first_name#31) ASC NULLS FIRST, coalesce(d_date#28, 1970-01-01) ASC NULLS FIRST, isnull(d_date#28) ASC NULLS FIRST], false, 0 +(55) Sort [codegen id : 25] +Input [3]: [c_last_name#30, c_first_name#29, d_date#26] +Arguments: [coalesce(c_last_name#30, ) ASC NULLS FIRST, isnull(c_last_name#30) ASC NULLS FIRST, coalesce(c_first_name#29, ) ASC NULLS FIRST, isnull(c_first_name#29) ASC NULLS FIRST, coalesce(d_date#26, 1970-01-01) ASC NULLS FIRST, isnull(d_date#26) ASC NULLS FIRST], false, 0 -(61) SortMergeJoin [codegen id : 28] +(56) SortMergeJoin [codegen id : 26] Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] -Right keys [6]: [coalesce(c_last_name#32, ), isnull(c_last_name#32), coalesce(c_first_name#31, ), isnull(c_first_name#31), coalesce(d_date#28, 1970-01-01), isnull(d_date#28)] +Right keys [6]: [coalesce(c_last_name#30, ), isnull(c_last_name#30), coalesce(c_first_name#29, ), isnull(c_first_name#29), coalesce(d_date#26, 1970-01-01), isnull(d_date#26)] Join condition: None -(62) HashAggregate [codegen id : 28] +(57) Project [codegen id : 26] +Output: [] Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#5] -(63) Exchange -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: hashpartitioning(c_last_name#9, c_first_name#8, d_date#5, 5), ENSURE_REQUIREMENTS, [id=#35] - -(64) HashAggregate [codegen id : 29] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results: [] - -(65) HashAggregate [codegen id : 29] +(58) HashAggregate [codegen id : 26] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#36] -Results [1]: [count#37] +Aggregate Attributes [1]: [count#33] +Results [1]: [count#34] -(66) Exchange -Input [1]: [count#37] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#38] +(59) Exchange +Input [1]: [count#34] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#35] -(67) HashAggregate [codegen id : 30] -Input [1]: [count#37] +(60) HashAggregate [codegen id : 27] +Input [1]: [count#34] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#39] -Results [1]: [count(1)#39 AS count(1)#40] +Aggregate Attributes [1]: [count(1)#36] +Results [1]: [count(1)#36 AS count(1)#37] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#2 IN dynamicpruning#3 -BroadcastExchange (72) -+- * Project (71) - +- * Filter (70) - +- * ColumnarToRow (69) - +- Scan parquet default.date_dim (68) +BroadcastExchange (65) ++- * Project (64) + +- * Filter (63) + +- * ColumnarToRow (62) + +- Scan parquet default.date_dim (61) -(68) Scan parquet default.date_dim -Output [3]: [d_date_sk#4, d_date#5, d_month_seq#41] +(61) Scan 
parquet default.date_dim +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct -(69) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#41] +(62) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#38] -(70) Filter [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#41] -Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= 1200)) AND (d_month_seq#41 <= 1211)) AND isnotnull(d_date_sk#4)) +(63) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#38] +Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1200)) AND (d_month_seq#38 <= 1211)) AND isnotnull(d_date_sk#4)) -(71) Project [codegen id : 1] +(64) Project [codegen id : 1] Output [2]: [d_date_sk#4, d_date#5] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#41] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#38] -(72) BroadcastExchange +(65) BroadcastExchange Input [2]: [d_date_sk#4, d_date#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] Subquery:2 Hosting operator id = 21 Hosting Expression = cs_sold_date_sk#14 IN dynamicpruning#3 -Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#26 IN dynamicpruning#3 +Subquery:3 Hosting operator id = 39 Hosting Expression = ws_sold_date_sk#24 IN dynamicpruning#3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/simplified.txt index eda0d4b03f483..cc66a0040ef9a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/simplified.txt @@ -1,135 +1,122 @@ -WholeStageCodegen (30) +WholeStageCodegen (27) HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 - WholeStageCodegen (29) + WholeStageCodegen (26) HashAggregate [count,count] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (28) - HashAggregate [c_last_name,c_first_name,d_date] - SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - InputAdapter - WholeStageCodegen (19) - Sort [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #3 - WholeStageCodegen (18) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #4 - WholeStageCodegen (17) - HashAggregate [c_last_name,c_first_name,d_date] - SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + Project + SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + InputAdapter + WholeStageCodegen (17) + SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + InputAdapter + WholeStageCodegen (8) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (7) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #3 + 
WholeStageCodegen (6) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter - WholeStageCodegen (8) - Sort [c_last_name,c_first_name,d_date] + WholeStageCodegen (3) + Sort [ss_customer_sk] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #5 - WholeStageCodegen (7) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #6 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (3) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #7 - WholeStageCodegen (2) - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #8 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - ReusedExchange [d_date_sk,d_date] #8 - InputAdapter - WholeStageCodegen (5) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #9 - WholeStageCodegen (4) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + Exchange [ss_customer_sk] #4 + WholeStageCodegen (2) + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + ReusedExchange [d_date_sk,d_date] #5 InputAdapter - WholeStageCodegen (16) - Sort [c_last_name,c_first_name,d_date] + WholeStageCodegen (5) + Sort [c_customer_sk] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #10 - WholeStageCodegen (15) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #11 - WholeStageCodegen (14) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (11) - Sort [cs_bill_customer_sk] - InputAdapter - Exchange [cs_bill_customer_sk] #12 - WholeStageCodegen (10) - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #1 - InputAdapter - ReusedExchange [d_date_sk,d_date] #8 - InputAdapter - WholeStageCodegen (13) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #9 - InputAdapter - WholeStageCodegen (27) - Sort [c_last_name,c_first_name,d_date] + Exchange [c_customer_sk] #6 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + WholeStageCodegen (16) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + 
Exchange [c_last_name,c_first_name,d_date] #7 + WholeStageCodegen (15) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (14) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (11) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #9 + WholeStageCodegen (10) + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [d_date_sk,d_date] #5 + InputAdapter + WholeStageCodegen (13) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 + InputAdapter + WholeStageCodegen (25) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #10 + WholeStageCodegen (24) + HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #13 - WholeStageCodegen (26) + Exchange [c_last_name,c_first_name,d_date] #11 + WholeStageCodegen (23) HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #14 - WholeStageCodegen (25) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - SortMergeJoin [ws_bill_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (22) - Sort [ws_bill_customer_sk] - InputAdapter - Exchange [ws_bill_customer_sk] #15 - WholeStageCodegen (21) - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] - ReusedSubquery [d_date_sk] #1 - InputAdapter - ReusedExchange [d_date_sk,d_date] #8 - InputAdapter - WholeStageCodegen (24) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #9 + Project [c_last_name,c_first_name,d_date] + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (20) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #12 + WholeStageCodegen (19) + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [d_date_sk,d_date] #5 + InputAdapter + WholeStageCodegen (22) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt index 7193c4f8c57ef..ed2a97704b2f7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt @@ -1,54 +1,51 @@ == Physical Plan == -* HashAggregate (50) -+- Exchange (49) - +- * HashAggregate (48) - +- * HashAggregate (47) - +- * HashAggregate (46) - +- * BroadcastHashJoin LeftAnti BuildRight (45) - :- * HashAggregate (31) - : +- * HashAggregate 
(30) - : +- * BroadcastHashJoin LeftAnti BuildRight (29) - : :- * HashAggregate (15) - : : +- Exchange (14) - : : +- * HashAggregate (13) - : : +- * Project (12) - : : +- * BroadcastHashJoin Inner BuildRight (11) - : : :- * Project (6) - : : : +- * BroadcastHashJoin Inner BuildRight (5) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- ReusedExchange (4) - : : +- BroadcastExchange (10) - : : +- * Filter (9) - : : +- * ColumnarToRow (8) - : : +- Scan parquet default.customer (7) - : +- BroadcastExchange (28) - : +- * HashAggregate (27) - : +- Exchange (26) - : +- * HashAggregate (25) - : +- * Project (24) - : +- * BroadcastHashJoin Inner BuildRight (23) - : :- * Project (21) - : : +- * BroadcastHashJoin Inner BuildRight (20) - : : :- * Filter (18) - : : : +- * ColumnarToRow (17) - : : : +- Scan parquet default.catalog_sales (16) - : : +- ReusedExchange (19) - : +- ReusedExchange (22) - +- BroadcastExchange (44) - +- * HashAggregate (43) - +- Exchange (42) - +- * HashAggregate (41) - +- * Project (40) - +- * BroadcastHashJoin Inner BuildRight (39) - :- * Project (37) - : +- * BroadcastHashJoin Inner BuildRight (36) - : :- * Filter (34) - : : +- * ColumnarToRow (33) - : : +- Scan parquet default.web_sales (32) - : +- ReusedExchange (35) - +- ReusedExchange (38) +* HashAggregate (47) ++- Exchange (46) + +- * HashAggregate (45) + +- * Project (44) + +- * BroadcastHashJoin LeftAnti BuildRight (43) + :- * BroadcastHashJoin LeftAnti BuildRight (29) + : :- * HashAggregate (15) + : : +- Exchange (14) + : : +- * HashAggregate (13) + : : +- * Project (12) + : : +- * BroadcastHashJoin Inner BuildRight (11) + : : :- * Project (6) + : : : +- * BroadcastHashJoin Inner BuildRight (5) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- ReusedExchange (4) + : : +- BroadcastExchange (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet default.customer (7) + : +- BroadcastExchange (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet default.catalog_sales (16) + : : +- ReusedExchange (19) + : +- ReusedExchange (22) + +- BroadcastExchange (42) + +- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * Project (38) + +- * BroadcastHashJoin Inner BuildRight (37) + :- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet default.web_sales (30) + : +- ReusedExchange (33) + +- ReusedExchange (36) (1) Scan parquet default.store_sales @@ -66,7 +63,7 @@ Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] Condition : isnotnull(ss_customer_sk#1) -(4) ReusedExchange [Reuses operator id: 55] +(4) ReusedExchange [Reuses operator id: 52] Output [2]: [d_date_sk#4, d_date#5] (5) BroadcastHashJoin [codegen id : 3] @@ -138,7 +135,7 @@ Input [2]: [cs_bill_customer_sk#11, cs_sold_date_sk#12] Input [2]: [cs_bill_customer_sk#11, cs_sold_date_sk#12] Condition : isnotnull(cs_bill_customer_sk#11) -(19) ReusedExchange [Reuses operator id: 55] +(19) ReusedExchange [Reuses operator id: 52] Output [2]: [d_date_sk#13, d_date#14] (20) BroadcastHashJoin [codegen id : 6] @@ -189,21 +186,7 @@ Left 
keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_fir Right keys [6]: [coalesce(c_last_name#17, ), isnull(c_last_name#17), coalesce(c_first_name#16, ), isnull(c_first_name#16), coalesce(d_date#14, 1970-01-01), isnull(d_date#14)] Join condition: None -(30) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#8, c_first_name#7, d_date#5] -Keys [3]: [c_last_name#8, c_first_name#7, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#8, c_first_name#7, d_date#5] - -(31) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#8, c_first_name#7, d_date#5] -Keys [3]: [c_last_name#8, c_first_name#7, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#8, c_first_name#7, d_date#5] - -(32) Scan parquet default.web_sales +(30) Scan parquet default.web_sales Output [2]: [ws_bill_customer_sk#20, ws_sold_date_sk#21] Batched: true Location: InMemoryFileIndex [] @@ -211,90 +194,80 @@ PartitionFilters: [isnotnull(ws_sold_date_sk#21), dynamicpruningexpression(ws_so PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct -(33) ColumnarToRow [codegen id : 10] +(31) ColumnarToRow [codegen id : 10] Input [2]: [ws_bill_customer_sk#20, ws_sold_date_sk#21] -(34) Filter [codegen id : 10] +(32) Filter [codegen id : 10] Input [2]: [ws_bill_customer_sk#20, ws_sold_date_sk#21] Condition : isnotnull(ws_bill_customer_sk#20) -(35) ReusedExchange [Reuses operator id: 55] +(33) ReusedExchange [Reuses operator id: 52] Output [2]: [d_date_sk#22, d_date#23] -(36) BroadcastHashJoin [codegen id : 10] +(34) BroadcastHashJoin [codegen id : 10] Left keys [1]: [ws_sold_date_sk#21] Right keys [1]: [d_date_sk#22] Join condition: None -(37) Project [codegen id : 10] +(35) Project [codegen id : 10] Output [2]: [ws_bill_customer_sk#20, d_date#23] Input [4]: [ws_bill_customer_sk#20, ws_sold_date_sk#21, d_date_sk#22, d_date#23] -(38) ReusedExchange [Reuses operator id: 10] +(36) ReusedExchange [Reuses operator id: 10] Output [3]: [c_customer_sk#24, c_first_name#25, c_last_name#26] -(39) BroadcastHashJoin [codegen id : 10] +(37) BroadcastHashJoin [codegen id : 10] Left keys [1]: [ws_bill_customer_sk#20] Right keys [1]: [c_customer_sk#24] Join condition: None -(40) Project [codegen id : 10] +(38) Project [codegen id : 10] Output [3]: [c_last_name#26, c_first_name#25, d_date#23] Input [5]: [ws_bill_customer_sk#20, d_date#23, c_customer_sk#24, c_first_name#25, c_last_name#26] -(41) HashAggregate [codegen id : 10] +(39) HashAggregate [codegen id : 10] Input [3]: [c_last_name#26, c_first_name#25, d_date#23] Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] Functions: [] Aggregate Attributes: [] Results [3]: [c_last_name#26, c_first_name#25, d_date#23] -(42) Exchange +(40) Exchange Input [3]: [c_last_name#26, c_first_name#25, d_date#23] Arguments: hashpartitioning(c_last_name#26, c_first_name#25, d_date#23, 5), ENSURE_REQUIREMENTS, [id=#27] -(43) HashAggregate [codegen id : 11] +(41) HashAggregate [codegen id : 11] Input [3]: [c_last_name#26, c_first_name#25, d_date#23] Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] Functions: [] Aggregate Attributes: [] Results [3]: [c_last_name#26, c_first_name#25, d_date#23] -(44) BroadcastExchange +(42) BroadcastExchange Input [3]: [c_last_name#26, c_first_name#25, d_date#23] Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), 
isnull(input[2, date, true])),false), [id=#28] -(45) BroadcastHashJoin [codegen id : 12] +(43) BroadcastHashJoin [codegen id : 12] Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] Right keys [6]: [coalesce(c_last_name#26, ), isnull(c_last_name#26), coalesce(c_first_name#25, ), isnull(c_first_name#25), coalesce(d_date#23, 1970-01-01), isnull(d_date#23)] Join condition: None -(46) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#8, c_first_name#7, d_date#5] -Keys [3]: [c_last_name#8, c_first_name#7, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#8, c_first_name#7, d_date#5] - -(47) HashAggregate [codegen id : 12] +(44) Project [codegen id : 12] +Output: [] Input [3]: [c_last_name#8, c_first_name#7, d_date#5] -Keys [3]: [c_last_name#8, c_first_name#7, d_date#5] -Functions: [] -Aggregate Attributes: [] -Results: [] -(48) HashAggregate [codegen id : 12] +(45) HashAggregate [codegen id : 12] Input: [] Keys: [] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#29] Results [1]: [count#30] -(49) Exchange +(46) Exchange Input [1]: [count#30] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#31] -(50) HashAggregate [codegen id : 13] +(47) HashAggregate [codegen id : 13] Input [1]: [count#30] Keys: [] Functions [1]: [count(1)] @@ -304,37 +277,37 @@ Results [1]: [count(1)#32 AS count(1)#33] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#2 IN dynamicpruning#3 -BroadcastExchange (55) -+- * Project (54) - +- * Filter (53) - +- * ColumnarToRow (52) - +- Scan parquet default.date_dim (51) +BroadcastExchange (52) ++- * Project (51) + +- * Filter (50) + +- * ColumnarToRow (49) + +- Scan parquet default.date_dim (48) -(51) Scan parquet default.date_dim +(48) Scan parquet default.date_dim Output [3]: [d_date_sk#4, d_date#5, d_month_seq#34] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct -(52) ColumnarToRow [codegen id : 1] +(49) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#4, d_date#5, d_month_seq#34] -(53) Filter [codegen id : 1] +(50) Filter [codegen id : 1] Input [3]: [d_date_sk#4, d_date#5, d_month_seq#34] Condition : (((isnotnull(d_month_seq#34) AND (d_month_seq#34 >= 1200)) AND (d_month_seq#34 <= 1211)) AND isnotnull(d_date_sk#4)) -(54) Project [codegen id : 1] +(51) Project [codegen id : 1] Output [2]: [d_date_sk#4, d_date#5] Input [3]: [d_date_sk#4, d_date#5, d_month_seq#34] -(55) BroadcastExchange +(52) BroadcastExchange Input [2]: [d_date_sk#4, d_date#5] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] Subquery:2 Hosting operator id = 16 Hosting Expression = cs_sold_date_sk#12 IN dynamicpruning#3 -Subquery:3 Hosting operator id = 32 Hosting Expression = ws_sold_date_sk#21 IN dynamicpruning#3 +Subquery:3 Hosting operator id = 30 Hosting Expression = ws_sold_date_sk#21 IN dynamicpruning#3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt index 7f96f5657836a..34d46c5671774 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt @@ -4,81 +4,78 @@ WholeStageCodegen (13) Exchange #1 WholeStageCodegen (12) HashAggregate [count,count] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] + Project + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (3) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - ReusedExchange [d_date_sk,d_date] #3 - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (2) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (7) - HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 InputAdapter - Exchange [c_last_name,c_first_name,d_date] #6 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #1 - InputAdapter - ReusedExchange [d_date_sk,d_date] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (11) + BroadcastExchange #5 + WholeStageCodegen (7) HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (10) + Exchange 
[c_last_name,c_first_name,d_date] #6 + WholeStageCodegen (6) HashAggregate [c_last_name,c_first_name,d_date] Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_bill_customer_sk] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] ReusedSubquery [d_date_sk] #1 InputAdapter ReusedExchange [d_date_sk,d_date] #3 InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt index ae613fa051425..92b80b4085c67 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt @@ -1,106 +1,103 @@ == Physical Plan == -TakeOrderedAndProject (102) -+- * BroadcastHashJoin Inner BuildRight (101) - :- * Filter (81) - : +- * HashAggregate (80) - : +- Exchange (79) - : +- * HashAggregate (78) - : +- * Project (77) - : +- * BroadcastHashJoin Inner BuildRight (76) - : :- * Project (66) - : : +- * BroadcastHashJoin Inner BuildRight (65) - : : :- * SortMergeJoin LeftSemi (63) +TakeOrderedAndProject (99) ++- * BroadcastHashJoin Inner BuildRight (98) + :- * Filter (78) + : +- * HashAggregate (77) + : +- Exchange (76) + : +- * HashAggregate (75) + : +- * Project (74) + : +- * BroadcastHashJoin Inner BuildRight (73) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * SortMergeJoin LeftSemi (60) : : : :- * Sort (5) : : : : +- Exchange (4) : : : : +- * Filter (3) : : : : +- * ColumnarToRow (2) : : : : +- Scan parquet default.store_sales (1) - : : : +- * Sort (62) - : : : +- Exchange (61) - : : : +- * Project (60) - : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : +- * Sort (59) + : : : +- Exchange (58) + : : : +- * Project (57) + : : : +- * BroadcastHashJoin Inner BuildRight (56) : : : :- * Filter (8) : : : : +- * ColumnarToRow (7) : : : : +- Scan parquet default.item (6) - : : : +- BroadcastExchange (58) - : : : +- * HashAggregate (57) - : : : +- Exchange (56) - : : : +- * HashAggregate (55) - : : : +- * SortMergeJoin LeftSemi (54) - : : : :- * Sort (42) - : : : : +- Exchange (41) - : : : : +- * HashAggregate (40) - : : : : +- Exchange (39) - : : : : +- * HashAggregate (38) - : : : : +- 
* Project (37) - : : : : +- * BroadcastHashJoin Inner BuildRight (36) - : : : : :- * Project (14) - : : : : : +- * BroadcastHashJoin Inner BuildRight (13) - : : : : : :- * Filter (11) - : : : : : : +- * ColumnarToRow (10) - : : : : : : +- Scan parquet default.store_sales (9) - : : : : : +- ReusedExchange (12) - : : : : +- BroadcastExchange (35) - : : : : +- * SortMergeJoin LeftSemi (34) - : : : : :- * Sort (19) - : : : : : +- Exchange (18) - : : : : : +- * Filter (17) - : : : : : +- * ColumnarToRow (16) - : : : : : +- Scan parquet default.item (15) - : : : : +- * Sort (33) - : : : : +- Exchange (32) - : : : : +- * Project (31) - : : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : : :- * Project (25) - : : : : : +- * BroadcastHashJoin Inner BuildRight (24) - : : : : : :- * Filter (22) - : : : : : : +- * ColumnarToRow (21) - : : : : : : +- Scan parquet default.catalog_sales (20) - : : : : : +- ReusedExchange (23) - : : : : +- BroadcastExchange (29) - : : : : +- * Filter (28) - : : : : +- * ColumnarToRow (27) - : : : : +- Scan parquet default.item (26) - : : : +- * Sort (53) - : : : +- Exchange (52) - : : : +- * Project (51) - : : : +- * BroadcastHashJoin Inner BuildRight (50) - : : : :- * Project (48) - : : : : +- * BroadcastHashJoin Inner BuildRight (47) - : : : : :- * Filter (45) - : : : : : +- * ColumnarToRow (44) - : : : : : +- Scan parquet default.web_sales (43) - : : : : +- ReusedExchange (46) - : : : +- ReusedExchange (49) - : : +- ReusedExchange (64) - : +- BroadcastExchange (75) - : +- * SortMergeJoin LeftSemi (74) - : :- * Sort (71) - : : +- Exchange (70) - : : +- * Filter (69) - : : +- * ColumnarToRow (68) - : : +- Scan parquet default.item (67) - : +- * Sort (73) - : +- ReusedExchange (72) - +- BroadcastExchange (100) - +- * Filter (99) - +- * HashAggregate (98) - +- Exchange (97) - +- * HashAggregate (96) - +- * Project (95) - +- * BroadcastHashJoin Inner BuildRight (94) - :- * Project (92) - : +- * BroadcastHashJoin Inner BuildRight (91) - : :- * SortMergeJoin LeftSemi (89) - : : :- * Sort (86) - : : : +- Exchange (85) - : : : +- * Filter (84) - : : : +- * ColumnarToRow (83) - : : : +- Scan parquet default.store_sales (82) - : : +- * Sort (88) - : : +- ReusedExchange (87) - : +- ReusedExchange (90) - +- ReusedExchange (93) + : : : +- BroadcastExchange (55) + : : : +- * SortMergeJoin LeftSemi (54) + : : : :- * Sort (42) + : : : : +- Exchange (41) + : : : : +- * HashAggregate (40) + : : : : +- Exchange (39) + : : : : +- * HashAggregate (38) + : : : : +- * Project (37) + : : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : : : :- * Project (14) + : : : : : +- * BroadcastHashJoin Inner BuildRight (13) + : : : : : :- * Filter (11) + : : : : : : +- * ColumnarToRow (10) + : : : : : : +- Scan parquet default.store_sales (9) + : : : : : +- ReusedExchange (12) + : : : : +- BroadcastExchange (35) + : : : : +- * SortMergeJoin LeftSemi (34) + : : : : :- * Sort (19) + : : : : : +- Exchange (18) + : : : : : +- * Filter (17) + : : : : : +- * ColumnarToRow (16) + : : : : : +- Scan parquet default.item (15) + : : : : +- * Sort (33) + : : : : +- Exchange (32) + : : : : +- * Project (31) + : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : :- * Project (25) + : : : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : : : :- * Filter (22) + : : : : : : +- * ColumnarToRow (21) + : : : : : : +- Scan parquet default.catalog_sales (20) + : : : : : +- ReusedExchange (23) + : : : : +- BroadcastExchange (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : 
: : +- Scan parquet default.item (26) + : : : +- * Sort (53) + : : : +- Exchange (52) + : : : +- * Project (51) + : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : :- * Project (48) + : : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : : :- * Filter (45) + : : : : : +- * ColumnarToRow (44) + : : : : : +- Scan parquet default.web_sales (43) + : : : : +- ReusedExchange (46) + : : : +- ReusedExchange (49) + : : +- ReusedExchange (61) + : +- BroadcastExchange (72) + : +- * SortMergeJoin LeftSemi (71) + : :- * Sort (68) + : : +- Exchange (67) + : : +- * Filter (66) + : : +- * ColumnarToRow (65) + : : +- Scan parquet default.item (64) + : +- * Sort (70) + : +- ReusedExchange (69) + +- BroadcastExchange (97) + +- * Filter (96) + +- * HashAggregate (95) + +- Exchange (94) + +- * HashAggregate (93) + +- * Project (92) + +- * BroadcastHashJoin Inner BuildRight (91) + :- * Project (89) + : +- * BroadcastHashJoin Inner BuildRight (88) + : :- * SortMergeJoin LeftSemi (86) + : : :- * Sort (83) + : : : +- Exchange (82) + : : : +- * Filter (81) + : : : +- * ColumnarToRow (80) + : : : +- Scan parquet default.store_sales (79) + : : +- * Sort (85) + : : +- ReusedExchange (84) + : +- ReusedExchange (87) + +- ReusedExchange (90) (1) Scan parquet default.store_sales @@ -133,10 +130,10 @@ Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct -(7) ColumnarToRow [codegen id : 20] +(7) ColumnarToRow [codegen id : 19] Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] -(8) Filter [codegen id : 20] +(8) Filter [codegen id : 19] Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] Condition : ((isnotnull(i_brand_id#8) AND isnotnull(i_class_id#9)) AND isnotnull(i_category_id#10)) @@ -155,7 +152,7 @@ Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] Condition : isnotnull(ss_item_sk#11) -(12) ReusedExchange [Reuses operator id: 135] +(12) ReusedExchange [Reuses operator id: 132] Output [1]: [d_date_sk#14] (13) BroadcastHashJoin [codegen id : 11] @@ -204,7 +201,7 @@ Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] Condition : isnotnull(cs_item_sk#20) -(23) ReusedExchange [Reuses operator id: 135] +(23) ReusedExchange [Reuses operator id: 132] Output [1]: [d_date_sk#22] (24) BroadcastHashJoin [codegen id : 8] @@ -310,7 +307,7 @@ Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] Condition : isnotnull(ws_item_sk#35) -(46) ReusedExchange [Reuses operator id: 135] +(46) ReusedExchange [Reuses operator id: 132] Output [1]: [d_date_sk#37] (47) BroadcastHashJoin [codegen id : 16] @@ -347,485 +344,467 @@ Left keys [6]: [coalesce(brand_id#30, 0), isnull(brand_id#30), coalesce(class_id Right keys [6]: [coalesce(i_brand_id#39, 0), isnull(i_brand_id#39), coalesce(i_class_id#40, 0), isnull(i_class_id#40), coalesce(i_category_id#41, 0), isnull(i_category_id#41)] Join condition: None -(55) HashAggregate [codegen id : 18] +(55) BroadcastExchange Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] - -(56) Exchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: hashpartitioning(brand_id#30, class_id#31, category_id#32, 5), ENSURE_REQUIREMENTS, [id=#43] - -(57) HashAggregate 
[codegen id : 19] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] - -(58) BroadcastExchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#44] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#43] -(59) BroadcastHashJoin [codegen id : 20] +(56) BroadcastHashJoin [codegen id : 19] Left keys [3]: [i_brand_id#8, i_class_id#9, i_category_id#10] Right keys [3]: [brand_id#30, class_id#31, category_id#32] Join condition: None -(60) Project [codegen id : 20] -Output [1]: [i_item_sk#7 AS ss_item_sk#45] +(57) Project [codegen id : 19] +Output [1]: [i_item_sk#7 AS ss_item_sk#44] Input [7]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, brand_id#30, class_id#31, category_id#32] -(61) Exchange -Input [1]: [ss_item_sk#45] -Arguments: hashpartitioning(ss_item_sk#45, 5), ENSURE_REQUIREMENTS, [id=#46] +(58) Exchange +Input [1]: [ss_item_sk#44] +Arguments: hashpartitioning(ss_item_sk#44, 5), ENSURE_REQUIREMENTS, [id=#45] -(62) Sort [codegen id : 21] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS FIRST], false, 0 +(59) Sort [codegen id : 20] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(63) SortMergeJoin [codegen id : 45] +(60) SortMergeJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [ss_item_sk#45] +Right keys [1]: [ss_item_sk#44] Join condition: None -(64) ReusedExchange [Reuses operator id: 126] -Output [1]: [d_date_sk#47] +(61) ReusedExchange [Reuses operator id: 123] +Output [1]: [d_date_sk#46] -(65) BroadcastHashJoin [codegen id : 45] +(62) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#47] +Right keys [1]: [d_date_sk#46] Join condition: None -(66) Project [codegen id : 45] +(63) Project [codegen id : 43] Output [3]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3] -Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#47] +Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#46] -(67) Scan parquet default.item -Output [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] +(64) Scan parquet default.item +Output [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct -(68) ColumnarToRow [codegen id : 23] -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] +(65) ColumnarToRow [codegen id : 22] +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -(69) Filter [codegen id : 23] -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Condition : (((isnotnull(i_item_sk#48) AND isnotnull(i_brand_id#49)) AND isnotnull(i_class_id#50)) AND isnotnull(i_category_id#51)) +(66) Filter [codegen id : 22] +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Condition : (((isnotnull(i_item_sk#47) AND isnotnull(i_brand_id#48)) AND isnotnull(i_class_id#49)) AND isnotnull(i_category_id#50)) -(70) Exchange -Input [4]: [i_item_sk#48, i_brand_id#49, 
i_class_id#50, i_category_id#51] -Arguments: hashpartitioning(i_item_sk#48, 5), ENSURE_REQUIREMENTS, [id=#52] +(67) Exchange +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Arguments: hashpartitioning(i_item_sk#47, 5), ENSURE_REQUIREMENTS, [id=#51] -(71) Sort [codegen id : 24] -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Arguments: [i_item_sk#48 ASC NULLS FIRST], false, 0 +(68) Sort [codegen id : 23] +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Arguments: [i_item_sk#47 ASC NULLS FIRST], false, 0 -(72) ReusedExchange [Reuses operator id: 61] -Output [1]: [ss_item_sk#45] +(69) ReusedExchange [Reuses operator id: 58] +Output [1]: [ss_item_sk#44] -(73) Sort [codegen id : 43] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS FIRST], false, 0 +(70) Sort [codegen id : 41] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(74) SortMergeJoin [codegen id : 44] -Left keys [1]: [i_item_sk#48] -Right keys [1]: [ss_item_sk#45] +(71) SortMergeJoin [codegen id : 42] +Left keys [1]: [i_item_sk#47] +Right keys [1]: [ss_item_sk#44] Join condition: None -(75) BroadcastExchange -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#53] +(72) BroadcastExchange +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#52] -(76) BroadcastHashJoin [codegen id : 45] +(73) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#48] +Right keys [1]: [i_item_sk#47] Join condition: None -(77) Project [codegen id : 45] -Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] -Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] +(74) Project [codegen id : 43] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -(78) HashAggregate [codegen id : 45] -Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] -Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] +(75) HashAggregate [codegen id : 43] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] -Results [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] +Aggregate Attributes [3]: [sum#53, isEmpty#54, count#55] +Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] -(79) Exchange -Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] -Arguments: hashpartitioning(i_brand_id#49, i_class_id#50, i_category_id#51, 5), ENSURE_REQUIREMENTS, [id=#60] +(76) Exchange +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] +Arguments: hashpartitioning(i_brand_id#48, i_class_id#49, 
i_category_id#50, 5), ENSURE_REQUIREMENTS, [id=#59] -(80) HashAggregate [codegen id : 92] -Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] -Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] +(77) HashAggregate [codegen id : 88] +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61, count(1)#62] -Results [6]: [store AS channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61 AS sales#64, count(1)#62 AS number_sales#65] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60, count(1)#61] +Results [6]: [store AS channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60 AS sales#63, count(1)#61 AS number_sales#64] -(81) Filter [codegen id : 92] -Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65] -Condition : (isnotnull(sales#64) AND (cast(sales#64 as decimal(32,6)) > cast(Subquery scalar-subquery#66, [id=#67] as decimal(32,6)))) +(78) Filter [codegen id : 88] +Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Condition : (isnotnull(sales#63) AND (cast(sales#63 as decimal(32,6)) > cast(Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) -(82) Scan parquet default.store_sales -Output [4]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, ss_sold_date_sk#71] +(79) Scan parquet default.store_sales +Output [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#71), dynamicpruningexpression(ss_sold_date_sk#71 IN dynamicpruning#72)] +PartitionFilters: [isnotnull(ss_sold_date_sk#70), dynamicpruningexpression(ss_sold_date_sk#70 IN dynamicpruning#71)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct -(83) ColumnarToRow [codegen id : 46] -Input [4]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, ss_sold_date_sk#71] +(80) ColumnarToRow [codegen id : 44] +Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] -(84) Filter [codegen id : 46] -Input [4]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, ss_sold_date_sk#71] -Condition : isnotnull(ss_item_sk#68) +(81) Filter [codegen id : 44] +Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] +Condition : isnotnull(ss_item_sk#67) -(85) Exchange -Input [4]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, ss_sold_date_sk#71] -Arguments: hashpartitioning(ss_item_sk#68, 5), ENSURE_REQUIREMENTS, [id=#73] +(82) Exchange +Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] +Arguments: 
hashpartitioning(ss_item_sk#67, 5), ENSURE_REQUIREMENTS, [id=#72] -(86) Sort [codegen id : 47] -Input [4]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, ss_sold_date_sk#71] -Arguments: [ss_item_sk#68 ASC NULLS FIRST], false, 0 +(83) Sort [codegen id : 45] +Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] +Arguments: [ss_item_sk#67 ASC NULLS FIRST], false, 0 -(87) ReusedExchange [Reuses operator id: 61] -Output [1]: [ss_item_sk#45] +(84) ReusedExchange [Reuses operator id: 58] +Output [1]: [ss_item_sk#44] -(88) Sort [codegen id : 66] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS FIRST], false, 0 +(85) Sort [codegen id : 63] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(89) SortMergeJoin [codegen id : 90] -Left keys [1]: [ss_item_sk#68] -Right keys [1]: [ss_item_sk#45] +(86) SortMergeJoin [codegen id : 86] +Left keys [1]: [ss_item_sk#67] +Right keys [1]: [ss_item_sk#44] Join condition: None -(90) ReusedExchange [Reuses operator id: 140] -Output [1]: [d_date_sk#74] +(87) ReusedExchange [Reuses operator id: 137] +Output [1]: [d_date_sk#73] -(91) BroadcastHashJoin [codegen id : 90] -Left keys [1]: [ss_sold_date_sk#71] -Right keys [1]: [d_date_sk#74] +(88) BroadcastHashJoin [codegen id : 86] +Left keys [1]: [ss_sold_date_sk#70] +Right keys [1]: [d_date_sk#73] Join condition: None -(92) Project [codegen id : 90] -Output [3]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70] -Input [5]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, ss_sold_date_sk#71, d_date_sk#74] +(89) Project [codegen id : 86] +Output [3]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69] +Input [5]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70, d_date_sk#73] -(93) ReusedExchange [Reuses operator id: 75] -Output [4]: [i_item_sk#75, i_brand_id#76, i_class_id#77, i_category_id#78] +(90) ReusedExchange [Reuses operator id: 72] +Output [4]: [i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] -(94) BroadcastHashJoin [codegen id : 90] -Left keys [1]: [ss_item_sk#68] -Right keys [1]: [i_item_sk#75] +(91) BroadcastHashJoin [codegen id : 86] +Left keys [1]: [ss_item_sk#67] +Right keys [1]: [i_item_sk#74] Join condition: None -(95) Project [codegen id : 90] -Output [5]: [ss_quantity#69, ss_list_price#70, i_brand_id#76, i_class_id#77, i_category_id#78] -Input [7]: [ss_item_sk#68, ss_quantity#69, ss_list_price#70, i_item_sk#75, i_brand_id#76, i_class_id#77, i_category_id#78] - -(96) HashAggregate [codegen id : 90] -Input [5]: [ss_quantity#69, ss_list_price#70, i_brand_id#76, i_class_id#77, i_category_id#78] -Keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#79, isEmpty#80, count#81] -Results [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] - -(97) Exchange -Input [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] -Arguments: hashpartitioning(i_brand_id#76, i_class_id#77, i_category_id#78, 5), ENSURE_REQUIREMENTS, [id=#85] - -(98) HashAggregate [codegen id : 91] -Input [6]: [i_brand_id#76, i_class_id#77, i_category_id#78, sum#82, isEmpty#83, count#84] -Keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * 
promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#86, count(1)#87] -Results [6]: [store AS channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sum(CheckOverflow((promote_precision(cast(ss_quantity#69 as decimal(12,2))) * promote_precision(cast(ss_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#86 AS sales#89, count(1)#87 AS number_sales#90] - -(99) Filter [codegen id : 91] -Input [6]: [channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sales#89, number_sales#90] -Condition : (isnotnull(sales#89) AND (cast(sales#89 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#66, [id=#67] as decimal(32,6)))) - -(100) BroadcastExchange -Input [6]: [channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sales#89, number_sales#90] -Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#91] - -(101) BroadcastHashJoin [codegen id : 92] -Left keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] -Right keys [3]: [i_brand_id#76, i_class_id#77, i_category_id#78] +(92) Project [codegen id : 86] +Output [5]: [ss_quantity#68, ss_list_price#69, i_brand_id#75, i_class_id#76, i_category_id#77] +Input [7]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] + +(93) HashAggregate [codegen id : 86] +Input [5]: [ss_quantity#68, ss_list_price#69, i_brand_id#75, i_class_id#76, i_category_id#77] +Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#78, isEmpty#79, count#80] +Results [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] + +(94) Exchange +Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] +Arguments: hashpartitioning(i_brand_id#75, i_class_id#76, i_category_id#77, 5), ENSURE_REQUIREMENTS, [id=#84] + +(95) HashAggregate [codegen id : 87] +Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] +Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#85, count(1)#86] +Results [6]: [store AS channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#85 AS sales#88, count(1)#86 AS number_sales#89] + +(96) Filter [codegen id : 87] +Input [6]: [channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] +Condition : (isnotnull(sales#88) AND (cast(sales#88 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) + +(97) BroadcastExchange +Input [6]: [channel#87, i_brand_id#75, 
i_class_id#76, i_category_id#77, sales#88, number_sales#89] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#90] + +(98) BroadcastHashJoin [codegen id : 88] +Left keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Right keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] Join condition: None -(102) TakeOrderedAndProject -Input [12]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65, channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sales#89, number_sales#90] -Arguments: 100, [i_brand_id#49 ASC NULLS FIRST, i_class_id#50 ASC NULLS FIRST, i_category_id#51 ASC NULLS FIRST], [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65, channel#88, i_brand_id#76, i_class_id#77, i_category_id#78, sales#89, number_sales#90] +(99) TakeOrderedAndProject +Input [12]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] +Arguments: 100, [i_brand_id#48 ASC NULLS FIRST, i_class_id#49 ASC NULLS FIRST, i_category_id#50 ASC NULLS FIRST], [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] ===== Subqueries ===== -Subquery:1 Hosting operator id = 81 Hosting Expression = Subquery scalar-subquery#66, [id=#67] -* HashAggregate (121) -+- Exchange (120) - +- * HashAggregate (119) - +- Union (118) - :- * Project (107) - : +- * BroadcastHashJoin Inner BuildRight (106) - : :- * ColumnarToRow (104) - : : +- Scan parquet default.store_sales (103) - : +- ReusedExchange (105) - :- * Project (112) - : +- * BroadcastHashJoin Inner BuildRight (111) - : :- * ColumnarToRow (109) - : : +- Scan parquet default.catalog_sales (108) - : +- ReusedExchange (110) - +- * Project (117) - +- * BroadcastHashJoin Inner BuildRight (116) - :- * ColumnarToRow (114) - : +- Scan parquet default.web_sales (113) - +- ReusedExchange (115) - - -(103) Scan parquet default.store_sales -Output [3]: [ss_quantity#92, ss_list_price#93, ss_sold_date_sk#94] +Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquery#65, [id=#66] +* HashAggregate (118) ++- Exchange (117) + +- * HashAggregate (116) + +- Union (115) + :- * Project (104) + : +- * BroadcastHashJoin Inner BuildRight (103) + : :- * ColumnarToRow (101) + : : +- Scan parquet default.store_sales (100) + : +- ReusedExchange (102) + :- * Project (109) + : +- * BroadcastHashJoin Inner BuildRight (108) + : :- * ColumnarToRow (106) + : : +- Scan parquet default.catalog_sales (105) + : +- ReusedExchange (107) + +- * Project (114) + +- * BroadcastHashJoin Inner BuildRight (113) + :- * ColumnarToRow (111) + : +- Scan parquet default.web_sales (110) + +- ReusedExchange (112) + + +(100) Scan parquet default.store_sales +Output [3]: [ss_quantity#91, ss_list_price#92, ss_sold_date_sk#93] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#94), dynamicpruningexpression(ss_sold_date_sk#94 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ss_sold_date_sk#93), dynamicpruningexpression(ss_sold_date_sk#93 IN dynamicpruning#13)] ReadSchema: struct -(104) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#92, ss_list_price#93, ss_sold_date_sk#94] +(101) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#91, ss_list_price#92, ss_sold_date_sk#93] 
-(105) ReusedExchange [Reuses operator id: 135] -Output [1]: [d_date_sk#95] +(102) ReusedExchange [Reuses operator id: 132] +Output [1]: [d_date_sk#94] -(106) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#94] -Right keys [1]: [d_date_sk#95] +(103) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#93] +Right keys [1]: [d_date_sk#94] Join condition: None -(107) Project [codegen id : 2] -Output [2]: [ss_quantity#92 AS quantity#96, ss_list_price#93 AS list_price#97] -Input [4]: [ss_quantity#92, ss_list_price#93, ss_sold_date_sk#94, d_date_sk#95] +(104) Project [codegen id : 2] +Output [2]: [ss_quantity#91 AS quantity#95, ss_list_price#92 AS list_price#96] +Input [4]: [ss_quantity#91, ss_list_price#92, ss_sold_date_sk#93, d_date_sk#94] -(108) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#98, cs_list_price#99, cs_sold_date_sk#100] +(105) Scan parquet default.catalog_sales +Output [3]: [cs_quantity#97, cs_list_price#98, cs_sold_date_sk#99] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#100), dynamicpruningexpression(cs_sold_date_sk#100 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(cs_sold_date_sk#99), dynamicpruningexpression(cs_sold_date_sk#99 IN dynamicpruning#13)] ReadSchema: struct -(109) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#98, cs_list_price#99, cs_sold_date_sk#100] +(106) ColumnarToRow [codegen id : 4] +Input [3]: [cs_quantity#97, cs_list_price#98, cs_sold_date_sk#99] -(110) ReusedExchange [Reuses operator id: 135] -Output [1]: [d_date_sk#101] +(107) ReusedExchange [Reuses operator id: 132] +Output [1]: [d_date_sk#100] -(111) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#100] -Right keys [1]: [d_date_sk#101] +(108) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#99] +Right keys [1]: [d_date_sk#100] Join condition: None -(112) Project [codegen id : 4] -Output [2]: [cs_quantity#98 AS quantity#102, cs_list_price#99 AS list_price#103] -Input [4]: [cs_quantity#98, cs_list_price#99, cs_sold_date_sk#100, d_date_sk#101] +(109) Project [codegen id : 4] +Output [2]: [cs_quantity#97 AS quantity#101, cs_list_price#98 AS list_price#102] +Input [4]: [cs_quantity#97, cs_list_price#98, cs_sold_date_sk#99, d_date_sk#100] -(113) Scan parquet default.web_sales -Output [3]: [ws_quantity#104, ws_list_price#105, ws_sold_date_sk#106] +(110) Scan parquet default.web_sales +Output [3]: [ws_quantity#103, ws_list_price#104, ws_sold_date_sk#105] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#106), dynamicpruningexpression(ws_sold_date_sk#106 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ws_sold_date_sk#105), dynamicpruningexpression(ws_sold_date_sk#105 IN dynamicpruning#13)] ReadSchema: struct -(114) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#104, ws_list_price#105, ws_sold_date_sk#106] +(111) ColumnarToRow [codegen id : 6] +Input [3]: [ws_quantity#103, ws_list_price#104, ws_sold_date_sk#105] -(115) ReusedExchange [Reuses operator id: 135] -Output [1]: [d_date_sk#107] +(112) ReusedExchange [Reuses operator id: 132] +Output [1]: [d_date_sk#106] -(116) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#106] -Right keys [1]: [d_date_sk#107] +(113) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#105] +Right keys [1]: [d_date_sk#106] Join condition: None -(117) Project [codegen id : 6] -Output [2]: [ws_quantity#104 AS quantity#108, ws_list_price#105 AS 
list_price#109] -Input [4]: [ws_quantity#104, ws_list_price#105, ws_sold_date_sk#106, d_date_sk#107] +(114) Project [codegen id : 6] +Output [2]: [ws_quantity#103 AS quantity#107, ws_list_price#104 AS list_price#108] +Input [4]: [ws_quantity#103, ws_list_price#104, ws_sold_date_sk#105, d_date_sk#106] -(118) Union +(115) Union -(119) HashAggregate [codegen id : 7] -Input [2]: [quantity#96, list_price#97] +(116) HashAggregate [codegen id : 7] +Input [2]: [quantity#95, list_price#96] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#110, count#111] -Results [2]: [sum#112, count#113] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#109, count#110] +Results [2]: [sum#111, count#112] -(120) Exchange -Input [2]: [sum#112, count#113] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#114] +(117) Exchange +Input [2]: [sum#111, count#112] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#113] -(121) HashAggregate [codegen id : 8] -Input [2]: [sum#112, count#113] +(118) HashAggregate [codegen id : 8] +Input [2]: [sum#111, count#112] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))#115] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#96 as decimal(12,2))) * promote_precision(cast(list_price#97 as decimal(12,2)))), DecimalType(18,2)))#115 AS average_sales#116] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))#114] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))#114 AS average_sales#115] -Subquery:2 Hosting operator id = 103 Hosting Expression = ss_sold_date_sk#94 IN dynamicpruning#13 +Subquery:2 Hosting operator id = 100 Hosting Expression = ss_sold_date_sk#93 IN dynamicpruning#13 -Subquery:3 Hosting operator id = 108 Hosting Expression = cs_sold_date_sk#100 IN dynamicpruning#13 +Subquery:3 Hosting operator id = 105 Hosting Expression = cs_sold_date_sk#99 IN dynamicpruning#13 -Subquery:4 Hosting operator id = 113 Hosting Expression = ws_sold_date_sk#106 IN dynamicpruning#13 +Subquery:4 Hosting operator id = 110 Hosting Expression = ws_sold_date_sk#105 IN dynamicpruning#13 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 -BroadcastExchange (126) -+- * Project (125) - +- * Filter (124) - +- * ColumnarToRow (123) - +- Scan parquet default.date_dim (122) +BroadcastExchange (123) ++- * Project (122) + +- * Filter (121) + +- * ColumnarToRow (120) + +- Scan parquet default.date_dim (119) -(122) Scan parquet default.date_dim -Output [2]: [d_date_sk#47, d_week_seq#117] +(119) Scan parquet 
default.date_dim +Output [2]: [d_date_sk#46, d_week_seq#116] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(123) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#47, d_week_seq#117] +(120) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#46, d_week_seq#116] -(124) Filter [codegen id : 1] -Input [2]: [d_date_sk#47, d_week_seq#117] -Condition : ((isnotnull(d_week_seq#117) AND (d_week_seq#117 = Subquery scalar-subquery#118, [id=#119])) AND isnotnull(d_date_sk#47)) +(121) Filter [codegen id : 1] +Input [2]: [d_date_sk#46, d_week_seq#116] +Condition : ((isnotnull(d_week_seq#116) AND (d_week_seq#116 = Subquery scalar-subquery#117, [id=#118])) AND isnotnull(d_date_sk#46)) -(125) Project [codegen id : 1] -Output [1]: [d_date_sk#47] -Input [2]: [d_date_sk#47, d_week_seq#117] +(122) Project [codegen id : 1] +Output [1]: [d_date_sk#46] +Input [2]: [d_date_sk#46, d_week_seq#116] -(126) BroadcastExchange -Input [1]: [d_date_sk#47] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#120] +(123) BroadcastExchange +Input [1]: [d_date_sk#46] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#119] -Subquery:6 Hosting operator id = 124 Hosting Expression = Subquery scalar-subquery#118, [id=#119] -* Project (130) -+- * Filter (129) - +- * ColumnarToRow (128) - +- Scan parquet default.date_dim (127) +Subquery:6 Hosting operator id = 121 Hosting Expression = Subquery scalar-subquery#117, [id=#118] +* Project (127) ++- * Filter (126) + +- * ColumnarToRow (125) + +- Scan parquet default.date_dim (124) -(127) Scan parquet default.date_dim -Output [4]: [d_week_seq#121, d_year#122, d_moy#123, d_dom#124] +(124) Scan parquet default.date_dim +Output [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,16)] ReadSchema: struct -(128) ColumnarToRow [codegen id : 1] -Input [4]: [d_week_seq#121, d_year#122, d_moy#123, d_dom#124] +(125) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] -(129) Filter [codegen id : 1] -Input [4]: [d_week_seq#121, d_year#122, d_moy#123, d_dom#124] -Condition : (((((isnotnull(d_year#122) AND isnotnull(d_moy#123)) AND isnotnull(d_dom#124)) AND (d_year#122 = 1999)) AND (d_moy#123 = 12)) AND (d_dom#124 = 16)) +(126) Filter [codegen id : 1] +Input [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] +Condition : (((((isnotnull(d_year#121) AND isnotnull(d_moy#122)) AND isnotnull(d_dom#123)) AND (d_year#121 = 1999)) AND (d_moy#122 = 12)) AND (d_dom#123 = 16)) -(130) Project [codegen id : 1] -Output [1]: [d_week_seq#121] -Input [4]: [d_week_seq#121, d_year#122, d_moy#123, d_dom#124] +(127) Project [codegen id : 1] +Output [1]: [d_week_seq#120] +Input [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] Subquery:7 Hosting operator id = 9 Hosting Expression = ss_sold_date_sk#12 IN dynamicpruning#13 -BroadcastExchange (135) -+- * Project (134) - +- * Filter (133) - +- * ColumnarToRow (132) - +- Scan parquet default.date_dim (131) +BroadcastExchange (132) ++- * Project (131) + +- * Filter (130) + +- * ColumnarToRow (129) + +- Scan parquet default.date_dim (128) -(131) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#125] +(128) 
Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#124] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct -(132) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#125] +(129) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#14, d_year#124] -(133) Filter [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#125] -Condition : (((isnotnull(d_year#125) AND (d_year#125 >= 1998)) AND (d_year#125 <= 2000)) AND isnotnull(d_date_sk#14)) +(130) Filter [codegen id : 1] +Input [2]: [d_date_sk#14, d_year#124] +Condition : (((isnotnull(d_year#124) AND (d_year#124 >= 1998)) AND (d_year#124 <= 2000)) AND isnotnull(d_date_sk#14)) -(134) Project [codegen id : 1] +(131) Project [codegen id : 1] Output [1]: [d_date_sk#14] -Input [2]: [d_date_sk#14, d_year#125] +Input [2]: [d_date_sk#14, d_year#124] -(135) BroadcastExchange +(132) BroadcastExchange Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#126] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#125] Subquery:8 Hosting operator id = 20 Hosting Expression = cs_sold_date_sk#21 IN dynamicpruning#13 Subquery:9 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#36 IN dynamicpruning#13 -Subquery:10 Hosting operator id = 99 Hosting Expression = ReusedSubquery Subquery scalar-subquery#66, [id=#67] +Subquery:10 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#65, [id=#66] -Subquery:11 Hosting operator id = 82 Hosting Expression = ss_sold_date_sk#71 IN dynamicpruning#72 -BroadcastExchange (140) -+- * Project (139) - +- * Filter (138) - +- * ColumnarToRow (137) - +- Scan parquet default.date_dim (136) +Subquery:11 Hosting operator id = 79 Hosting Expression = ss_sold_date_sk#70 IN dynamicpruning#71 +BroadcastExchange (137) ++- * Project (136) + +- * Filter (135) + +- * ColumnarToRow (134) + +- Scan parquet default.date_dim (133) -(136) Scan parquet default.date_dim -Output [2]: [d_date_sk#74, d_week_seq#127] +(133) Scan parquet default.date_dim +Output [2]: [d_date_sk#73, d_week_seq#126] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(137) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#74, d_week_seq#127] +(134) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#73, d_week_seq#126] -(138) Filter [codegen id : 1] -Input [2]: [d_date_sk#74, d_week_seq#127] -Condition : ((isnotnull(d_week_seq#127) AND (d_week_seq#127 = Subquery scalar-subquery#128, [id=#129])) AND isnotnull(d_date_sk#74)) +(135) Filter [codegen id : 1] +Input [2]: [d_date_sk#73, d_week_seq#126] +Condition : ((isnotnull(d_week_seq#126) AND (d_week_seq#126 = Subquery scalar-subquery#127, [id=#128])) AND isnotnull(d_date_sk#73)) -(139) Project [codegen id : 1] -Output [1]: [d_date_sk#74] -Input [2]: [d_date_sk#74, d_week_seq#127] +(136) Project [codegen id : 1] +Output [1]: [d_date_sk#73] +Input [2]: [d_date_sk#73, d_week_seq#126] -(140) BroadcastExchange -Input [1]: [d_date_sk#74] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#130] +(137) BroadcastExchange +Input [1]: [d_date_sk#73] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#129] 
-Subquery:12 Hosting operator id = 138 Hosting Expression = Subquery scalar-subquery#128, [id=#129] -* Project (144) -+- * Filter (143) - +- * ColumnarToRow (142) - +- Scan parquet default.date_dim (141) +Subquery:12 Hosting operator id = 135 Hosting Expression = Subquery scalar-subquery#127, [id=#128] +* Project (141) ++- * Filter (140) + +- * ColumnarToRow (139) + +- Scan parquet default.date_dim (138) -(141) Scan parquet default.date_dim -Output [4]: [d_week_seq#131, d_year#132, d_moy#133, d_dom#134] +(138) Scan parquet default.date_dim +Output [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1998), EqualTo(d_moy,12), EqualTo(d_dom,16)] ReadSchema: struct -(142) ColumnarToRow [codegen id : 1] -Input [4]: [d_week_seq#131, d_year#132, d_moy#133, d_dom#134] +(139) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] -(143) Filter [codegen id : 1] -Input [4]: [d_week_seq#131, d_year#132, d_moy#133, d_dom#134] -Condition : (((((isnotnull(d_year#132) AND isnotnull(d_moy#133)) AND isnotnull(d_dom#134)) AND (d_year#132 = 1998)) AND (d_moy#133 = 12)) AND (d_dom#134 = 16)) +(140) Filter [codegen id : 1] +Input [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] +Condition : (((((isnotnull(d_year#131) AND isnotnull(d_moy#132)) AND isnotnull(d_dom#133)) AND (d_year#131 = 1998)) AND (d_moy#132 = 12)) AND (d_dom#133 = 16)) -(144) Project [codegen id : 1] -Output [1]: [d_week_seq#131] -Input [4]: [d_week_seq#131, d_year#132, d_moy#133, d_dom#134] +(141) Project [codegen id : 1] +Output [1]: [d_week_seq#130] +Input [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt index e7d3f84db0c72..82e338515f431 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt @@ -1,12 +1,12 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] - WholeStageCodegen (92) + WholeStageCodegen (88) BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] Filter [sales] Subquery #4 WholeStageCodegen (8) HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] InputAdapter - Exchange #17 + Exchange #16 WholeStageCodegen (7) HashAggregate [quantity,list_price] [sum,count,sum,count] InputAdapter @@ -19,7 +19,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Scan parquet default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] ReusedSubquery [d_date_sk] #3 InputAdapter - ReusedExchange [d_date_sk] #9 + ReusedExchange [d_date_sk] #8 WholeStageCodegen (4) Project [cs_quantity,cs_list_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] @@ -28,7 +28,7 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Scan parquet default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] ReusedSubquery [d_date_sk] #3 InputAdapter - 
ReusedExchange [d_date_sk] #9 + ReusedExchange [d_date_sk] #8 WholeStageCodegen (6) Project [ws_quantity,ws_list_price] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] @@ -37,11 +37,11 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Scan parquet default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] ReusedSubquery [d_date_sk] #3 InputAdapter - ReusedExchange [d_date_sk] #9 + ReusedExchange [d_date_sk] #8 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 - WholeStageCodegen (45) + WholeStageCodegen (43) HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ss_item_sk,i_item_sk] @@ -74,11 +74,11 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ InputAdapter Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter - WholeStageCodegen (21) + WholeStageCodegen (20) Sort [ss_item_sk] InputAdapter Exchange [ss_item_sk] #4 - WholeStageCodegen (20) + WholeStageCodegen (19) Project [i_item_sk] BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] Filter [i_brand_id,i_class_id,i_category_id] @@ -87,129 +87,124 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #5 - WholeStageCodegen (19) - HashAggregate [brand_id,class_id,category_id] + WholeStageCodegen (18) + SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] InputAdapter - Exchange [brand_id,class_id,category_id] #6 - WholeStageCodegen (18) - HashAggregate [brand_id,class_id,category_id] - SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (13) - Sort [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #7 - WholeStageCodegen (12) - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #8 - WholeStageCodegen (11) - HashAggregate [brand_id,class_id,category_id] - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #3 - BroadcastExchange #9 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (13) + Sort [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #6 + WholeStageCodegen (12) + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #7 + WholeStageCodegen (11) + HashAggregate [brand_id,class_id,category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + 
Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #3 + BroadcastExchange #8 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (10) + SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + InputAdapter + WholeStageCodegen (5) + Sort [i_brand_id,i_class_id,i_category_id] InputAdapter - ReusedExchange [d_date_sk] #9 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (10) - SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (5) - Sort [i_brand_id,i_class_id,i_category_id] + Exchange [i_brand_id,i_class_id,i_category_id] #10 + WholeStageCodegen (4) + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #11 - WholeStageCodegen (4) - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + WholeStageCodegen (9) + Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #11 + WholeStageCodegen (8) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + ReusedSubquery [d_date_sk] #3 + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (7) + Filter [i_item_sk] ColumnarToRow InputAdapter Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (9) - Sort [i_brand_id,i_class_id,i_category_id] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #12 - WholeStageCodegen (8) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #3 - InputAdapter - ReusedExchange [d_date_sk] #9 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (7) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (17) - Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + WholeStageCodegen (17) + Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #13 + WholeStageCodegen (16) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + ReusedSubquery [d_date_sk] #3 + InputAdapter + ReusedExchange [d_date_sk] #8 InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #14 - WholeStageCodegen (16) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter 
[ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] - ReusedSubquery [d_date_sk] #3 - InputAdapter - ReusedExchange [d_date_sk] #9 - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #13 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 InputAdapter ReusedExchange [d_date_sk] #3 InputAdapter - BroadcastExchange #15 - WholeStageCodegen (44) + BroadcastExchange #14 + WholeStageCodegen (42) SortMergeJoin [i_item_sk,ss_item_sk] InputAdapter - WholeStageCodegen (24) + WholeStageCodegen (23) Sort [i_item_sk] InputAdapter - Exchange [i_item_sk] #16 - WholeStageCodegen (23) + Exchange [i_item_sk] #15 + WholeStageCodegen (22) Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] ColumnarToRow InputAdapter Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter - WholeStageCodegen (43) + WholeStageCodegen (41) Sort [ss_item_sk] InputAdapter ReusedExchange [ss_item_sk] #4 InputAdapter - BroadcastExchange #18 - WholeStageCodegen (91) + BroadcastExchange #17 + WholeStageCodegen (87) Filter [sales] ReusedSubquery [average_sales] #4 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #19 - WholeStageCodegen (90) + Exchange [i_brand_id,i_class_id,i_category_id] #18 + WholeStageCodegen (86) HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ss_item_sk,i_item_sk] @@ -217,17 +212,17 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ BroadcastHashJoin [ss_sold_date_sk,d_date_sk] SortMergeJoin [ss_item_sk,ss_item_sk] InputAdapter - WholeStageCodegen (47) + WholeStageCodegen (45) Sort [ss_item_sk] InputAdapter - Exchange [ss_item_sk] #20 - WholeStageCodegen (46) + Exchange [ss_item_sk] #19 + WholeStageCodegen (44) Filter [ss_item_sk] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] SubqueryBroadcast [d_date_sk] #5 - BroadcastExchange #21 + BroadcastExchange #20 WholeStageCodegen (1) Project [d_date_sk] Filter [d_week_seq,d_date_sk] @@ -242,11 +237,11 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ InputAdapter Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter - WholeStageCodegen (66) + WholeStageCodegen (63) Sort [ss_item_sk] InputAdapter ReusedExchange [ss_item_sk] #4 InputAdapter - ReusedExchange [d_date_sk] #21 + ReusedExchange [d_date_sk] #20 InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #14 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt index a5e01db243952..86bbc553e8c31 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt @@ -1,90 +1,88 @@ == Physical Plan == -TakeOrderedAndProject (86) -+- * BroadcastHashJoin Inner 
BuildRight (85) - :- * Filter (68) - : +- * HashAggregate (67) - : +- Exchange (66) - : +- * HashAggregate (65) - : +- * Project (64) - : +- * BroadcastHashJoin Inner BuildRight (63) - : :- * Project (61) - : : +- * BroadcastHashJoin Inner BuildRight (60) - : : :- * BroadcastHashJoin LeftSemi BuildRight (53) +TakeOrderedAndProject (84) ++- * BroadcastHashJoin Inner BuildRight (83) + :- * Filter (66) + : +- * HashAggregate (65) + : +- Exchange (64) + : +- * HashAggregate (63) + : +- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Project (59) + : : +- * BroadcastHashJoin Inner BuildRight (58) + : : :- * BroadcastHashJoin LeftSemi BuildRight (51) : : : :- * Filter (3) : : : : +- * ColumnarToRow (2) : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (52) - : : : +- * Project (51) - : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : +- BroadcastExchange (50) + : : : +- * Project (49) + : : : +- * BroadcastHashJoin Inner BuildRight (48) : : : :- * Filter (6) : : : : +- * ColumnarToRow (5) : : : : +- Scan parquet default.item (4) - : : : +- BroadcastExchange (49) - : : : +- * HashAggregate (48) - : : : +- * HashAggregate (47) - : : : +- * BroadcastHashJoin LeftSemi BuildRight (46) - : : : :- * HashAggregate (35) - : : : : +- Exchange (34) - : : : : +- * HashAggregate (33) - : : : : +- * Project (32) - : : : : +- * BroadcastHashJoin Inner BuildRight (31) - : : : : :- * Project (29) - : : : : : +- * BroadcastHashJoin Inner BuildRight (28) - : : : : : :- * Filter (9) - : : : : : : +- * ColumnarToRow (8) - : : : : : : +- Scan parquet default.store_sales (7) - : : : : : +- BroadcastExchange (27) - : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) - : : : : : :- * Filter (12) - : : : : : : +- * ColumnarToRow (11) - : : : : : : +- Scan parquet default.item (10) - : : : : : +- BroadcastExchange (25) - : : : : : +- * Project (24) - : : : : : +- * BroadcastHashJoin Inner BuildRight (23) - : : : : : :- * Project (21) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) - : : : : : : :- * Filter (15) - : : : : : : : +- * ColumnarToRow (14) - : : : : : : : +- Scan parquet default.catalog_sales (13) - : : : : : : +- BroadcastExchange (19) - : : : : : : +- * Filter (18) - : : : : : : +- * ColumnarToRow (17) - : : : : : : +- Scan parquet default.item (16) - : : : : : +- ReusedExchange (22) - : : : : +- ReusedExchange (30) - : : : +- BroadcastExchange (45) - : : : +- * Project (44) - : : : +- * BroadcastHashJoin Inner BuildRight (43) - : : : :- * Project (41) - : : : : +- * BroadcastHashJoin Inner BuildRight (40) - : : : : :- * Filter (38) - : : : : : +- * ColumnarToRow (37) - : : : : : +- Scan parquet default.web_sales (36) - : : : : +- ReusedExchange (39) - : : : +- ReusedExchange (42) - : : +- BroadcastExchange (59) - : : +- * BroadcastHashJoin LeftSemi BuildRight (58) - : : :- * Filter (56) - : : : +- * ColumnarToRow (55) - : : : +- Scan parquet default.item (54) - : : +- ReusedExchange (57) - : +- ReusedExchange (62) - +- BroadcastExchange (84) - +- * Filter (83) - +- * HashAggregate (82) - +- Exchange (81) - +- * HashAggregate (80) - +- * Project (79) - +- * BroadcastHashJoin Inner BuildRight (78) - :- * Project (76) - : +- * BroadcastHashJoin Inner BuildRight (75) - : :- * BroadcastHashJoin LeftSemi BuildRight (73) - : : :- * Filter (71) - : : : +- * ColumnarToRow (70) - : : : +- Scan parquet default.store_sales (69) - : : +- ReusedExchange (72) - : +- ReusedExchange (74) - +- ReusedExchange (77) + : : : +- BroadcastExchange (47) + : 
: : +- * BroadcastHashJoin LeftSemi BuildRight (46) + : : : :- * HashAggregate (35) + : : : : +- Exchange (34) + : : : : +- * HashAggregate (33) + : : : : +- * Project (32) + : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : :- * Project (29) + : : : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : : : :- * Filter (9) + : : : : : : +- * ColumnarToRow (8) + : : : : : : +- Scan parquet default.store_sales (7) + : : : : : +- BroadcastExchange (27) + : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : : : : : :- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet default.item (10) + : : : : : +- BroadcastExchange (25) + : : : : : +- * Project (24) + : : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : : :- * Project (21) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : :- * Filter (15) + : : : : : : : +- * ColumnarToRow (14) + : : : : : : : +- Scan parquet default.catalog_sales (13) + : : : : : : +- BroadcastExchange (19) + : : : : : : +- * Filter (18) + : : : : : : +- * ColumnarToRow (17) + : : : : : : +- Scan parquet default.item (16) + : : : : : +- ReusedExchange (22) + : : : : +- ReusedExchange (30) + : : : +- BroadcastExchange (45) + : : : +- * Project (44) + : : : +- * BroadcastHashJoin Inner BuildRight (43) + : : : :- * Project (41) + : : : : +- * BroadcastHashJoin Inner BuildRight (40) + : : : : :- * Filter (38) + : : : : : +- * ColumnarToRow (37) + : : : : : +- Scan parquet default.web_sales (36) + : : : : +- ReusedExchange (39) + : : : +- ReusedExchange (42) + : : +- BroadcastExchange (57) + : : +- * BroadcastHashJoin LeftSemi BuildRight (56) + : : :- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.item (52) + : : +- ReusedExchange (55) + : +- ReusedExchange (60) + +- BroadcastExchange (82) + +- * Filter (81) + +- * HashAggregate (80) + +- Exchange (79) + +- * HashAggregate (78) + +- * Project (77) + +- * BroadcastHashJoin Inner BuildRight (76) + :- * Project (74) + : +- * BroadcastHashJoin Inner BuildRight (73) + : :- * BroadcastHashJoin LeftSemi BuildRight (71) + : : :- * Filter (69) + : : : +- * ColumnarToRow (68) + : : : +- Scan parquet default.store_sales (67) + : : +- ReusedExchange (70) + : +- ReusedExchange (72) + +- ReusedExchange (75) (1) Scan parquet default.store_sales @@ -187,7 +185,7 @@ Join condition: None Output [4]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22] Input [6]: [cs_item_sk#17, cs_sold_date_sk#18, i_item_sk#19, i_brand_id#20, i_class_id#21, i_category_id#22] -(22) ReusedExchange [Reuses operator id: 119] +(22) ReusedExchange [Reuses operator id: 117] Output [1]: [d_date_sk#24] (23) BroadcastHashJoin [codegen id : 3] @@ -221,7 +219,7 @@ Join condition: None Output [4]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16] Input [6]: [ss_item_sk#10, ss_sold_date_sk#11, i_item_sk#13, i_brand_id#14, i_class_id#15, i_category_id#16] -(30) ReusedExchange [Reuses operator id: 119] +(30) ReusedExchange [Reuses operator id: 117] Output [1]: [d_date_sk#27] (31) BroadcastHashJoin [codegen id : 6] @@ -278,7 +276,7 @@ Join condition: None Output [4]: [ws_sold_date_sk#33, i_brand_id#35, i_class_id#36, i_category_id#37] Input [6]: [ws_item_sk#32, ws_sold_date_sk#33, i_item_sk#34, i_brand_id#35, i_class_id#36, i_category_id#37] -(42) ReusedExchange [Reuses operator id: 119] +(42) ReusedExchange [Reuses operator id: 117] Output [1]: [d_date_sk#38] (43) BroadcastHashJoin [codegen id : 9] @@ -299,112 +297,98 @@ 
Left keys [6]: [coalesce(brand_id#28, 0), isnull(brand_id#28), coalesce(class_id Right keys [6]: [coalesce(i_brand_id#35, 0), isnull(i_brand_id#35), coalesce(i_class_id#36, 0), isnull(i_class_id#36), coalesce(i_category_id#37, 0), isnull(i_category_id#37)] Join condition: None -(47) HashAggregate [codegen id : 10] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] - -(48) HashAggregate [codegen id : 10] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] - -(49) BroadcastExchange +(47) BroadcastExchange Input [3]: [brand_id#28, class_id#29, category_id#30] Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#40] -(50) BroadcastHashJoin [codegen id : 11] +(48) BroadcastHashJoin [codegen id : 11] Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Right keys [3]: [brand_id#28, class_id#29, category_id#30] Join condition: None -(51) Project [codegen id : 11] +(49) Project [codegen id : 11] Output [1]: [i_item_sk#6 AS ss_item_sk#41] Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#28, class_id#29, category_id#30] -(52) BroadcastExchange +(50) BroadcastExchange Input [1]: [ss_item_sk#41] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] -(53) BroadcastHashJoin [codegen id : 25] +(51) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] Right keys [1]: [ss_item_sk#41] Join condition: None -(54) Scan parquet default.item +(52) Scan parquet default.item Output [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct -(55) ColumnarToRow [codegen id : 23] +(53) ColumnarToRow [codegen id : 23] Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -(56) Filter [codegen id : 23] +(54) Filter [codegen id : 23] Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] Condition : (((isnotnull(i_item_sk#43) AND isnotnull(i_brand_id#44)) AND isnotnull(i_class_id#45)) AND isnotnull(i_category_id#46)) -(57) ReusedExchange [Reuses operator id: 52] +(55) ReusedExchange [Reuses operator id: 50] Output [1]: [ss_item_sk#41] -(58) BroadcastHashJoin [codegen id : 23] +(56) BroadcastHashJoin [codegen id : 23] Left keys [1]: [i_item_sk#43] Right keys [1]: [ss_item_sk#41] Join condition: None -(59) BroadcastExchange +(57) BroadcastExchange Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#47] -(60) BroadcastHashJoin [codegen id : 25] +(58) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] Right keys [1]: [i_item_sk#43] Join condition: None -(61) Project [codegen id : 25] +(59) Project [codegen id : 25] Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46] Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -(62) ReusedExchange 
[Reuses operator id: 110] +(60) ReusedExchange [Reuses operator id: 108] Output [1]: [d_date_sk#48] -(63) BroadcastHashJoin [codegen id : 25] +(61) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_sold_date_sk#4] Right keys [1]: [d_date_sk#48] Join condition: None -(64) Project [codegen id : 25] +(62) Project [codegen id : 25] Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46, d_date_sk#48] -(65) HashAggregate [codegen id : 25] +(63) HashAggregate [codegen id : 25] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] -(66) Exchange +(64) Exchange Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), ENSURE_REQUIREMENTS, [id=#55] -(67) HashAggregate [codegen id : 52] +(65) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56, count(1)#57] Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56 AS sales#59, count(1)#57 AS number_sales#60] -(68) Filter [codegen id : 52] +(66) Filter [codegen id : 52] Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60] Condition : (isnotnull(sales#59) AND (cast(sales#59 as decimal(32,6)) > cast(Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) -(69) Scan parquet default.store_sales +(67) Scan parquet default.store_sales Output [4]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66] Batched: true Location: InMemoryFileIndex [] @@ -412,278 +396,278 @@ PartitionFilters: [isnotnull(ss_sold_date_sk#66), dynamicpruningexpression(ss_so PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct -(70) ColumnarToRow [codegen id : 50] +(68) ColumnarToRow [codegen id : 50] Input [4]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66] -(71) Filter [codegen id : 50] +(69) Filter [codegen id : 50] Input [4]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66] Condition : isnotnull(ss_item_sk#63) -(72) ReusedExchange [Reuses operator id: 52] +(70) ReusedExchange [Reuses operator id: 50] Output [1]: [ss_item_sk#41] -(73) BroadcastHashJoin [codegen id : 50] +(71) BroadcastHashJoin [codegen id : 50] Left keys [1]: [ss_item_sk#63] Right keys [1]: [ss_item_sk#41] Join condition: None -(74) 
ReusedExchange [Reuses operator id: 59] +(72) ReusedExchange [Reuses operator id: 57] Output [4]: [i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71] -(75) BroadcastHashJoin [codegen id : 50] +(73) BroadcastHashJoin [codegen id : 50] Left keys [1]: [ss_item_sk#63] Right keys [1]: [i_item_sk#68] Join condition: None -(76) Project [codegen id : 50] +(74) Project [codegen id : 50] Output [6]: [ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_brand_id#69, i_class_id#70, i_category_id#71] Input [8]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71] -(77) ReusedExchange [Reuses operator id: 124] +(75) ReusedExchange [Reuses operator id: 122] Output [1]: [d_date_sk#72] -(78) BroadcastHashJoin [codegen id : 50] +(76) BroadcastHashJoin [codegen id : 50] Left keys [1]: [ss_sold_date_sk#66] Right keys [1]: [d_date_sk#72] Join condition: None -(79) Project [codegen id : 50] +(77) Project [codegen id : 50] Output [5]: [ss_quantity#64, ss_list_price#65, i_brand_id#69, i_class_id#70, i_category_id#71] Input [7]: [ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_brand_id#69, i_class_id#70, i_category_id#71, d_date_sk#72] -(80) HashAggregate [codegen id : 50] +(78) HashAggregate [codegen id : 50] Input [5]: [ss_quantity#64, ss_list_price#65, i_brand_id#69, i_class_id#70, i_category_id#71] Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#73, isEmpty#74, count#75] Results [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] -(81) Exchange +(79) Exchange Input [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] Arguments: hashpartitioning(i_brand_id#69, i_class_id#70, i_category_id#71, 5), ENSURE_REQUIREMENTS, [id=#79] -(82) HashAggregate [codegen id : 51] +(80) HashAggregate [codegen id : 51] Input [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2))), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#80, count(1)#81] Results [6]: [store AS channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#80 AS sales#83, count(1)#81 AS number_sales#84] -(83) Filter [codegen id : 51] +(81) Filter [codegen id : 51] Input [6]: [channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] Condition : (isnotnull(sales#83) AND (cast(sales#83 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) -(84) BroadcastExchange +(82) BroadcastExchange Input [6]: [channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#85] -(85) BroadcastHashJoin 
[codegen id : 52] +(83) BroadcastHashJoin [codegen id : 52] Left keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] Right keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] Join condition: None -(86) TakeOrderedAndProject +(84) TakeOrderedAndProject Input [12]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60, channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] Arguments: 100, [i_brand_id#44 ASC NULLS FIRST, i_class_id#45 ASC NULLS FIRST, i_category_id#46 ASC NULLS FIRST], [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60, channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] ===== Subqueries ===== -Subquery:1 Hosting operator id = 68 Hosting Expression = Subquery scalar-subquery#61, [id=#62] -* HashAggregate (105) -+- Exchange (104) - +- * HashAggregate (103) - +- Union (102) - :- * Project (91) - : +- * BroadcastHashJoin Inner BuildRight (90) - : :- * ColumnarToRow (88) - : : +- Scan parquet default.store_sales (87) - : +- ReusedExchange (89) - :- * Project (96) - : +- * BroadcastHashJoin Inner BuildRight (95) - : :- * ColumnarToRow (93) - : : +- Scan parquet default.catalog_sales (92) - : +- ReusedExchange (94) - +- * Project (101) - +- * BroadcastHashJoin Inner BuildRight (100) - :- * ColumnarToRow (98) - : +- Scan parquet default.web_sales (97) - +- ReusedExchange (99) - - -(87) Scan parquet default.store_sales +Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#61, [id=#62] +* HashAggregate (103) ++- Exchange (102) + +- * HashAggregate (101) + +- Union (100) + :- * Project (89) + : +- * BroadcastHashJoin Inner BuildRight (88) + : :- * ColumnarToRow (86) + : : +- Scan parquet default.store_sales (85) + : +- ReusedExchange (87) + :- * Project (94) + : +- * BroadcastHashJoin Inner BuildRight (93) + : :- * ColumnarToRow (91) + : : +- Scan parquet default.catalog_sales (90) + : +- ReusedExchange (92) + +- * Project (99) + +- * BroadcastHashJoin Inner BuildRight (98) + :- * ColumnarToRow (96) + : +- Scan parquet default.web_sales (95) + +- ReusedExchange (97) + + +(85) Scan parquet default.store_sales Output [3]: [ss_quantity#86, ss_list_price#87, ss_sold_date_sk#88] Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ss_sold_date_sk#88), dynamicpruningexpression(ss_sold_date_sk#88 IN dynamicpruning#12)] ReadSchema: struct -(88) ColumnarToRow [codegen id : 2] +(86) ColumnarToRow [codegen id : 2] Input [3]: [ss_quantity#86, ss_list_price#87, ss_sold_date_sk#88] -(89) ReusedExchange [Reuses operator id: 119] +(87) ReusedExchange [Reuses operator id: 117] Output [1]: [d_date_sk#89] -(90) BroadcastHashJoin [codegen id : 2] +(88) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#88] Right keys [1]: [d_date_sk#89] Join condition: None -(91) Project [codegen id : 2] +(89) Project [codegen id : 2] Output [2]: [ss_quantity#86 AS quantity#90, ss_list_price#87 AS list_price#91] Input [4]: [ss_quantity#86, ss_list_price#87, ss_sold_date_sk#88, d_date_sk#89] -(92) Scan parquet default.catalog_sales +(90) Scan parquet default.catalog_sales Output [3]: [cs_quantity#92, cs_list_price#93, cs_sold_date_sk#94] Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(cs_sold_date_sk#94), dynamicpruningexpression(cs_sold_date_sk#94 IN dynamicpruning#12)] ReadSchema: struct -(93) ColumnarToRow [codegen id : 4] +(91) ColumnarToRow [codegen id : 4] Input [3]: 
[cs_quantity#92, cs_list_price#93, cs_sold_date_sk#94] -(94) ReusedExchange [Reuses operator id: 119] +(92) ReusedExchange [Reuses operator id: 117] Output [1]: [d_date_sk#95] -(95) BroadcastHashJoin [codegen id : 4] +(93) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#94] Right keys [1]: [d_date_sk#95] Join condition: None -(96) Project [codegen id : 4] +(94) Project [codegen id : 4] Output [2]: [cs_quantity#92 AS quantity#96, cs_list_price#93 AS list_price#97] Input [4]: [cs_quantity#92, cs_list_price#93, cs_sold_date_sk#94, d_date_sk#95] -(97) Scan parquet default.web_sales +(95) Scan parquet default.web_sales Output [3]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100] Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ws_sold_date_sk#100), dynamicpruningexpression(ws_sold_date_sk#100 IN dynamicpruning#12)] ReadSchema: struct -(98) ColumnarToRow [codegen id : 6] +(96) ColumnarToRow [codegen id : 6] Input [3]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100] -(99) ReusedExchange [Reuses operator id: 119] +(97) ReusedExchange [Reuses operator id: 117] Output [1]: [d_date_sk#101] -(100) BroadcastHashJoin [codegen id : 6] +(98) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_sold_date_sk#100] Right keys [1]: [d_date_sk#101] Join condition: None -(101) Project [codegen id : 6] +(99) Project [codegen id : 6] Output [2]: [ws_quantity#98 AS quantity#102, ws_list_price#99 AS list_price#103] Input [4]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100, d_date_sk#101] -(102) Union +(100) Union -(103) HashAggregate [codegen id : 7] +(101) HashAggregate [codegen id : 7] Input [2]: [quantity#90, list_price#91] Keys: [] Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#104, count#105] Results [2]: [sum#106, count#107] -(104) Exchange +(102) Exchange Input [2]: [sum#106, count#107] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#108] -(105) HashAggregate [codegen id : 8] +(103) HashAggregate [codegen id : 8] Input [2]: [sum#106, count#107] Keys: [] Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))#109] Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))#109 AS average_sales#110] -Subquery:2 Hosting operator id = 87 Hosting Expression = ss_sold_date_sk#88 IN dynamicpruning#12 +Subquery:2 Hosting operator id = 85 Hosting Expression = ss_sold_date_sk#88 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 92 Hosting Expression = cs_sold_date_sk#94 IN dynamicpruning#12 +Subquery:3 Hosting operator id = 90 Hosting Expression = cs_sold_date_sk#94 IN dynamicpruning#12 -Subquery:4 Hosting operator id = 97 Hosting Expression = ws_sold_date_sk#100 IN dynamicpruning#12 +Subquery:4 Hosting operator id = 95 Hosting Expression = ws_sold_date_sk#100 IN dynamicpruning#12 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 -BroadcastExchange (110) -+- * Project (109) - +- * Filter (108) - +- * ColumnarToRow (107) - +- Scan parquet 
default.date_dim (106) +BroadcastExchange (108) ++- * Project (107) + +- * Filter (106) + +- * ColumnarToRow (105) + +- Scan parquet default.date_dim (104) -(106) Scan parquet default.date_dim +(104) Scan parquet default.date_dim Output [2]: [d_date_sk#48, d_week_seq#111] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(107) ColumnarToRow [codegen id : 1] +(105) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#48, d_week_seq#111] -(108) Filter [codegen id : 1] +(106) Filter [codegen id : 1] Input [2]: [d_date_sk#48, d_week_seq#111] Condition : ((isnotnull(d_week_seq#111) AND (d_week_seq#111 = Subquery scalar-subquery#112, [id=#113])) AND isnotnull(d_date_sk#48)) -(109) Project [codegen id : 1] +(107) Project [codegen id : 1] Output [1]: [d_date_sk#48] Input [2]: [d_date_sk#48, d_week_seq#111] -(110) BroadcastExchange +(108) BroadcastExchange Input [1]: [d_date_sk#48] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#114] -Subquery:6 Hosting operator id = 108 Hosting Expression = Subquery scalar-subquery#112, [id=#113] -* Project (114) -+- * Filter (113) - +- * ColumnarToRow (112) - +- Scan parquet default.date_dim (111) +Subquery:6 Hosting operator id = 106 Hosting Expression = Subquery scalar-subquery#112, [id=#113] +* Project (112) ++- * Filter (111) + +- * ColumnarToRow (110) + +- Scan parquet default.date_dim (109) -(111) Scan parquet default.date_dim +(109) Scan parquet default.date_dim Output [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,16)] ReadSchema: struct -(112) ColumnarToRow [codegen id : 1] +(110) ColumnarToRow [codegen id : 1] Input [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] -(113) Filter [codegen id : 1] +(111) Filter [codegen id : 1] Input [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] Condition : (((((isnotnull(d_year#116) AND isnotnull(d_moy#117)) AND isnotnull(d_dom#118)) AND (d_year#116 = 1999)) AND (d_moy#117 = 12)) AND (d_dom#118 = 16)) -(114) Project [codegen id : 1] +(112) Project [codegen id : 1] Output [1]: [d_week_seq#115] Input [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] Subquery:7 Hosting operator id = 7 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#12 -BroadcastExchange (119) -+- * Project (118) - +- * Filter (117) - +- * ColumnarToRow (116) - +- Scan parquet default.date_dim (115) +BroadcastExchange (117) ++- * Project (116) + +- * Filter (115) + +- * ColumnarToRow (114) + +- Scan parquet default.date_dim (113) -(115) Scan parquet default.date_dim +(113) Scan parquet default.date_dim Output [2]: [d_date_sk#27, d_year#119] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct -(116) ColumnarToRow [codegen id : 1] +(114) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#27, d_year#119] -(117) Filter [codegen id : 1] +(115) Filter [codegen id : 1] Input [2]: [d_date_sk#27, d_year#119] Condition : (((isnotnull(d_year#119) AND (d_year#119 >= 1998)) AND (d_year#119 <= 2000)) AND isnotnull(d_date_sk#27)) -(118) Project [codegen id : 1] +(116) Project [codegen id : 1] Output [1]: 
[d_date_sk#27] Input [2]: [d_date_sk#27, d_year#119] -(119) BroadcastExchange +(117) BroadcastExchange Input [1]: [d_date_sk#27] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#120] @@ -691,60 +675,60 @@ Subquery:8 Hosting operator id = 13 Hosting Expression = cs_sold_date_sk#18 IN d Subquery:9 Hosting operator id = 36 Hosting Expression = ws_sold_date_sk#33 IN dynamicpruning#12 -Subquery:10 Hosting operator id = 83 Hosting Expression = ReusedSubquery Subquery scalar-subquery#61, [id=#62] +Subquery:10 Hosting operator id = 81 Hosting Expression = ReusedSubquery Subquery scalar-subquery#61, [id=#62] -Subquery:11 Hosting operator id = 69 Hosting Expression = ss_sold_date_sk#66 IN dynamicpruning#67 -BroadcastExchange (124) -+- * Project (123) - +- * Filter (122) - +- * ColumnarToRow (121) - +- Scan parquet default.date_dim (120) +Subquery:11 Hosting operator id = 67 Hosting Expression = ss_sold_date_sk#66 IN dynamicpruning#67 +BroadcastExchange (122) ++- * Project (121) + +- * Filter (120) + +- * ColumnarToRow (119) + +- Scan parquet default.date_dim (118) -(120) Scan parquet default.date_dim +(118) Scan parquet default.date_dim Output [2]: [d_date_sk#72, d_week_seq#121] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(121) ColumnarToRow [codegen id : 1] +(119) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#72, d_week_seq#121] -(122) Filter [codegen id : 1] +(120) Filter [codegen id : 1] Input [2]: [d_date_sk#72, d_week_seq#121] Condition : ((isnotnull(d_week_seq#121) AND (d_week_seq#121 = Subquery scalar-subquery#122, [id=#123])) AND isnotnull(d_date_sk#72)) -(123) Project [codegen id : 1] +(121) Project [codegen id : 1] Output [1]: [d_date_sk#72] Input [2]: [d_date_sk#72, d_week_seq#121] -(124) BroadcastExchange +(122) BroadcastExchange Input [1]: [d_date_sk#72] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#124] -Subquery:12 Hosting operator id = 122 Hosting Expression = Subquery scalar-subquery#122, [id=#123] -* Project (128) -+- * Filter (127) - +- * ColumnarToRow (126) - +- Scan parquet default.date_dim (125) +Subquery:12 Hosting operator id = 120 Hosting Expression = Subquery scalar-subquery#122, [id=#123] +* Project (126) ++- * Filter (125) + +- * ColumnarToRow (124) + +- Scan parquet default.date_dim (123) -(125) Scan parquet default.date_dim +(123) Scan parquet default.date_dim Output [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1998), EqualTo(d_moy,12), EqualTo(d_dom,16)] ReadSchema: struct -(126) ColumnarToRow [codegen id : 1] +(124) ColumnarToRow [codegen id : 1] Input [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] -(127) Filter [codegen id : 1] +(125) Filter [codegen id : 1] Input [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] Condition : (((((isnotnull(d_year#126) AND isnotnull(d_moy#127)) AND isnotnull(d_dom#128)) AND (d_year#126 = 1998)) AND (d_moy#127 = 12)) AND (d_dom#128 = 16)) -(128) Project [codegen id : 1] +(126) Project [codegen id : 1] Output [1]: [d_week_seq#125] Input [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt index 8f722e735172f..259178d0e432f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt @@ -79,77 +79,75 @@ TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_ InputAdapter BroadcastExchange #4 WholeStageCodegen (10) - HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #5 - WholeStageCodegen (6) - HashAggregate [brand_id,class_id,category_id] - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #3 - BroadcastExchange #6 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (4) - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (3) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #3 - InputAdapter - BroadcastExchange #9 - WholeStageCodegen (1) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - ReusedExchange [d_date_sk] #6 - InputAdapter - ReusedExchange [d_date_sk] #6 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (9) + InputAdapter + Exchange [brand_id,class_id,category_id] #5 + WholeStageCodegen (6) + HashAggregate [brand_id,class_id,category_id] Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Filter [ws_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] - ReusedSubquery [d_date_sk] #3 + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #3 + BroadcastExchange #6 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - ReusedExchange 
[i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + BroadcastExchange #7 + WholeStageCodegen (4) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (3) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + ReusedSubquery [d_date_sk] #3 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [d_date_sk] #6 InputAdapter ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + ReusedSubquery [d_date_sk] #3 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + InputAdapter + ReusedExchange [d_date_sk] #6 InputAdapter BroadcastExchange #11 WholeStageCodegen (23) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt index e3ad267942560..88d71316966c6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt @@ -1,150 +1,147 @@ == Physical Plan == -TakeOrderedAndProject (146) -+- * HashAggregate (145) - +- Exchange (144) - +- * HashAggregate (143) - +- Union (142) - :- * HashAggregate (121) - : +- Exchange (120) - : +- * HashAggregate (119) - : +- Union (118) - : :- * Filter (81) - : : +- * HashAggregate (80) - : : +- Exchange (79) - : : +- * HashAggregate (78) - : : +- * Project (77) - : : +- * BroadcastHashJoin Inner BuildRight (76) - : : :- * Project (66) - : : : +- * BroadcastHashJoin Inner BuildRight (65) - : : : :- * SortMergeJoin LeftSemi (63) +TakeOrderedAndProject (143) ++- * HashAggregate (142) + +- Exchange (141) + +- * HashAggregate (140) + +- Union (139) + :- * HashAggregate (118) + : +- Exchange (117) + : +- * HashAggregate (116) + : +- Union (115) + : :- * Filter (78) + : : +- * HashAggregate (77) + : : +- Exchange (76) + : : +- * HashAggregate (75) + : : +- * Project (74) + : : +- * BroadcastHashJoin Inner BuildRight (73) + : : :- * Project (63) + : : : +- * BroadcastHashJoin Inner BuildRight (62) + : : : :- * SortMergeJoin LeftSemi (60) : : : : :- * Sort (5) : : : : : +- Exchange (4) : : : : : +- * Filter (3) : : : : : +- * ColumnarToRow (2) : : : : : +- Scan parquet default.store_sales (1) - : : : : +- * Sort (62) - : : : : +- Exchange (61) - : : : : +- * Project (60) - : : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : : +- * Sort (59) + : : : : +- Exchange (58) + : : : : +- * Project (57) + : : : : +- * 
BroadcastHashJoin Inner BuildRight (56) : : : : :- * Filter (8) : : : : : +- * ColumnarToRow (7) : : : : : +- Scan parquet default.item (6) - : : : : +- BroadcastExchange (58) - : : : : +- * HashAggregate (57) - : : : : +- Exchange (56) - : : : : +- * HashAggregate (55) - : : : : +- * SortMergeJoin LeftSemi (54) - : : : : :- * Sort (42) - : : : : : +- Exchange (41) - : : : : : +- * HashAggregate (40) - : : : : : +- Exchange (39) - : : : : : +- * HashAggregate (38) - : : : : : +- * Project (37) - : : : : : +- * BroadcastHashJoin Inner BuildRight (36) - : : : : : :- * Project (14) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (13) - : : : : : : :- * Filter (11) - : : : : : : : +- * ColumnarToRow (10) - : : : : : : : +- Scan parquet default.store_sales (9) - : : : : : : +- ReusedExchange (12) - : : : : : +- BroadcastExchange (35) - : : : : : +- * SortMergeJoin LeftSemi (34) - : : : : : :- * Sort (19) - : : : : : : +- Exchange (18) - : : : : : : +- * Filter (17) - : : : : : : +- * ColumnarToRow (16) - : : : : : : +- Scan parquet default.item (15) - : : : : : +- * Sort (33) - : : : : : +- Exchange (32) - : : : : : +- * Project (31) - : : : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : : : :- * Project (25) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (24) - : : : : : : :- * Filter (22) - : : : : : : : +- * ColumnarToRow (21) - : : : : : : : +- Scan parquet default.catalog_sales (20) - : : : : : : +- ReusedExchange (23) - : : : : : +- BroadcastExchange (29) - : : : : : +- * Filter (28) - : : : : : +- * ColumnarToRow (27) - : : : : : +- Scan parquet default.item (26) - : : : : +- * Sort (53) - : : : : +- Exchange (52) - : : : : +- * Project (51) - : : : : +- * BroadcastHashJoin Inner BuildRight (50) - : : : : :- * Project (48) - : : : : : +- * BroadcastHashJoin Inner BuildRight (47) - : : : : : :- * Filter (45) - : : : : : : +- * ColumnarToRow (44) - : : : : : : +- Scan parquet default.web_sales (43) - : : : : : +- ReusedExchange (46) - : : : : +- ReusedExchange (49) - : : : +- ReusedExchange (64) - : : +- BroadcastExchange (75) - : : +- * SortMergeJoin LeftSemi (74) - : : :- * Sort (71) - : : : +- Exchange (70) - : : : +- * Filter (69) - : : : +- * ColumnarToRow (68) - : : : +- Scan parquet default.item (67) - : : +- * Sort (73) - : : +- ReusedExchange (72) - : :- * Filter (99) - : : +- * HashAggregate (98) - : : +- Exchange (97) - : : +- * HashAggregate (96) - : : +- * Project (95) - : : +- * BroadcastHashJoin Inner BuildRight (94) - : : :- * Project (92) - : : : +- * BroadcastHashJoin Inner BuildRight (91) - : : : :- * SortMergeJoin LeftSemi (89) - : : : : :- * Sort (86) - : : : : : +- Exchange (85) - : : : : : +- * Filter (84) - : : : : : +- * ColumnarToRow (83) - : : : : : +- Scan parquet default.catalog_sales (82) - : : : : +- * Sort (88) - : : : : +- ReusedExchange (87) - : : : +- ReusedExchange (90) - : : +- ReusedExchange (93) - : +- * Filter (117) - : +- * HashAggregate (116) - : +- Exchange (115) - : +- * HashAggregate (114) - : +- * Project (113) - : +- * BroadcastHashJoin Inner BuildRight (112) - : :- * Project (110) - : : +- * BroadcastHashJoin Inner BuildRight (109) - : : :- * SortMergeJoin LeftSemi (107) - : : : :- * Sort (104) - : : : : +- Exchange (103) - : : : : +- * Filter (102) - : : : : +- * ColumnarToRow (101) - : : : : +- Scan parquet default.web_sales (100) - : : : +- * Sort (106) - : : : +- ReusedExchange (105) - : : +- ReusedExchange (108) - : +- ReusedExchange (111) - :- * HashAggregate (126) - : +- Exchange (125) - : +- * HashAggregate 
(124) - : +- * HashAggregate (123) - : +- ReusedExchange (122) - :- * HashAggregate (131) - : +- Exchange (130) - : +- * HashAggregate (129) - : +- * HashAggregate (128) - : +- ReusedExchange (127) - :- * HashAggregate (136) - : +- Exchange (135) - : +- * HashAggregate (134) - : +- * HashAggregate (133) - : +- ReusedExchange (132) - +- * HashAggregate (141) - +- Exchange (140) - +- * HashAggregate (139) - +- * HashAggregate (138) - +- ReusedExchange (137) + : : : : +- BroadcastExchange (55) + : : : : +- * SortMergeJoin LeftSemi (54) + : : : : :- * Sort (42) + : : : : : +- Exchange (41) + : : : : : +- * HashAggregate (40) + : : : : : +- Exchange (39) + : : : : : +- * HashAggregate (38) + : : : : : +- * Project (37) + : : : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : : : : :- * Project (14) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (13) + : : : : : : :- * Filter (11) + : : : : : : : +- * ColumnarToRow (10) + : : : : : : : +- Scan parquet default.store_sales (9) + : : : : : : +- ReusedExchange (12) + : : : : : +- BroadcastExchange (35) + : : : : : +- * SortMergeJoin LeftSemi (34) + : : : : : :- * Sort (19) + : : : : : : +- Exchange (18) + : : : : : : +- * Filter (17) + : : : : : : +- * ColumnarToRow (16) + : : : : : : +- Scan parquet default.item (15) + : : : : : +- * Sort (33) + : : : : : +- Exchange (32) + : : : : : +- * Project (31) + : : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : : :- * Project (25) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : : : : :- * Filter (22) + : : : : : : : +- * ColumnarToRow (21) + : : : : : : : +- Scan parquet default.catalog_sales (20) + : : : : : : +- ReusedExchange (23) + : : : : : +- BroadcastExchange (29) + : : : : : +- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.item (26) + : : : : +- * Sort (53) + : : : : +- Exchange (52) + : : : : +- * Project (51) + : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : :- * Project (48) + : : : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : : : :- * Filter (45) + : : : : : : +- * ColumnarToRow (44) + : : : : : : +- Scan parquet default.web_sales (43) + : : : : : +- ReusedExchange (46) + : : : : +- ReusedExchange (49) + : : : +- ReusedExchange (61) + : : +- BroadcastExchange (72) + : : +- * SortMergeJoin LeftSemi (71) + : : :- * Sort (68) + : : : +- Exchange (67) + : : : +- * Filter (66) + : : : +- * ColumnarToRow (65) + : : : +- Scan parquet default.item (64) + : : +- * Sort (70) + : : +- ReusedExchange (69) + : :- * Filter (96) + : : +- * HashAggregate (95) + : : +- Exchange (94) + : : +- * HashAggregate (93) + : : +- * Project (92) + : : +- * BroadcastHashJoin Inner BuildRight (91) + : : :- * Project (89) + : : : +- * BroadcastHashJoin Inner BuildRight (88) + : : : :- * SortMergeJoin LeftSemi (86) + : : : : :- * Sort (83) + : : : : : +- Exchange (82) + : : : : : +- * Filter (81) + : : : : : +- * ColumnarToRow (80) + : : : : : +- Scan parquet default.catalog_sales (79) + : : : : +- * Sort (85) + : : : : +- ReusedExchange (84) + : : : +- ReusedExchange (87) + : : +- ReusedExchange (90) + : +- * Filter (114) + : +- * HashAggregate (113) + : +- Exchange (112) + : +- * HashAggregate (111) + : +- * Project (110) + : +- * BroadcastHashJoin Inner BuildRight (109) + : :- * Project (107) + : : +- * BroadcastHashJoin Inner BuildRight (106) + : : :- * SortMergeJoin LeftSemi (104) + : : : :- * Sort (101) + : : : : +- Exchange (100) + : : : : +- * Filter (99) + : : : : +- * ColumnarToRow (98) + : : : : +- Scan 
parquet default.web_sales (97) + : : : +- * Sort (103) + : : : +- ReusedExchange (102) + : : +- ReusedExchange (105) + : +- ReusedExchange (108) + :- * HashAggregate (123) + : +- Exchange (122) + : +- * HashAggregate (121) + : +- * HashAggregate (120) + : +- ReusedExchange (119) + :- * HashAggregate (128) + : +- Exchange (127) + : +- * HashAggregate (126) + : +- * HashAggregate (125) + : +- ReusedExchange (124) + :- * HashAggregate (133) + : +- Exchange (132) + : +- * HashAggregate (131) + : +- * HashAggregate (130) + : +- ReusedExchange (129) + +- * HashAggregate (138) + +- Exchange (137) + +- * HashAggregate (136) + +- * HashAggregate (135) + +- ReusedExchange (134) (1) Scan parquet default.store_sales @@ -177,10 +174,10 @@ Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct -(7) ColumnarToRow [codegen id : 20] +(7) ColumnarToRow [codegen id : 19] Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] -(8) Filter [codegen id : 20] +(8) Filter [codegen id : 19] Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] Condition : ((isnotnull(i_brand_id#8) AND isnotnull(i_class_id#9)) AND isnotnull(i_category_id#10)) @@ -199,7 +196,7 @@ Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] Condition : isnotnull(ss_item_sk#11) -(12) ReusedExchange [Reuses operator id: 180] +(12) ReusedExchange [Reuses operator id: 177] Output [1]: [d_date_sk#14] (13) BroadcastHashJoin [codegen id : 11] @@ -248,7 +245,7 @@ Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] Condition : isnotnull(cs_item_sk#20) -(23) ReusedExchange [Reuses operator id: 180] +(23) ReusedExchange [Reuses operator id: 177] Output [1]: [d_date_sk#22] (24) BroadcastHashJoin [codegen id : 8] @@ -354,7 +351,7 @@ Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] Condition : isnotnull(ws_item_sk#35) -(46) ReusedExchange [Reuses operator id: 180] +(46) ReusedExchange [Reuses operator id: 177] Output [1]: [d_date_sk#37] (47) BroadcastHashJoin [codegen id : 16] @@ -391,663 +388,645 @@ Left keys [6]: [coalesce(brand_id#30, 0), isnull(brand_id#30), coalesce(class_id Right keys [6]: [coalesce(i_brand_id#39, 0), isnull(i_brand_id#39), coalesce(i_class_id#40, 0), isnull(i_class_id#40), coalesce(i_category_id#41, 0), isnull(i_category_id#41)] Join condition: None -(55) HashAggregate [codegen id : 18] +(55) BroadcastExchange Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] - -(56) Exchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: hashpartitioning(brand_id#30, class_id#31, category_id#32, 5), ENSURE_REQUIREMENTS, [id=#43] - -(57) HashAggregate [codegen id : 19] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] - -(58) BroadcastExchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#44] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#43] -(59) 
BroadcastHashJoin [codegen id : 20] +(56) BroadcastHashJoin [codegen id : 19] Left keys [3]: [i_brand_id#8, i_class_id#9, i_category_id#10] Right keys [3]: [brand_id#30, class_id#31, category_id#32] Join condition: None -(60) Project [codegen id : 20] -Output [1]: [i_item_sk#7 AS ss_item_sk#45] +(57) Project [codegen id : 19] +Output [1]: [i_item_sk#7 AS ss_item_sk#44] Input [7]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, brand_id#30, class_id#31, category_id#32] -(61) Exchange -Input [1]: [ss_item_sk#45] -Arguments: hashpartitioning(ss_item_sk#45, 5), ENSURE_REQUIREMENTS, [id=#46] +(58) Exchange +Input [1]: [ss_item_sk#44] +Arguments: hashpartitioning(ss_item_sk#44, 5), ENSURE_REQUIREMENTS, [id=#45] -(62) Sort [codegen id : 21] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS FIRST], false, 0 +(59) Sort [codegen id : 20] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(63) SortMergeJoin [codegen id : 45] +(60) SortMergeJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [ss_item_sk#45] +Right keys [1]: [ss_item_sk#44] Join condition: None -(64) ReusedExchange [Reuses operator id: 175] -Output [1]: [d_date_sk#47] +(61) ReusedExchange [Reuses operator id: 172] +Output [1]: [d_date_sk#46] -(65) BroadcastHashJoin [codegen id : 45] +(62) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#47] +Right keys [1]: [d_date_sk#46] Join condition: None -(66) Project [codegen id : 45] +(63) Project [codegen id : 43] Output [3]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3] -Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#47] +Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#46] -(67) Scan parquet default.item -Output [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] +(64) Scan parquet default.item +Output [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct -(68) ColumnarToRow [codegen id : 23] -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] +(65) ColumnarToRow [codegen id : 22] +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -(69) Filter [codegen id : 23] -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Condition : isnotnull(i_item_sk#48) +(66) Filter [codegen id : 22] +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Condition : isnotnull(i_item_sk#47) -(70) Exchange -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Arguments: hashpartitioning(i_item_sk#48, 5), ENSURE_REQUIREMENTS, [id=#52] +(67) Exchange +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Arguments: hashpartitioning(i_item_sk#47, 5), ENSURE_REQUIREMENTS, [id=#51] -(71) Sort [codegen id : 24] -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Arguments: [i_item_sk#48 ASC NULLS FIRST], false, 0 +(68) Sort [codegen id : 23] +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Arguments: [i_item_sk#47 ASC NULLS FIRST], false, 0 -(72) ReusedExchange [Reuses operator id: 61] -Output [1]: [ss_item_sk#45] +(69) ReusedExchange [Reuses operator id: 58] +Output [1]: [ss_item_sk#44] -(73) Sort [codegen id : 43] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS 
FIRST], false, 0 +(70) Sort [codegen id : 41] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(74) SortMergeJoin [codegen id : 44] -Left keys [1]: [i_item_sk#48] -Right keys [1]: [ss_item_sk#45] +(71) SortMergeJoin [codegen id : 42] +Left keys [1]: [i_item_sk#47] +Right keys [1]: [ss_item_sk#44] Join condition: None -(75) BroadcastExchange -Input [4]: [i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#53] +(72) BroadcastExchange +Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#52] -(76) BroadcastHashJoin [codegen id : 45] +(73) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#48] +Right keys [1]: [i_item_sk#47] Join condition: None -(77) Project [codegen id : 45] -Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] -Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#48, i_brand_id#49, i_class_id#50, i_category_id#51] +(74) Project [codegen id : 43] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -(78) HashAggregate [codegen id : 45] -Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#49, i_class_id#50, i_category_id#51] -Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] +(75) HashAggregate [codegen id : 43] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] -Results [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] +Aggregate Attributes [3]: [sum#53, isEmpty#54, count#55] +Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] -(79) Exchange -Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] -Arguments: hashpartitioning(i_brand_id#49, i_class_id#50, i_category_id#51, 5), ENSURE_REQUIREMENTS, [id=#60] +(76) Exchange +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] +Arguments: hashpartitioning(i_brand_id#48, i_class_id#49, i_category_id#50, 5), ENSURE_REQUIREMENTS, [id=#59] -(80) HashAggregate [codegen id : 46] -Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#57, isEmpty#58, count#59] -Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] +(77) HashAggregate [codegen id : 44] +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61, count(1)#62] -Results [6]: [store AS 
channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#61 AS sales#64, count(1)#62 AS number_sales#65] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60, count(1)#61] +Results [6]: [store AS channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60 AS sales#63, count(1)#61 AS number_sales#64] -(81) Filter [codegen id : 46] -Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65] -Condition : (isnotnull(sales#64) AND (cast(sales#64 as decimal(32,6)) > cast(Subquery scalar-subquery#66, [id=#67] as decimal(32,6)))) +(78) Filter [codegen id : 44] +Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Condition : (isnotnull(sales#63) AND (cast(sales#63 as decimal(32,6)) > cast(Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) -(82) Scan parquet default.catalog_sales -Output [4]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, cs_sold_date_sk#71] +(79) Scan parquet default.catalog_sales +Output [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#71), dynamicpruningexpression(cs_sold_date_sk#71 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#70), dynamicpruningexpression(cs_sold_date_sk#70 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct -(83) ColumnarToRow [codegen id : 47] -Input [4]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, cs_sold_date_sk#71] +(80) ColumnarToRow [codegen id : 45] +Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] -(84) Filter [codegen id : 47] -Input [4]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, cs_sold_date_sk#71] -Condition : isnotnull(cs_item_sk#68) +(81) Filter [codegen id : 45] +Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] +Condition : isnotnull(cs_item_sk#67) -(85) Exchange -Input [4]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, cs_sold_date_sk#71] -Arguments: hashpartitioning(cs_item_sk#68, 5), ENSURE_REQUIREMENTS, [id=#72] +(82) Exchange +Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] +Arguments: hashpartitioning(cs_item_sk#67, 5), ENSURE_REQUIREMENTS, [id=#71] -(86) Sort [codegen id : 48] -Input [4]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, cs_sold_date_sk#71] -Arguments: [cs_item_sk#68 ASC NULLS FIRST], false, 0 +(83) Sort [codegen id : 46] +Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] +Arguments: [cs_item_sk#67 ASC NULLS FIRST], false, 0 -(87) ReusedExchange [Reuses operator id: 61] -Output [1]: [ss_item_sk#45] +(84) ReusedExchange [Reuses operator id: 58] +Output [1]: [ss_item_sk#44] -(88) Sort [codegen id : 67] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS FIRST], false, 0 +(85) Sort [codegen id : 64] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(89) SortMergeJoin [codegen id : 91] -Left keys [1]: [cs_item_sk#68] -Right keys 
[1]: [ss_item_sk#45] +(86) SortMergeJoin [codegen id : 87] +Left keys [1]: [cs_item_sk#67] +Right keys [1]: [ss_item_sk#44] Join condition: None -(90) ReusedExchange [Reuses operator id: 175] -Output [1]: [d_date_sk#73] +(87) ReusedExchange [Reuses operator id: 172] +Output [1]: [d_date_sk#72] -(91) BroadcastHashJoin [codegen id : 91] -Left keys [1]: [cs_sold_date_sk#71] -Right keys [1]: [d_date_sk#73] +(88) BroadcastHashJoin [codegen id : 87] +Left keys [1]: [cs_sold_date_sk#70] +Right keys [1]: [d_date_sk#72] Join condition: None -(92) Project [codegen id : 91] -Output [3]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70] -Input [5]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, cs_sold_date_sk#71, d_date_sk#73] +(89) Project [codegen id : 87] +Output [3]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69] +Input [5]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70, d_date_sk#72] -(93) ReusedExchange [Reuses operator id: 75] -Output [4]: [i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] +(90) ReusedExchange [Reuses operator id: 72] +Output [4]: [i_item_sk#73, i_brand_id#74, i_class_id#75, i_category_id#76] -(94) BroadcastHashJoin [codegen id : 91] -Left keys [1]: [cs_item_sk#68] -Right keys [1]: [i_item_sk#74] +(91) BroadcastHashJoin [codegen id : 87] +Left keys [1]: [cs_item_sk#67] +Right keys [1]: [i_item_sk#73] Join condition: None -(95) Project [codegen id : 91] -Output [5]: [cs_quantity#69, cs_list_price#70, i_brand_id#75, i_class_id#76, i_category_id#77] -Input [7]: [cs_item_sk#68, cs_quantity#69, cs_list_price#70, i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] - -(96) HashAggregate [codegen id : 91] -Input [5]: [cs_quantity#69, cs_list_price#70, i_brand_id#75, i_class_id#76, i_category_id#77] -Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#78, isEmpty#79, count#80] -Results [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] - -(97) Exchange -Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] -Arguments: hashpartitioning(i_brand_id#75, i_class_id#76, i_category_id#77, 5), ENSURE_REQUIREMENTS, [id=#84] - -(98) HashAggregate [codegen id : 92] -Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] -Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#85, count(1)#86] -Results [6]: [catalog AS channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(cs_quantity#69 as decimal(12,2))) * promote_precision(cast(cs_list_price#70 as decimal(12,2)))), DecimalType(18,2)))#85 AS sales#88, count(1)#86 AS number_sales#89] - -(99) Filter [codegen id : 92] -Input [6]: [channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] -Condition : (isnotnull(sales#88) AND (cast(sales#88 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#66, 
[id=#67] as decimal(32,6)))) - -(100) Scan parquet default.web_sales -Output [4]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] +(92) Project [codegen id : 87] +Output [5]: [cs_quantity#68, cs_list_price#69, i_brand_id#74, i_class_id#75, i_category_id#76] +Input [7]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, i_item_sk#73, i_brand_id#74, i_class_id#75, i_category_id#76] + +(93) HashAggregate [codegen id : 87] +Input [5]: [cs_quantity#68, cs_list_price#69, i_brand_id#74, i_class_id#75, i_category_id#76] +Keys [3]: [i_brand_id#74, i_class_id#75, i_category_id#76] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#77, isEmpty#78, count#79] +Results [6]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum#80, isEmpty#81, count#82] + +(94) Exchange +Input [6]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum#80, isEmpty#81, count#82] +Arguments: hashpartitioning(i_brand_id#74, i_class_id#75, i_category_id#76, 5), ENSURE_REQUIREMENTS, [id=#83] + +(95) HashAggregate [codegen id : 88] +Input [6]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum#80, isEmpty#81, count#82] +Keys [3]: [i_brand_id#74, i_class_id#75, i_category_id#76] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#84, count(1)#85] +Results [6]: [catalog AS channel#86, i_brand_id#74, i_class_id#75, i_category_id#76, sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#84 AS sales#87, count(1)#85 AS number_sales#88] + +(96) Filter [codegen id : 88] +Input [6]: [channel#86, i_brand_id#74, i_class_id#75, i_category_id#76, sales#87, number_sales#88] +Condition : (isnotnull(sales#87) AND (cast(sales#87 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) + +(97) Scan parquet default.web_sales +Output [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#93), dynamicpruningexpression(ws_sold_date_sk#93 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#92), dynamicpruningexpression(ws_sold_date_sk#92 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct -(101) ColumnarToRow [codegen id : 93] -Input [4]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] +(98) ColumnarToRow [codegen id : 89] +Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] -(102) Filter [codegen id : 93] -Input [4]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] -Condition : isnotnull(ws_item_sk#90) +(99) Filter [codegen id : 89] +Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] +Condition : isnotnull(ws_item_sk#89) -(103) Exchange -Input [4]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] -Arguments: hashpartitioning(ws_item_sk#90, 5), ENSURE_REQUIREMENTS, [id=#94] +(100) Exchange +Input [4]: [ws_item_sk#89, 
ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] +Arguments: hashpartitioning(ws_item_sk#89, 5), ENSURE_REQUIREMENTS, [id=#93] -(104) Sort [codegen id : 94] -Input [4]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] -Arguments: [ws_item_sk#90 ASC NULLS FIRST], false, 0 +(101) Sort [codegen id : 90] +Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] +Arguments: [ws_item_sk#89 ASC NULLS FIRST], false, 0 -(105) ReusedExchange [Reuses operator id: 61] -Output [1]: [ss_item_sk#45] +(102) ReusedExchange [Reuses operator id: 58] +Output [1]: [ss_item_sk#44] -(106) Sort [codegen id : 113] -Input [1]: [ss_item_sk#45] -Arguments: [ss_item_sk#45 ASC NULLS FIRST], false, 0 +(103) Sort [codegen id : 108] +Input [1]: [ss_item_sk#44] +Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 -(107) SortMergeJoin [codegen id : 137] -Left keys [1]: [ws_item_sk#90] -Right keys [1]: [ss_item_sk#45] +(104) SortMergeJoin [codegen id : 131] +Left keys [1]: [ws_item_sk#89] +Right keys [1]: [ss_item_sk#44] Join condition: None -(108) ReusedExchange [Reuses operator id: 175] -Output [1]: [d_date_sk#95] +(105) ReusedExchange [Reuses operator id: 172] +Output [1]: [d_date_sk#94] -(109) BroadcastHashJoin [codegen id : 137] -Left keys [1]: [ws_sold_date_sk#93] -Right keys [1]: [d_date_sk#95] +(106) BroadcastHashJoin [codegen id : 131] +Left keys [1]: [ws_sold_date_sk#92] +Right keys [1]: [d_date_sk#94] Join condition: None -(110) Project [codegen id : 137] -Output [3]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92] -Input [5]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93, d_date_sk#95] +(107) Project [codegen id : 131] +Output [3]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91] +Input [5]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92, d_date_sk#94] -(111) ReusedExchange [Reuses operator id: 75] -Output [4]: [i_item_sk#96, i_brand_id#97, i_class_id#98, i_category_id#99] +(108) ReusedExchange [Reuses operator id: 72] +Output [4]: [i_item_sk#95, i_brand_id#96, i_class_id#97, i_category_id#98] -(112) BroadcastHashJoin [codegen id : 137] -Left keys [1]: [ws_item_sk#90] -Right keys [1]: [i_item_sk#96] +(109) BroadcastHashJoin [codegen id : 131] +Left keys [1]: [ws_item_sk#89] +Right keys [1]: [i_item_sk#95] Join condition: None -(113) Project [codegen id : 137] -Output [5]: [ws_quantity#91, ws_list_price#92, i_brand_id#97, i_class_id#98, i_category_id#99] -Input [7]: [ws_item_sk#90, ws_quantity#91, ws_list_price#92, i_item_sk#96, i_brand_id#97, i_class_id#98, i_category_id#99] - -(114) HashAggregate [codegen id : 137] -Input [5]: [ws_quantity#91, ws_list_price#92, i_brand_id#97, i_class_id#98, i_category_id#99] -Keys [3]: [i_brand_id#97, i_class_id#98, i_category_id#99] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#100, isEmpty#101, count#102] -Results [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] - -(115) Exchange -Input [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] -Arguments: hashpartitioning(i_brand_id#97, i_class_id#98, i_category_id#99, 5), ENSURE_REQUIREMENTS, [id=#106] - -(116) HashAggregate [codegen id : 138] -Input [6]: [i_brand_id#97, i_class_id#98, i_category_id#99, sum#103, isEmpty#104, count#105] -Keys [3]: [i_brand_id#97, i_class_id#98, 
i_category_id#99] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2)))#107, count(1)#108] -Results [6]: [web AS channel#109, i_brand_id#97, i_class_id#98, i_category_id#99, sum(CheckOverflow((promote_precision(cast(ws_quantity#91 as decimal(12,2))) * promote_precision(cast(ws_list_price#92 as decimal(12,2)))), DecimalType(18,2)))#107 AS sales#110, count(1)#108 AS number_sales#111] - -(117) Filter [codegen id : 138] -Input [6]: [channel#109, i_brand_id#97, i_class_id#98, i_category_id#99, sales#110, number_sales#111] -Condition : (isnotnull(sales#110) AND (cast(sales#110 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#66, [id=#67] as decimal(32,6)))) - -(118) Union - -(119) HashAggregate [codegen id : 139] -Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sales#64, number_sales#65] -Keys [4]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [partial_sum(sales#64), partial_sum(number_sales#65)] -Aggregate Attributes [3]: [sum#112, isEmpty#113, sum#114] -Results [7]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum#115, isEmpty#116, sum#117] - -(120) Exchange -Input [7]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum#115, isEmpty#116, sum#117] -Arguments: hashpartitioning(channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, 5), ENSURE_REQUIREMENTS, [id=#118] - -(121) HashAggregate [codegen id : 140] -Input [7]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum#115, isEmpty#116, sum#117] -Keys [4]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [sum(sales#64), sum(number_sales#65)] -Aggregate Attributes [2]: [sum(sales#64)#119, sum(number_sales#65)#120] -Results [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum(sales#64)#119 AS sum_sales#121, sum(number_sales#65)#120 AS number_sales#122] - -(122) ReusedExchange [Reuses operator id: 120] -Output [7]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum#115, isEmpty#116, sum#117] - -(123) HashAggregate [codegen id : 280] -Input [7]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum#115, isEmpty#116, sum#117] -Keys [4]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [sum(sales#64), sum(number_sales#65)] -Aggregate Attributes [2]: [sum(sales#64)#119, sum(number_sales#65)#120] -Results [5]: [channel#63, i_brand_id#49, i_class_id#50, sum(sales#64)#119 AS sum_sales#121, sum(number_sales#65)#120 AS number_sales#122] - -(124) HashAggregate [codegen id : 280] -Input [5]: [channel#63, i_brand_id#49, i_class_id#50, sum_sales#121, number_sales#122] -Keys [3]: [channel#63, i_brand_id#49, i_class_id#50] -Functions [2]: [partial_sum(sum_sales#121), partial_sum(number_sales#122)] -Aggregate Attributes [3]: [sum#123, isEmpty#124, sum#125] -Results [6]: [channel#63, i_brand_id#49, i_class_id#50, sum#126, isEmpty#127, sum#128] - -(125) Exchange -Input [6]: [channel#63, i_brand_id#49, i_class_id#50, sum#126, isEmpty#127, sum#128] -Arguments: hashpartitioning(channel#63, i_brand_id#49, i_class_id#50, 5), ENSURE_REQUIREMENTS, [id=#129] - -(126) HashAggregate [codegen id : 281] -Input [6]: [channel#63, i_brand_id#49, 
i_class_id#50, sum#126, isEmpty#127, sum#128] -Keys [3]: [channel#63, i_brand_id#49, i_class_id#50] -Functions [2]: [sum(sum_sales#121), sum(number_sales#122)] -Aggregate Attributes [2]: [sum(sum_sales#121)#130, sum(number_sales#122)#131] -Results [6]: [channel#63, i_brand_id#49, i_class_id#50, null AS i_category_id#132, sum(sum_sales#121)#130 AS sum(sum_sales)#133, sum(number_sales#122)#131 AS sum(number_sales)#134] - -(127) ReusedExchange [Reuses operator id: 120] -Output [7]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum#115, isEmpty#116, sum#117] - -(128) HashAggregate [codegen id : 421] -Input [7]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum#115, isEmpty#116, sum#117] -Keys [4]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [sum(sales#64), sum(number_sales#65)] -Aggregate Attributes [2]: [sum(sales#64)#119, sum(number_sales#65)#120] -Results [4]: [channel#63, i_brand_id#49, sum(sales#64)#119 AS sum_sales#121, sum(number_sales#65)#120 AS number_sales#122] - -(129) HashAggregate [codegen id : 421] -Input [4]: [channel#63, i_brand_id#49, sum_sales#121, number_sales#122] -Keys [2]: [channel#63, i_brand_id#49] -Functions [2]: [partial_sum(sum_sales#121), partial_sum(number_sales#122)] -Aggregate Attributes [3]: [sum#135, isEmpty#136, sum#137] -Results [5]: [channel#63, i_brand_id#49, sum#138, isEmpty#139, sum#140] - -(130) Exchange -Input [5]: [channel#63, i_brand_id#49, sum#138, isEmpty#139, sum#140] -Arguments: hashpartitioning(channel#63, i_brand_id#49, 5), ENSURE_REQUIREMENTS, [id=#141] - -(131) HashAggregate [codegen id : 422] -Input [5]: [channel#63, i_brand_id#49, sum#138, isEmpty#139, sum#140] -Keys [2]: [channel#63, i_brand_id#49] -Functions [2]: [sum(sum_sales#121), sum(number_sales#122)] -Aggregate Attributes [2]: [sum(sum_sales#121)#142, sum(number_sales#122)#143] -Results [6]: [channel#63, i_brand_id#49, null AS i_class_id#144, null AS i_category_id#145, sum(sum_sales#121)#142 AS sum(sum_sales)#146, sum(number_sales#122)#143 AS sum(number_sales)#147] - -(132) ReusedExchange [Reuses operator id: 120] -Output [7]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum#115, isEmpty#116, sum#117] - -(133) HashAggregate [codegen id : 562] -Input [7]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum#115, isEmpty#116, sum#117] -Keys [4]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [sum(sales#64), sum(number_sales#65)] -Aggregate Attributes [2]: [sum(sales#64)#119, sum(number_sales#65)#120] -Results [3]: [channel#63, sum(sales#64)#119 AS sum_sales#121, sum(number_sales#65)#120 AS number_sales#122] - -(134) HashAggregate [codegen id : 562] -Input [3]: [channel#63, sum_sales#121, number_sales#122] -Keys [1]: [channel#63] -Functions [2]: [partial_sum(sum_sales#121), partial_sum(number_sales#122)] -Aggregate Attributes [3]: [sum#148, isEmpty#149, sum#150] -Results [4]: [channel#63, sum#151, isEmpty#152, sum#153] - -(135) Exchange -Input [4]: [channel#63, sum#151, isEmpty#152, sum#153] -Arguments: hashpartitioning(channel#63, 5), ENSURE_REQUIREMENTS, [id=#154] - -(136) HashAggregate [codegen id : 563] -Input [4]: [channel#63, sum#151, isEmpty#152, sum#153] -Keys [1]: [channel#63] -Functions [2]: [sum(sum_sales#121), sum(number_sales#122)] -Aggregate Attributes [2]: [sum(sum_sales#121)#155, sum(number_sales#122)#156] -Results [6]: [channel#63, null AS i_brand_id#157, null AS i_class_id#158, null AS i_category_id#159, sum(sum_sales#121)#155 AS sum(sum_sales)#160, 
sum(number_sales#122)#156 AS sum(number_sales)#161] - -(137) ReusedExchange [Reuses operator id: 120] -Output [7]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum#115, isEmpty#116, sum#117] - -(138) HashAggregate [codegen id : 703] -Input [7]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum#115, isEmpty#116, sum#117] -Keys [4]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51] -Functions [2]: [sum(sales#64), sum(number_sales#65)] -Aggregate Attributes [2]: [sum(sales#64)#119, sum(number_sales#65)#120] -Results [2]: [sum(sales#64)#119 AS sum_sales#121, sum(number_sales#65)#120 AS number_sales#122] - -(139) HashAggregate [codegen id : 703] -Input [2]: [sum_sales#121, number_sales#122] +(110) Project [codegen id : 131] +Output [5]: [ws_quantity#90, ws_list_price#91, i_brand_id#96, i_class_id#97, i_category_id#98] +Input [7]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, i_item_sk#95, i_brand_id#96, i_class_id#97, i_category_id#98] + +(111) HashAggregate [codegen id : 131] +Input [5]: [ws_quantity#90, ws_list_price#91, i_brand_id#96, i_class_id#97, i_category_id#98] +Keys [3]: [i_brand_id#96, i_class_id#97, i_category_id#98] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#99, isEmpty#100, count#101] +Results [6]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum#102, isEmpty#103, count#104] + +(112) Exchange +Input [6]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum#102, isEmpty#103, count#104] +Arguments: hashpartitioning(i_brand_id#96, i_class_id#97, i_category_id#98, 5), ENSURE_REQUIREMENTS, [id=#105] + +(113) HashAggregate [codegen id : 132] +Input [6]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum#102, isEmpty#103, count#104] +Keys [3]: [i_brand_id#96, i_class_id#97, i_category_id#98] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2)))#106, count(1)#107] +Results [6]: [web AS channel#108, i_brand_id#96, i_class_id#97, i_category_id#98, sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2)))#106 AS sales#109, count(1)#107 AS number_sales#110] + +(114) Filter [codegen id : 132] +Input [6]: [channel#108, i_brand_id#96, i_class_id#97, i_category_id#98, sales#109, number_sales#110] +Condition : (isnotnull(sales#109) AND (cast(sales#109 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) + +(115) Union + +(116) HashAggregate [codegen id : 133] +Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Keys [4]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50] +Functions [2]: [partial_sum(sales#63), partial_sum(number_sales#64)] +Aggregate Attributes [3]: [sum#111, isEmpty#112, sum#113] +Results [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] + +(117) Exchange +Input [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] +Arguments: 
hashpartitioning(channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, 5), ENSURE_REQUIREMENTS, [id=#117] + +(118) HashAggregate [codegen id : 134] +Input [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] +Keys [4]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50] +Functions [2]: [sum(sales#63), sum(number_sales#64)] +Aggregate Attributes [2]: [sum(sales#63)#118, sum(number_sales#64)#119] +Results [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum(sales#63)#118 AS sum_sales#120, sum(number_sales#64)#119 AS number_sales#121] + +(119) ReusedExchange [Reuses operator id: 117] +Output [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] + +(120) HashAggregate [codegen id : 268] +Input [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] +Keys [4]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50] +Functions [2]: [sum(sales#63), sum(number_sales#64)] +Aggregate Attributes [2]: [sum(sales#63)#118, sum(number_sales#64)#119] +Results [5]: [channel#62, i_brand_id#48, i_class_id#49, sum(sales#63)#118 AS sum_sales#120, sum(number_sales#64)#119 AS number_sales#121] + +(121) HashAggregate [codegen id : 268] +Input [5]: [channel#62, i_brand_id#48, i_class_id#49, sum_sales#120, number_sales#121] +Keys [3]: [channel#62, i_brand_id#48, i_class_id#49] +Functions [2]: [partial_sum(sum_sales#120), partial_sum(number_sales#121)] +Aggregate Attributes [3]: [sum#122, isEmpty#123, sum#124] +Results [6]: [channel#62, i_brand_id#48, i_class_id#49, sum#125, isEmpty#126, sum#127] + +(122) Exchange +Input [6]: [channel#62, i_brand_id#48, i_class_id#49, sum#125, isEmpty#126, sum#127] +Arguments: hashpartitioning(channel#62, i_brand_id#48, i_class_id#49, 5), ENSURE_REQUIREMENTS, [id=#128] + +(123) HashAggregate [codegen id : 269] +Input [6]: [channel#62, i_brand_id#48, i_class_id#49, sum#125, isEmpty#126, sum#127] +Keys [3]: [channel#62, i_brand_id#48, i_class_id#49] +Functions [2]: [sum(sum_sales#120), sum(number_sales#121)] +Aggregate Attributes [2]: [sum(sum_sales#120)#129, sum(number_sales#121)#130] +Results [6]: [channel#62, i_brand_id#48, i_class_id#49, null AS i_category_id#131, sum(sum_sales#120)#129 AS sum(sum_sales)#132, sum(number_sales#121)#130 AS sum(number_sales)#133] + +(124) ReusedExchange [Reuses operator id: 117] +Output [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] + +(125) HashAggregate [codegen id : 403] +Input [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] +Keys [4]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50] +Functions [2]: [sum(sales#63), sum(number_sales#64)] +Aggregate Attributes [2]: [sum(sales#63)#118, sum(number_sales#64)#119] +Results [4]: [channel#62, i_brand_id#48, sum(sales#63)#118 AS sum_sales#120, sum(number_sales#64)#119 AS number_sales#121] + +(126) HashAggregate [codegen id : 403] +Input [4]: [channel#62, i_brand_id#48, sum_sales#120, number_sales#121] +Keys [2]: [channel#62, i_brand_id#48] +Functions [2]: [partial_sum(sum_sales#120), partial_sum(number_sales#121)] +Aggregate Attributes [3]: [sum#134, isEmpty#135, sum#136] +Results [5]: [channel#62, i_brand_id#48, sum#137, isEmpty#138, sum#139] + +(127) Exchange +Input [5]: [channel#62, i_brand_id#48, sum#137, isEmpty#138, sum#139] +Arguments: hashpartitioning(channel#62, i_brand_id#48, 5), ENSURE_REQUIREMENTS, [id=#140] + +(128) 
HashAggregate [codegen id : 404] +Input [5]: [channel#62, i_brand_id#48, sum#137, isEmpty#138, sum#139] +Keys [2]: [channel#62, i_brand_id#48] +Functions [2]: [sum(sum_sales#120), sum(number_sales#121)] +Aggregate Attributes [2]: [sum(sum_sales#120)#141, sum(number_sales#121)#142] +Results [6]: [channel#62, i_brand_id#48, null AS i_class_id#143, null AS i_category_id#144, sum(sum_sales#120)#141 AS sum(sum_sales)#145, sum(number_sales#121)#142 AS sum(number_sales)#146] + +(129) ReusedExchange [Reuses operator id: 117] +Output [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] + +(130) HashAggregate [codegen id : 538] +Input [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] +Keys [4]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50] +Functions [2]: [sum(sales#63), sum(number_sales#64)] +Aggregate Attributes [2]: [sum(sales#63)#118, sum(number_sales#64)#119] +Results [3]: [channel#62, sum(sales#63)#118 AS sum_sales#120, sum(number_sales#64)#119 AS number_sales#121] + +(131) HashAggregate [codegen id : 538] +Input [3]: [channel#62, sum_sales#120, number_sales#121] +Keys [1]: [channel#62] +Functions [2]: [partial_sum(sum_sales#120), partial_sum(number_sales#121)] +Aggregate Attributes [3]: [sum#147, isEmpty#148, sum#149] +Results [4]: [channel#62, sum#150, isEmpty#151, sum#152] + +(132) Exchange +Input [4]: [channel#62, sum#150, isEmpty#151, sum#152] +Arguments: hashpartitioning(channel#62, 5), ENSURE_REQUIREMENTS, [id=#153] + +(133) HashAggregate [codegen id : 539] +Input [4]: [channel#62, sum#150, isEmpty#151, sum#152] +Keys [1]: [channel#62] +Functions [2]: [sum(sum_sales#120), sum(number_sales#121)] +Aggregate Attributes [2]: [sum(sum_sales#120)#154, sum(number_sales#121)#155] +Results [6]: [channel#62, null AS i_brand_id#156, null AS i_class_id#157, null AS i_category_id#158, sum(sum_sales#120)#154 AS sum(sum_sales)#159, sum(number_sales#121)#155 AS sum(number_sales)#160] + +(134) ReusedExchange [Reuses operator id: 117] +Output [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] + +(135) HashAggregate [codegen id : 673] +Input [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] +Keys [4]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50] +Functions [2]: [sum(sales#63), sum(number_sales#64)] +Aggregate Attributes [2]: [sum(sales#63)#118, sum(number_sales#64)#119] +Results [2]: [sum(sales#63)#118 AS sum_sales#120, sum(number_sales#64)#119 AS number_sales#121] + +(136) HashAggregate [codegen id : 673] +Input [2]: [sum_sales#120, number_sales#121] Keys: [] -Functions [2]: [partial_sum(sum_sales#121), partial_sum(number_sales#122)] -Aggregate Attributes [3]: [sum#162, isEmpty#163, sum#164] -Results [3]: [sum#165, isEmpty#166, sum#167] +Functions [2]: [partial_sum(sum_sales#120), partial_sum(number_sales#121)] +Aggregate Attributes [3]: [sum#161, isEmpty#162, sum#163] +Results [3]: [sum#164, isEmpty#165, sum#166] -(140) Exchange -Input [3]: [sum#165, isEmpty#166, sum#167] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#168] +(137) Exchange +Input [3]: [sum#164, isEmpty#165, sum#166] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#167] -(141) HashAggregate [codegen id : 704] -Input [3]: [sum#165, isEmpty#166, sum#167] +(138) HashAggregate [codegen id : 674] +Input [3]: [sum#164, isEmpty#165, sum#166] Keys: [] -Functions [2]: [sum(sum_sales#121), sum(number_sales#122)] -Aggregate 
Attributes [2]: [sum(sum_sales#121)#169, sum(number_sales#122)#170] -Results [6]: [null AS channel#171, null AS i_brand_id#172, null AS i_class_id#173, null AS i_category_id#174, sum(sum_sales#121)#169 AS sum(sum_sales)#175, sum(number_sales#122)#170 AS sum(number_sales)#176] +Functions [2]: [sum(sum_sales#120), sum(number_sales#121)] +Aggregate Attributes [2]: [sum(sum_sales#120)#168, sum(number_sales#121)#169] +Results [6]: [null AS channel#170, null AS i_brand_id#171, null AS i_class_id#172, null AS i_category_id#173, sum(sum_sales#120)#168 AS sum(sum_sales)#174, sum(number_sales#121)#169 AS sum(number_sales)#175] -(142) Union +(139) Union -(143) HashAggregate [codegen id : 705] -Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum_sales#121, number_sales#122] -Keys [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum_sales#121, number_sales#122] +(140) HashAggregate [codegen id : 675] +Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] +Keys [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum_sales#121, number_sales#122] +Results [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] -(144) Exchange -Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum_sales#121, number_sales#122] -Arguments: hashpartitioning(channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum_sales#121, number_sales#122, 5), ENSURE_REQUIREMENTS, [id=#177] +(141) Exchange +Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] +Arguments: hashpartitioning(channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121, 5), ENSURE_REQUIREMENTS, [id=#176] -(145) HashAggregate [codegen id : 706] -Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum_sales#121, number_sales#122] -Keys [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum_sales#121, number_sales#122] +(142) HashAggregate [codegen id : 676] +Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] +Keys [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum_sales#121, number_sales#122] +Results [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] -(146) TakeOrderedAndProject -Input [6]: [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum_sales#121, number_sales#122] -Arguments: 100, [channel#63 ASC NULLS FIRST, i_brand_id#49 ASC NULLS FIRST, i_class_id#50 ASC NULLS FIRST, i_category_id#51 ASC NULLS FIRST], [channel#63, i_brand_id#49, i_class_id#50, i_category_id#51, sum_sales#121, number_sales#122] +(143) TakeOrderedAndProject +Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] +Arguments: 100, [channel#62 ASC NULLS FIRST, i_brand_id#48 ASC NULLS FIRST, i_class_id#49 ASC NULLS FIRST, i_category_id#50 ASC NULLS FIRST], [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] ===== Subqueries ===== -Subquery:1 Hosting operator id = 81 Hosting Expression = 
Subquery scalar-subquery#66, [id=#67] -* HashAggregate (165) -+- Exchange (164) - +- * HashAggregate (163) - +- Union (162) - :- * Project (151) - : +- * BroadcastHashJoin Inner BuildRight (150) - : :- * ColumnarToRow (148) - : : +- Scan parquet default.store_sales (147) - : +- ReusedExchange (149) - :- * Project (156) - : +- * BroadcastHashJoin Inner BuildRight (155) - : :- * ColumnarToRow (153) - : : +- Scan parquet default.catalog_sales (152) - : +- ReusedExchange (154) - +- * Project (161) - +- * BroadcastHashJoin Inner BuildRight (160) - :- * ColumnarToRow (158) - : +- Scan parquet default.web_sales (157) - +- ReusedExchange (159) - - -(147) Scan parquet default.store_sales -Output [3]: [ss_quantity#178, ss_list_price#179, ss_sold_date_sk#180] +Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquery#65, [id=#66] +* HashAggregate (162) ++- Exchange (161) + +- * HashAggregate (160) + +- Union (159) + :- * Project (148) + : +- * BroadcastHashJoin Inner BuildRight (147) + : :- * ColumnarToRow (145) + : : +- Scan parquet default.store_sales (144) + : +- ReusedExchange (146) + :- * Project (153) + : +- * BroadcastHashJoin Inner BuildRight (152) + : :- * ColumnarToRow (150) + : : +- Scan parquet default.catalog_sales (149) + : +- ReusedExchange (151) + +- * Project (158) + +- * BroadcastHashJoin Inner BuildRight (157) + :- * ColumnarToRow (155) + : +- Scan parquet default.web_sales (154) + +- ReusedExchange (156) + + +(144) Scan parquet default.store_sales +Output [3]: [ss_quantity#177, ss_list_price#178, ss_sold_date_sk#179] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#180), dynamicpruningexpression(ss_sold_date_sk#180 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ss_sold_date_sk#179), dynamicpruningexpression(ss_sold_date_sk#179 IN dynamicpruning#13)] ReadSchema: struct -(148) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#178, ss_list_price#179, ss_sold_date_sk#180] +(145) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#177, ss_list_price#178, ss_sold_date_sk#179] -(149) ReusedExchange [Reuses operator id: 180] -Output [1]: [d_date_sk#181] +(146) ReusedExchange [Reuses operator id: 177] +Output [1]: [d_date_sk#180] -(150) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#180] -Right keys [1]: [d_date_sk#181] +(147) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#179] +Right keys [1]: [d_date_sk#180] Join condition: None -(151) Project [codegen id : 2] -Output [2]: [ss_quantity#178 AS quantity#182, ss_list_price#179 AS list_price#183] -Input [4]: [ss_quantity#178, ss_list_price#179, ss_sold_date_sk#180, d_date_sk#181] +(148) Project [codegen id : 2] +Output [2]: [ss_quantity#177 AS quantity#181, ss_list_price#178 AS list_price#182] +Input [4]: [ss_quantity#177, ss_list_price#178, ss_sold_date_sk#179, d_date_sk#180] -(152) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#184, cs_list_price#185, cs_sold_date_sk#186] +(149) Scan parquet default.catalog_sales +Output [3]: [cs_quantity#183, cs_list_price#184, cs_sold_date_sk#185] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#186), dynamicpruningexpression(cs_sold_date_sk#186 IN dynamicpruning#187)] +PartitionFilters: [isnotnull(cs_sold_date_sk#185), dynamicpruningexpression(cs_sold_date_sk#185 IN dynamicpruning#186)] ReadSchema: struct -(153) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#184, cs_list_price#185, cs_sold_date_sk#186] +(150) 
ColumnarToRow [codegen id : 4] +Input [3]: [cs_quantity#183, cs_list_price#184, cs_sold_date_sk#185] -(154) ReusedExchange [Reuses operator id: 170] -Output [1]: [d_date_sk#188] +(151) ReusedExchange [Reuses operator id: 167] +Output [1]: [d_date_sk#187] -(155) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#186] -Right keys [1]: [d_date_sk#188] +(152) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#185] +Right keys [1]: [d_date_sk#187] Join condition: None -(156) Project [codegen id : 4] -Output [2]: [cs_quantity#184 AS quantity#189, cs_list_price#185 AS list_price#190] -Input [4]: [cs_quantity#184, cs_list_price#185, cs_sold_date_sk#186, d_date_sk#188] +(153) Project [codegen id : 4] +Output [2]: [cs_quantity#183 AS quantity#188, cs_list_price#184 AS list_price#189] +Input [4]: [cs_quantity#183, cs_list_price#184, cs_sold_date_sk#185, d_date_sk#187] -(157) Scan parquet default.web_sales -Output [3]: [ws_quantity#191, ws_list_price#192, ws_sold_date_sk#193] +(154) Scan parquet default.web_sales +Output [3]: [ws_quantity#190, ws_list_price#191, ws_sold_date_sk#192] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#193), dynamicpruningexpression(ws_sold_date_sk#193 IN dynamicpruning#187)] +PartitionFilters: [isnotnull(ws_sold_date_sk#192), dynamicpruningexpression(ws_sold_date_sk#192 IN dynamicpruning#186)] ReadSchema: struct -(158) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#191, ws_list_price#192, ws_sold_date_sk#193] +(155) ColumnarToRow [codegen id : 6] +Input [3]: [ws_quantity#190, ws_list_price#191, ws_sold_date_sk#192] -(159) ReusedExchange [Reuses operator id: 170] -Output [1]: [d_date_sk#194] +(156) ReusedExchange [Reuses operator id: 167] +Output [1]: [d_date_sk#193] -(160) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#193] -Right keys [1]: [d_date_sk#194] +(157) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#192] +Right keys [1]: [d_date_sk#193] Join condition: None -(161) Project [codegen id : 6] -Output [2]: [ws_quantity#191 AS quantity#195, ws_list_price#192 AS list_price#196] -Input [4]: [ws_quantity#191, ws_list_price#192, ws_sold_date_sk#193, d_date_sk#194] +(158) Project [codegen id : 6] +Output [2]: [ws_quantity#190 AS quantity#194, ws_list_price#191 AS list_price#195] +Input [4]: [ws_quantity#190, ws_list_price#191, ws_sold_date_sk#192, d_date_sk#193] -(162) Union +(159) Union -(163) HashAggregate [codegen id : 7] -Input [2]: [quantity#182, list_price#183] +(160) HashAggregate [codegen id : 7] +Input [2]: [quantity#181, list_price#182] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#197, count#198] -Results [2]: [sum#199, count#200] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#181 as decimal(12,2))) * promote_precision(cast(list_price#182 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#196, count#197] +Results [2]: [sum#198, count#199] -(164) Exchange -Input [2]: [sum#199, count#200] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#201] +(161) Exchange +Input [2]: [sum#198, count#199] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#200] -(165) HashAggregate [codegen id : 8] -Input [2]: [sum#199, count#200] +(162) HashAggregate [codegen id : 8] +Input [2]: [sum#198, count#199] Keys: [] -Functions 
[1]: [avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2)))#202] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#182 as decimal(12,2))) * promote_precision(cast(list_price#183 as decimal(12,2)))), DecimalType(18,2)))#202 AS average_sales#203] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#181 as decimal(12,2))) * promote_precision(cast(list_price#182 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#181 as decimal(12,2))) * promote_precision(cast(list_price#182 as decimal(12,2)))), DecimalType(18,2)))#201] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#181 as decimal(12,2))) * promote_precision(cast(list_price#182 as decimal(12,2)))), DecimalType(18,2)))#201 AS average_sales#202] -Subquery:2 Hosting operator id = 147 Hosting Expression = ss_sold_date_sk#180 IN dynamicpruning#13 +Subquery:2 Hosting operator id = 144 Hosting Expression = ss_sold_date_sk#179 IN dynamicpruning#13 -Subquery:3 Hosting operator id = 152 Hosting Expression = cs_sold_date_sk#186 IN dynamicpruning#187 -BroadcastExchange (170) -+- * Project (169) - +- * Filter (168) - +- * ColumnarToRow (167) - +- Scan parquet default.date_dim (166) +Subquery:3 Hosting operator id = 149 Hosting Expression = cs_sold_date_sk#185 IN dynamicpruning#186 +BroadcastExchange (167) ++- * Project (166) + +- * Filter (165) + +- * ColumnarToRow (164) + +- Scan parquet default.date_dim (163) -(166) Scan parquet default.date_dim -Output [2]: [d_date_sk#188, d_year#204] +(163) Scan parquet default.date_dim +Output [2]: [d_date_sk#187, d_year#203] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct -(167) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#188, d_year#204] +(164) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#187, d_year#203] -(168) Filter [codegen id : 1] -Input [2]: [d_date_sk#188, d_year#204] -Condition : (((isnotnull(d_year#204) AND (d_year#204 >= 1998)) AND (d_year#204 <= 2000)) AND isnotnull(d_date_sk#188)) +(165) Filter [codegen id : 1] +Input [2]: [d_date_sk#187, d_year#203] +Condition : (((isnotnull(d_year#203) AND (d_year#203 >= 1998)) AND (d_year#203 <= 2000)) AND isnotnull(d_date_sk#187)) -(169) Project [codegen id : 1] -Output [1]: [d_date_sk#188] -Input [2]: [d_date_sk#188, d_year#204] +(166) Project [codegen id : 1] +Output [1]: [d_date_sk#187] +Input [2]: [d_date_sk#187, d_year#203] -(170) BroadcastExchange -Input [1]: [d_date_sk#188] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#205] +(167) BroadcastExchange +Input [1]: [d_date_sk#187] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#204] -Subquery:4 Hosting operator id = 157 Hosting Expression = ws_sold_date_sk#193 IN dynamicpruning#187 +Subquery:4 Hosting operator id = 154 Hosting Expression = ws_sold_date_sk#192 IN dynamicpruning#186 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 -BroadcastExchange (175) -+- * Project (174) - +- * Filter (173) - 
+- * ColumnarToRow (172) - +- Scan parquet default.date_dim (171) +BroadcastExchange (172) ++- * Project (171) + +- * Filter (170) + +- * ColumnarToRow (169) + +- Scan parquet default.date_dim (168) -(171) Scan parquet default.date_dim -Output [3]: [d_date_sk#47, d_year#206, d_moy#207] +(168) Scan parquet default.date_dim +Output [3]: [d_date_sk#46, d_year#205, d_moy#206] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct -(172) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#47, d_year#206, d_moy#207] +(169) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#46, d_year#205, d_moy#206] -(173) Filter [codegen id : 1] -Input [3]: [d_date_sk#47, d_year#206, d_moy#207] -Condition : ((((isnotnull(d_year#206) AND isnotnull(d_moy#207)) AND (d_year#206 = 2000)) AND (d_moy#207 = 11)) AND isnotnull(d_date_sk#47)) +(170) Filter [codegen id : 1] +Input [3]: [d_date_sk#46, d_year#205, d_moy#206] +Condition : ((((isnotnull(d_year#205) AND isnotnull(d_moy#206)) AND (d_year#205 = 2000)) AND (d_moy#206 = 11)) AND isnotnull(d_date_sk#46)) -(174) Project [codegen id : 1] -Output [1]: [d_date_sk#47] -Input [3]: [d_date_sk#47, d_year#206, d_moy#207] +(171) Project [codegen id : 1] +Output [1]: [d_date_sk#46] +Input [3]: [d_date_sk#46, d_year#205, d_moy#206] -(175) BroadcastExchange -Input [1]: [d_date_sk#47] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#208] +(172) BroadcastExchange +Input [1]: [d_date_sk#46] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#207] Subquery:6 Hosting operator id = 9 Hosting Expression = ss_sold_date_sk#12 IN dynamicpruning#13 -BroadcastExchange (180) -+- * Project (179) - +- * Filter (178) - +- * ColumnarToRow (177) - +- Scan parquet default.date_dim (176) +BroadcastExchange (177) ++- * Project (176) + +- * Filter (175) + +- * ColumnarToRow (174) + +- Scan parquet default.date_dim (173) -(176) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#209] +(173) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#208] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(177) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#209] +(174) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#14, d_year#208] -(178) Filter [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#209] -Condition : (((isnotnull(d_year#209) AND (d_year#209 >= 1999)) AND (d_year#209 <= 2001)) AND isnotnull(d_date_sk#14)) +(175) Filter [codegen id : 1] +Input [2]: [d_date_sk#14, d_year#208] +Condition : (((isnotnull(d_year#208) AND (d_year#208 >= 1999)) AND (d_year#208 <= 2001)) AND isnotnull(d_date_sk#14)) -(179) Project [codegen id : 1] +(176) Project [codegen id : 1] Output [1]: [d_date_sk#14] -Input [2]: [d_date_sk#14, d_year#209] +Input [2]: [d_date_sk#14, d_year#208] -(180) BroadcastExchange +(177) BroadcastExchange Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#210] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#209] Subquery:7 Hosting operator id = 20 Hosting Expression = cs_sold_date_sk#21 IN dynamicpruning#13 Subquery:8 
Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#36 IN dynamicpruning#13 -Subquery:9 Hosting operator id = 99 Hosting Expression = ReusedSubquery Subquery scalar-subquery#66, [id=#67] +Subquery:9 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#65, [id=#66] -Subquery:10 Hosting operator id = 82 Hosting Expression = cs_sold_date_sk#71 IN dynamicpruning#5 +Subquery:10 Hosting operator id = 79 Hosting Expression = cs_sold_date_sk#70 IN dynamicpruning#5 -Subquery:11 Hosting operator id = 117 Hosting Expression = ReusedSubquery Subquery scalar-subquery#66, [id=#67] +Subquery:11 Hosting operator id = 114 Hosting Expression = ReusedSubquery Subquery scalar-subquery#65, [id=#66] -Subquery:12 Hosting operator id = 100 Hosting Expression = ws_sold_date_sk#93 IN dynamicpruning#5 +Subquery:12 Hosting operator id = 97 Hosting Expression = ws_sold_date_sk#92 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt index b5378a01bfa13..856de20a40ca8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt @@ -1,27 +1,27 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] - WholeStageCodegen (706) + WholeStageCodegen (676) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] InputAdapter Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #1 - WholeStageCodegen (705) + WholeStageCodegen (675) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] InputAdapter Union - WholeStageCodegen (140) + WholeStageCodegen (134) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] InputAdapter Exchange [channel,i_brand_id,i_class_id,i_category_id] #2 - WholeStageCodegen (139) + WholeStageCodegen (133) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] InputAdapter Union - WholeStageCodegen (46) + WholeStageCodegen (44) Filter [sales] Subquery #3 WholeStageCodegen (8) HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(quantity as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] InputAdapter - Exchange #19 + Exchange #18 WholeStageCodegen (7) HashAggregate [quantity,list_price] [sum,count,sum,count] InputAdapter @@ -34,7 +34,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num Scan parquet default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] ReusedSubquery [d_date_sk] #2 InputAdapter - ReusedExchange [d_date_sk] #11 + ReusedExchange [d_date_sk] #10 WholeStageCodegen (4) Project [cs_quantity,cs_list_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] @@ -42,7 +42,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Scan parquet default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] SubqueryBroadcast [d_date_sk] #4 - BroadcastExchange #20 + BroadcastExchange #19 WholeStageCodegen (1) Project [d_date_sk] Filter [d_year,d_date_sk] @@ -50,7 +50,7 @@ TakeOrderedAndProject 
[channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - ReusedExchange [d_date_sk] #20 + ReusedExchange [d_date_sk] #19 WholeStageCodegen (6) Project [ws_quantity,ws_list_price] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] @@ -59,11 +59,11 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num Scan parquet default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] ReusedSubquery [d_date_sk] #4 InputAdapter - ReusedExchange [d_date_sk] #20 + ReusedExchange [d_date_sk] #19 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ss_quantity as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #3 - WholeStageCodegen (45) + WholeStageCodegen (43) HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ss_item_sk,i_item_sk] @@ -89,11 +89,11 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter - WholeStageCodegen (21) + WholeStageCodegen (20) Sort [ss_item_sk] InputAdapter Exchange [ss_item_sk] #6 - WholeStageCodegen (20) + WholeStageCodegen (19) Project [i_item_sk] BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] Filter [i_brand_id,i_class_id,i_category_id] @@ -102,127 +102,122 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #7 - WholeStageCodegen (19) - HashAggregate [brand_id,class_id,category_id] + WholeStageCodegen (18) + SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] InputAdapter - Exchange [brand_id,class_id,category_id] #8 - WholeStageCodegen (18) - HashAggregate [brand_id,class_id,category_id] - SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (13) - Sort [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #9 - WholeStageCodegen (12) - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #10 - WholeStageCodegen (11) - HashAggregate [brand_id,class_id,category_id] - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #2 - BroadcastExchange #11 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (13) + Sort [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #8 + WholeStageCodegen (12) + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #9 + WholeStageCodegen (11) + HashAggregate [brand_id,class_id,category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin 
[ss_item_sk,i_item_sk] + Project [ss_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #2 + BroadcastExchange #10 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk] #10 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (10) + SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + InputAdapter + WholeStageCodegen (5) + Sort [i_brand_id,i_class_id,i_category_id] InputAdapter - ReusedExchange [d_date_sk] #11 - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (10) - SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (5) - Sort [i_brand_id,i_class_id,i_category_id] + Exchange [i_brand_id,i_class_id,i_category_id] #12 + WholeStageCodegen (4) + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #13 - WholeStageCodegen (4) - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + WholeStageCodegen (9) + Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #13 + WholeStageCodegen (8) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + ReusedSubquery [d_date_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #10 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (7) + Filter [i_item_sk] ColumnarToRow InputAdapter Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (9) - Sort [i_brand_id,i_class_id,i_category_id] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #14 - WholeStageCodegen (8) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #2 - InputAdapter - ReusedExchange [d_date_sk] #11 - InputAdapter - BroadcastExchange #15 - WholeStageCodegen (7) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - WholeStageCodegen (17) - Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + WholeStageCodegen (17) + Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen (16) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + ReusedSubquery [d_date_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #10 InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #16 - WholeStageCodegen (16) - Project 
[i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] - ReusedSubquery [d_date_sk] #2 - InputAdapter - ReusedExchange [d_date_sk] #11 - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #14 InputAdapter ReusedExchange [d_date_sk] #5 InputAdapter - BroadcastExchange #17 - WholeStageCodegen (44) + BroadcastExchange #16 + WholeStageCodegen (42) SortMergeJoin [i_item_sk,ss_item_sk] InputAdapter - WholeStageCodegen (24) + WholeStageCodegen (23) Sort [i_item_sk] InputAdapter - Exchange [i_item_sk] #18 - WholeStageCodegen (23) + Exchange [i_item_sk] #17 + WholeStageCodegen (22) Filter [i_item_sk] ColumnarToRow InputAdapter Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter - WholeStageCodegen (43) + WholeStageCodegen (41) Sort [ss_item_sk] InputAdapter ReusedExchange [ss_item_sk] #6 - WholeStageCodegen (92) + WholeStageCodegen (88) Filter [sales] ReusedSubquery [average_sales] #3 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cs_quantity as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #21 - WholeStageCodegen (91) + Exchange [i_brand_id,i_class_id,i_category_id] #20 + WholeStageCodegen (87) HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [cs_item_sk,i_item_sk] @@ -230,32 +225,32 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num BroadcastHashJoin [cs_sold_date_sk,d_date_sk] SortMergeJoin [cs_item_sk,ss_item_sk] InputAdapter - WholeStageCodegen (48) + WholeStageCodegen (46) Sort [cs_item_sk] InputAdapter - Exchange [cs_item_sk] #22 - WholeStageCodegen (47) + Exchange [cs_item_sk] #21 + WholeStageCodegen (45) Filter [cs_item_sk] ColumnarToRow InputAdapter Scan parquet default.catalog_sales [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] ReusedSubquery [d_date_sk] #1 InputAdapter - WholeStageCodegen (67) + WholeStageCodegen (64) Sort [ss_item_sk] InputAdapter ReusedExchange [ss_item_sk] #6 InputAdapter ReusedExchange [d_date_sk] #5 InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #17 - WholeStageCodegen (138) + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #16 + WholeStageCodegen (132) Filter [sales] ReusedSubquery [average_sales] #3 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(ws_quantity as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),channel,sales,number_sales,sum,isEmpty,count] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #23 - WholeStageCodegen (137) + Exchange [i_brand_id,i_class_id,i_category_id] #22 + WholeStageCodegen (131) HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin 
[ws_item_sk,i_item_sk] @@ -263,57 +258,57 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num BroadcastHashJoin [ws_sold_date_sk,d_date_sk] SortMergeJoin [ws_item_sk,ss_item_sk] InputAdapter - WholeStageCodegen (94) + WholeStageCodegen (90) Sort [ws_item_sk] InputAdapter - Exchange [ws_item_sk] #24 - WholeStageCodegen (93) + Exchange [ws_item_sk] #23 + WholeStageCodegen (89) Filter [ws_item_sk] ColumnarToRow InputAdapter Scan parquet default.web_sales [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk] ReusedSubquery [d_date_sk] #1 InputAdapter - WholeStageCodegen (113) + WholeStageCodegen (108) Sort [ss_item_sk] InputAdapter ReusedExchange [ss_item_sk] #6 InputAdapter ReusedExchange [d_date_sk] #5 InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #17 - WholeStageCodegen (281) + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #16 + WholeStageCodegen (269) HashAggregate [channel,i_brand_id,i_class_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] InputAdapter - Exchange [channel,i_brand_id,i_class_id] #25 - WholeStageCodegen (280) + Exchange [channel,i_brand_id,i_class_id] #24 + WholeStageCodegen (268) HashAggregate [channel,i_brand_id,i_class_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] InputAdapter ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 - WholeStageCodegen (422) + WholeStageCodegen (404) HashAggregate [channel,i_brand_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] InputAdapter - Exchange [channel,i_brand_id] #26 - WholeStageCodegen (421) + Exchange [channel,i_brand_id] #25 + WholeStageCodegen (403) HashAggregate [channel,i_brand_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] InputAdapter ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 - WholeStageCodegen (563) + WholeStageCodegen (539) HashAggregate [channel,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] InputAdapter - Exchange [channel] #27 - WholeStageCodegen (562) + Exchange [channel] #26 + WholeStageCodegen (538) HashAggregate [channel,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] InputAdapter ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 - WholeStageCodegen (704) + WholeStageCodegen (674) HashAggregate [sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] InputAdapter - Exchange #28 - WholeStageCodegen (703) + Exchange #27 + WholeStageCodegen (673) HashAggregate [sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] InputAdapter diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt index 5d0a71ecbf8a2..2438fa9d7eb57 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt @@ -1,131 +1,129 @@ == Physical Plan == -TakeOrderedAndProject (127) -+- * HashAggregate (126) - +- Exchange (125) - +- * HashAggregate (124) - +- Union (123) - :- * HashAggregate (102) - : +- Exchange (101) - : +- * HashAggregate (100) - : +- Union (99) - : :- * Filter (68) - : : +- * HashAggregate (67) - : : +- Exchange (66) - : : +- * HashAggregate (65) - : : +- * Project (64) - : : +- * BroadcastHashJoin Inner BuildRight (63) - : : :- * Project (61) - : : : +- * BroadcastHashJoin Inner BuildRight (60) - : : : :- * BroadcastHashJoin LeftSemi BuildRight (53) +TakeOrderedAndProject (125) ++- * HashAggregate (124) + +- Exchange (123) + +- * HashAggregate (122) + +- Union (121) + :- * HashAggregate (100) + : +- Exchange (99) + : +- * HashAggregate (98) + : +- Union (97) + : :- * Filter (66) + : : +- * HashAggregate (65) + : : +- Exchange (64) + : : +- * HashAggregate (63) + : : +- * Project (62) + : : +- * BroadcastHashJoin Inner BuildRight (61) + : : :- * Project (59) + : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (51) : : : : :- * Filter (3) : : : : : +- * ColumnarToRow (2) : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (52) - : : : : +- * Project (51) - : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : +- BroadcastExchange (50) + : : : : +- * Project (49) + : : : : +- * BroadcastHashJoin Inner BuildRight (48) : : : : :- * Filter (6) : : : : : +- * ColumnarToRow (5) : : : : : +- Scan parquet default.item (4) - : : : : +- BroadcastExchange (49) - : : : : +- * HashAggregate (48) - : : : : +- * HashAggregate (47) - : : : : +- * BroadcastHashJoin LeftSemi BuildRight (46) - : : : : :- * HashAggregate (35) - : : : : : +- Exchange (34) - : : : : : +- * HashAggregate (33) - : : : : : +- * Project (32) - : : : : : +- * BroadcastHashJoin Inner BuildRight (31) - : : : : : :- * Project (29) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (28) - : : : : : : :- * Filter (9) - : : : : : : : +- * ColumnarToRow (8) - : : : : : : : +- Scan parquet default.store_sales (7) - : : : : : : +- BroadcastExchange (27) - : : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) - : : : : : : :- * Filter (12) - : : : : : : : +- * ColumnarToRow (11) - : : : : : : : +- Scan parquet default.item (10) - : : : : : : +- BroadcastExchange (25) - : : : : : : +- * Project (24) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (23) - : : : : : : :- * Project (21) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) - : : : : : : : :- * Filter (15) - : : : : : : : : +- * ColumnarToRow (14) - : : : : : : : : +- Scan parquet default.catalog_sales (13) - : : : : : : : +- BroadcastExchange (19) - : : : : : : : +- * Filter (18) - : : : : : : : +- * ColumnarToRow (17) - : : : : : : : +- Scan parquet default.item (16) - : : : : : : +- ReusedExchange (22) - : : : : : +- ReusedExchange (30) - : : : : +- BroadcastExchange (45) - : : : : +- * Project (44) - : : : : +- * BroadcastHashJoin Inner BuildRight (43) - : : : : :- * Project (41) - : : : : : +- * BroadcastHashJoin Inner BuildRight (40) - : : : : : :- * Filter (38) 
- : : : : : : +- * ColumnarToRow (37) - : : : : : : +- Scan parquet default.web_sales (36) - : : : : : +- ReusedExchange (39) - : : : : +- ReusedExchange (42) - : : : +- BroadcastExchange (59) - : : : +- * BroadcastHashJoin LeftSemi BuildRight (58) - : : : :- * Filter (56) - : : : : +- * ColumnarToRow (55) - : : : : +- Scan parquet default.item (54) - : : : +- ReusedExchange (57) - : : +- ReusedExchange (62) - : :- * Filter (83) - : : +- * HashAggregate (82) - : : +- Exchange (81) - : : +- * HashAggregate (80) - : : +- * Project (79) - : : +- * BroadcastHashJoin Inner BuildRight (78) - : : :- * Project (76) - : : : +- * BroadcastHashJoin Inner BuildRight (75) - : : : :- * BroadcastHashJoin LeftSemi BuildRight (73) - : : : : :- * Filter (71) - : : : : : +- * ColumnarToRow (70) - : : : : : +- Scan parquet default.catalog_sales (69) - : : : : +- ReusedExchange (72) - : : : +- ReusedExchange (74) - : : +- ReusedExchange (77) - : +- * Filter (98) - : +- * HashAggregate (97) - : +- Exchange (96) - : +- * HashAggregate (95) - : +- * Project (94) - : +- * BroadcastHashJoin Inner BuildRight (93) - : :- * Project (91) - : : +- * BroadcastHashJoin Inner BuildRight (90) - : : :- * BroadcastHashJoin LeftSemi BuildRight (88) - : : : :- * Filter (86) - : : : : +- * ColumnarToRow (85) - : : : : +- Scan parquet default.web_sales (84) - : : : +- ReusedExchange (87) - : : +- ReusedExchange (89) - : +- ReusedExchange (92) - :- * HashAggregate (107) - : +- Exchange (106) - : +- * HashAggregate (105) - : +- * HashAggregate (104) - : +- ReusedExchange (103) - :- * HashAggregate (112) - : +- Exchange (111) - : +- * HashAggregate (110) - : +- * HashAggregate (109) - : +- ReusedExchange (108) - :- * HashAggregate (117) - : +- Exchange (116) - : +- * HashAggregate (115) - : +- * HashAggregate (114) - : +- ReusedExchange (113) - +- * HashAggregate (122) - +- Exchange (121) - +- * HashAggregate (120) - +- * HashAggregate (119) - +- ReusedExchange (118) + : : : : +- BroadcastExchange (47) + : : : : +- * BroadcastHashJoin LeftSemi BuildRight (46) + : : : : :- * HashAggregate (35) + : : : : : +- Exchange (34) + : : : : : +- * HashAggregate (33) + : : : : : +- * Project (32) + : : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : : :- * Project (29) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : : : : :- * Filter (9) + : : : : : : : +- * ColumnarToRow (8) + : : : : : : : +- Scan parquet default.store_sales (7) + : : : : : : +- BroadcastExchange (27) + : : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : : : : : : :- * Filter (12) + : : : : : : : +- * ColumnarToRow (11) + : : : : : : : +- Scan parquet default.item (10) + : : : : : : +- BroadcastExchange (25) + : : : : : : +- * Project (24) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : : : :- * Project (21) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : : :- * Filter (15) + : : : : : : : : +- * ColumnarToRow (14) + : : : : : : : : +- Scan parquet default.catalog_sales (13) + : : : : : : : +- BroadcastExchange (19) + : : : : : : : +- * Filter (18) + : : : : : : : +- * ColumnarToRow (17) + : : : : : : : +- Scan parquet default.item (16) + : : : : : : +- ReusedExchange (22) + : : : : : +- ReusedExchange (30) + : : : : +- BroadcastExchange (45) + : : : : +- * Project (44) + : : : : +- * BroadcastHashJoin Inner BuildRight (43) + : : : : :- * Project (41) + : : : : : +- * BroadcastHashJoin Inner BuildRight (40) + : : : : : :- * Filter (38) + : : : : : : +- * ColumnarToRow (37) + : : : : : 
: +- Scan parquet default.web_sales (36) + : : : : : +- ReusedExchange (39) + : : : : +- ReusedExchange (42) + : : : +- BroadcastExchange (57) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (56) + : : : :- * Filter (54) + : : : : +- * ColumnarToRow (53) + : : : : +- Scan parquet default.item (52) + : : : +- ReusedExchange (55) + : : +- ReusedExchange (60) + : :- * Filter (81) + : : +- * HashAggregate (80) + : : +- Exchange (79) + : : +- * HashAggregate (78) + : : +- * Project (77) + : : +- * BroadcastHashJoin Inner BuildRight (76) + : : :- * Project (74) + : : : +- * BroadcastHashJoin Inner BuildRight (73) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (71) + : : : : :- * Filter (69) + : : : : : +- * ColumnarToRow (68) + : : : : : +- Scan parquet default.catalog_sales (67) + : : : : +- ReusedExchange (70) + : : : +- ReusedExchange (72) + : : +- ReusedExchange (75) + : +- * Filter (96) + : +- * HashAggregate (95) + : +- Exchange (94) + : +- * HashAggregate (93) + : +- * Project (92) + : +- * BroadcastHashJoin Inner BuildRight (91) + : :- * Project (89) + : : +- * BroadcastHashJoin Inner BuildRight (88) + : : :- * BroadcastHashJoin LeftSemi BuildRight (86) + : : : :- * Filter (84) + : : : : +- * ColumnarToRow (83) + : : : : +- Scan parquet default.web_sales (82) + : : : +- ReusedExchange (85) + : : +- ReusedExchange (87) + : +- ReusedExchange (90) + :- * HashAggregate (105) + : +- Exchange (104) + : +- * HashAggregate (103) + : +- * HashAggregate (102) + : +- ReusedExchange (101) + :- * HashAggregate (110) + : +- Exchange (109) + : +- * HashAggregate (108) + : +- * HashAggregate (107) + : +- ReusedExchange (106) + :- * HashAggregate (115) + : +- Exchange (114) + : +- * HashAggregate (113) + : +- * HashAggregate (112) + : +- ReusedExchange (111) + +- * HashAggregate (120) + +- Exchange (119) + +- * HashAggregate (118) + +- * HashAggregate (117) + +- ReusedExchange (116) (1) Scan parquet default.store_sales @@ -228,7 +226,7 @@ Join condition: None Output [4]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22] Input [6]: [cs_item_sk#17, cs_sold_date_sk#18, i_item_sk#19, i_brand_id#20, i_class_id#21, i_category_id#22] -(22) ReusedExchange [Reuses operator id: 161] +(22) ReusedExchange [Reuses operator id: 159] Output [1]: [d_date_sk#24] (23) BroadcastHashJoin [codegen id : 3] @@ -262,7 +260,7 @@ Join condition: None Output [4]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16] Input [6]: [ss_item_sk#10, ss_sold_date_sk#11, i_item_sk#13, i_brand_id#14, i_class_id#15, i_category_id#16] -(30) ReusedExchange [Reuses operator id: 161] +(30) ReusedExchange [Reuses operator id: 159] Output [1]: [d_date_sk#27] (31) BroadcastHashJoin [codegen id : 6] @@ -319,7 +317,7 @@ Join condition: None Output [4]: [ws_sold_date_sk#33, i_brand_id#35, i_class_id#36, i_category_id#37] Input [6]: [ws_item_sk#32, ws_sold_date_sk#33, i_item_sk#34, i_brand_id#35, i_class_id#36, i_category_id#37] -(42) ReusedExchange [Reuses operator id: 161] +(42) ReusedExchange [Reuses operator id: 159] Output [1]: [d_date_sk#38] (43) BroadcastHashJoin [codegen id : 9] @@ -340,112 +338,98 @@ Left keys [6]: [coalesce(brand_id#28, 0), isnull(brand_id#28), coalesce(class_id Right keys [6]: [coalesce(i_brand_id#35, 0), isnull(i_brand_id#35), coalesce(i_class_id#36, 0), isnull(i_class_id#36), coalesce(i_category_id#37, 0), isnull(i_category_id#37)] Join condition: None -(47) HashAggregate [codegen id : 10] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, 
category_id#30] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] - -(48) HashAggregate [codegen id : 10] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] - -(49) BroadcastExchange +(47) BroadcastExchange Input [3]: [brand_id#28, class_id#29, category_id#30] Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#40] -(50) BroadcastHashJoin [codegen id : 11] +(48) BroadcastHashJoin [codegen id : 11] Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Right keys [3]: [brand_id#28, class_id#29, category_id#30] Join condition: None -(51) Project [codegen id : 11] +(49) Project [codegen id : 11] Output [1]: [i_item_sk#6 AS ss_item_sk#41] Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#28, class_id#29, category_id#30] -(52) BroadcastExchange +(50) BroadcastExchange Input [1]: [ss_item_sk#41] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] -(53) BroadcastHashJoin [codegen id : 25] +(51) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] Right keys [1]: [ss_item_sk#41] Join condition: None -(54) Scan parquet default.item +(52) Scan parquet default.item Output [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct -(55) ColumnarToRow [codegen id : 23] +(53) ColumnarToRow [codegen id : 23] Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -(56) Filter [codegen id : 23] +(54) Filter [codegen id : 23] Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] Condition : isnotnull(i_item_sk#43) -(57) ReusedExchange [Reuses operator id: 52] +(55) ReusedExchange [Reuses operator id: 50] Output [1]: [ss_item_sk#41] -(58) BroadcastHashJoin [codegen id : 23] +(56) BroadcastHashJoin [codegen id : 23] Left keys [1]: [i_item_sk#43] Right keys [1]: [ss_item_sk#41] Join condition: None -(59) BroadcastExchange +(57) BroadcastExchange Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#47] -(60) BroadcastHashJoin [codegen id : 25] +(58) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] Right keys [1]: [i_item_sk#43] Join condition: None -(61) Project [codegen id : 25] +(59) Project [codegen id : 25] Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46] Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -(62) ReusedExchange [Reuses operator id: 156] +(60) ReusedExchange [Reuses operator id: 154] Output [1]: [d_date_sk#48] -(63) BroadcastHashJoin [codegen id : 25] +(61) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_sold_date_sk#4] Right keys [1]: [d_date_sk#48] Join condition: None -(64) Project [codegen id : 25] +(62) Project [codegen id : 25] Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46, d_date_sk#48] -(65) HashAggregate 
[codegen id : 25] +(63) HashAggregate [codegen id : 25] Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] -(66) Exchange +(64) Exchange Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), ENSURE_REQUIREMENTS, [id=#55] -(67) HashAggregate [codegen id : 26] +(65) HashAggregate [codegen id : 26] Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56, count(1)#57] Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56 AS sales#59, count(1)#57 AS number_sales#60] -(68) Filter [codegen id : 26] +(66) Filter [codegen id : 26] Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60] Condition : (isnotnull(sales#59) AND (cast(sales#59 as decimal(32,6)) > cast(Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) -(69) Scan parquet default.catalog_sales +(67) Scan parquet default.catalog_sales Output [4]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66] Batched: true Location: InMemoryFileIndex [] @@ -453,68 +437,68 @@ PartitionFilters: [isnotnull(cs_sold_date_sk#66), dynamicpruningexpression(cs_so PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct -(70) ColumnarToRow [codegen id : 51] +(68) ColumnarToRow [codegen id : 51] Input [4]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66] -(71) Filter [codegen id : 51] +(69) Filter [codegen id : 51] Input [4]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66] Condition : isnotnull(cs_item_sk#63) -(72) ReusedExchange [Reuses operator id: 52] +(70) ReusedExchange [Reuses operator id: 50] Output [1]: [ss_item_sk#41] -(73) BroadcastHashJoin [codegen id : 51] +(71) BroadcastHashJoin [codegen id : 51] Left keys [1]: [cs_item_sk#63] Right keys [1]: [ss_item_sk#41] Join condition: None -(74) ReusedExchange [Reuses operator id: 59] +(72) ReusedExchange [Reuses operator id: 57] Output [4]: [i_item_sk#67, i_brand_id#68, i_class_id#69, i_category_id#70] -(75) BroadcastHashJoin [codegen id : 51] +(73) BroadcastHashJoin [codegen id : 51] Left keys [1]: [cs_item_sk#63] Right keys [1]: [i_item_sk#67] Join condition: None -(76) Project [codegen id : 51] +(74) Project [codegen id : 51] Output [6]: [cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_brand_id#68, i_class_id#69, i_category_id#70] Input [8]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, 
cs_sold_date_sk#66, i_item_sk#67, i_brand_id#68, i_class_id#69, i_category_id#70] -(77) ReusedExchange [Reuses operator id: 156] +(75) ReusedExchange [Reuses operator id: 154] Output [1]: [d_date_sk#71] -(78) BroadcastHashJoin [codegen id : 51] +(76) BroadcastHashJoin [codegen id : 51] Left keys [1]: [cs_sold_date_sk#66] Right keys [1]: [d_date_sk#71] Join condition: None -(79) Project [codegen id : 51] +(77) Project [codegen id : 51] Output [5]: [cs_quantity#64, cs_list_price#65, i_brand_id#68, i_class_id#69, i_category_id#70] Input [7]: [cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_brand_id#68, i_class_id#69, i_category_id#70, d_date_sk#71] -(80) HashAggregate [codegen id : 51] +(78) HashAggregate [codegen id : 51] Input [5]: [cs_quantity#64, cs_list_price#65, i_brand_id#68, i_class_id#69, i_category_id#70] Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#72, isEmpty#73, count#74] Results [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] -(81) Exchange +(79) Exchange Input [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] Arguments: hashpartitioning(i_brand_id#68, i_class_id#69, i_category_id#70, 5), ENSURE_REQUIREMENTS, [id=#78] -(82) HashAggregate [codegen id : 52] +(80) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2))), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#79, count(1)#80] Results [6]: [catalog AS channel#81, i_brand_id#68, i_class_id#69, i_category_id#70, sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#79 AS sales#82, count(1)#80 AS number_sales#83] -(83) Filter [codegen id : 52] +(81) Filter [codegen id : 52] Input [6]: [channel#81, i_brand_id#68, i_class_id#69, i_category_id#70, sales#82, number_sales#83] Condition : (isnotnull(sales#82) AND (cast(sales#82 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) -(84) Scan parquet default.web_sales +(82) Scan parquet default.web_sales Output [4]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87] Batched: true Location: InMemoryFileIndex [] @@ -522,424 +506,424 @@ PartitionFilters: [isnotnull(ws_sold_date_sk#87), dynamicpruningexpression(ws_so PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct -(85) ColumnarToRow [codegen id : 77] +(83) ColumnarToRow [codegen id : 77] Input [4]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87] -(86) Filter [codegen id : 77] +(84) Filter [codegen id : 77] Input [4]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87] Condition : isnotnull(ws_item_sk#84) -(87) ReusedExchange [Reuses operator id: 52] +(85) ReusedExchange [Reuses operator id: 50] Output [1]: [ss_item_sk#41] -(88) BroadcastHashJoin [codegen id : 77] 
+(86) BroadcastHashJoin [codegen id : 77] Left keys [1]: [ws_item_sk#84] Right keys [1]: [ss_item_sk#41] Join condition: None -(89) ReusedExchange [Reuses operator id: 59] +(87) ReusedExchange [Reuses operator id: 57] Output [4]: [i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91] -(90) BroadcastHashJoin [codegen id : 77] +(88) BroadcastHashJoin [codegen id : 77] Left keys [1]: [ws_item_sk#84] Right keys [1]: [i_item_sk#88] Join condition: None -(91) Project [codegen id : 77] +(89) Project [codegen id : 77] Output [6]: [ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91] Input [8]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91] -(92) ReusedExchange [Reuses operator id: 156] +(90) ReusedExchange [Reuses operator id: 154] Output [1]: [d_date_sk#92] -(93) BroadcastHashJoin [codegen id : 77] +(91) BroadcastHashJoin [codegen id : 77] Left keys [1]: [ws_sold_date_sk#87] Right keys [1]: [d_date_sk#92] Join condition: None -(94) Project [codegen id : 77] +(92) Project [codegen id : 77] Output [5]: [ws_quantity#85, ws_list_price#86, i_brand_id#89, i_class_id#90, i_category_id#91] Input [7]: [ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91, d_date_sk#92] -(95) HashAggregate [codegen id : 77] +(93) HashAggregate [codegen id : 77] Input [5]: [ws_quantity#85, ws_list_price#86, i_brand_id#89, i_class_id#90, i_category_id#91] Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] Aggregate Attributes [3]: [sum#93, isEmpty#94, count#95] Results [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] -(96) Exchange +(94) Exchange Input [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] Arguments: hashpartitioning(i_brand_id#89, i_class_id#90, i_category_id#91, 5), ENSURE_REQUIREMENTS, [id=#99] -(97) HashAggregate [codegen id : 78] +(95) HashAggregate [codegen id : 78] Input [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2))), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2)))#100, count(1)#101] Results [6]: [web AS channel#102, i_brand_id#89, i_class_id#90, i_category_id#91, sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2)))#100 AS sales#103, count(1)#101 AS number_sales#104] -(98) Filter [codegen id : 78] +(96) Filter [codegen id : 78] Input [6]: [channel#102, i_brand_id#89, i_class_id#90, i_category_id#91, sales#103, number_sales#104] Condition : (isnotnull(sales#103) AND (cast(sales#103 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) -(99) Union +(97) Union -(100) HashAggregate [codegen id : 79] +(98) HashAggregate [codegen id : 79] Input [6]: [channel#58, i_brand_id#44, i_class_id#45, 
i_category_id#46, sales#59, number_sales#60] Keys [4]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: [partial_sum(sales#59), partial_sum(number_sales#60)] Aggregate Attributes [3]: [sum#105, isEmpty#106, sum#107] Results [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] -(101) Exchange +(99) Exchange Input [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] Arguments: hashpartitioning(channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, 5), ENSURE_REQUIREMENTS, [id=#111] -(102) HashAggregate [codegen id : 80] +(100) HashAggregate [codegen id : 80] Input [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] Keys [4]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: [sum(sales#59), sum(number_sales#60)] Aggregate Attributes [2]: [sum(sales#59)#112, sum(number_sales#60)#113] Results [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(sales#59)#112 AS sum_sales#114, sum(number_sales#60)#113 AS number_sales#115] -(103) ReusedExchange [Reuses operator id: 101] +(101) ReusedExchange [Reuses operator id: 99] Output [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] -(104) HashAggregate [codegen id : 160] +(102) HashAggregate [codegen id : 160] Input [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] Keys [4]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: [sum(sales#59), sum(number_sales#60)] Aggregate Attributes [2]: [sum(sales#59)#112, sum(number_sales#60)#113] Results [5]: [channel#58, i_brand_id#44, i_class_id#45, sum(sales#59)#112 AS sum_sales#114, sum(number_sales#60)#113 AS number_sales#115] -(105) HashAggregate [codegen id : 160] +(103) HashAggregate [codegen id : 160] Input [5]: [channel#58, i_brand_id#44, i_class_id#45, sum_sales#114, number_sales#115] Keys [3]: [channel#58, i_brand_id#44, i_class_id#45] Functions [2]: [partial_sum(sum_sales#114), partial_sum(number_sales#115)] Aggregate Attributes [3]: [sum#116, isEmpty#117, sum#118] Results [6]: [channel#58, i_brand_id#44, i_class_id#45, sum#119, isEmpty#120, sum#121] -(106) Exchange +(104) Exchange Input [6]: [channel#58, i_brand_id#44, i_class_id#45, sum#119, isEmpty#120, sum#121] Arguments: hashpartitioning(channel#58, i_brand_id#44, i_class_id#45, 5), ENSURE_REQUIREMENTS, [id=#122] -(107) HashAggregate [codegen id : 161] +(105) HashAggregate [codegen id : 161] Input [6]: [channel#58, i_brand_id#44, i_class_id#45, sum#119, isEmpty#120, sum#121] Keys [3]: [channel#58, i_brand_id#44, i_class_id#45] Functions [2]: [sum(sum_sales#114), sum(number_sales#115)] Aggregate Attributes [2]: [sum(sum_sales#114)#123, sum(number_sales#115)#124] Results [6]: [channel#58, i_brand_id#44, i_class_id#45, null AS i_category_id#125, sum(sum_sales#114)#123 AS sum(sum_sales)#126, sum(number_sales#115)#124 AS sum(number_sales)#127] -(108) ReusedExchange [Reuses operator id: 101] +(106) ReusedExchange [Reuses operator id: 99] Output [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] -(109) HashAggregate [codegen id : 241] +(107) HashAggregate [codegen id : 241] Input [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] Keys [4]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: [sum(sales#59), 
sum(number_sales#60)] Aggregate Attributes [2]: [sum(sales#59)#112, sum(number_sales#60)#113] Results [4]: [channel#58, i_brand_id#44, sum(sales#59)#112 AS sum_sales#114, sum(number_sales#60)#113 AS number_sales#115] -(110) HashAggregate [codegen id : 241] +(108) HashAggregate [codegen id : 241] Input [4]: [channel#58, i_brand_id#44, sum_sales#114, number_sales#115] Keys [2]: [channel#58, i_brand_id#44] Functions [2]: [partial_sum(sum_sales#114), partial_sum(number_sales#115)] Aggregate Attributes [3]: [sum#128, isEmpty#129, sum#130] Results [5]: [channel#58, i_brand_id#44, sum#131, isEmpty#132, sum#133] -(111) Exchange +(109) Exchange Input [5]: [channel#58, i_brand_id#44, sum#131, isEmpty#132, sum#133] Arguments: hashpartitioning(channel#58, i_brand_id#44, 5), ENSURE_REQUIREMENTS, [id=#134] -(112) HashAggregate [codegen id : 242] +(110) HashAggregate [codegen id : 242] Input [5]: [channel#58, i_brand_id#44, sum#131, isEmpty#132, sum#133] Keys [2]: [channel#58, i_brand_id#44] Functions [2]: [sum(sum_sales#114), sum(number_sales#115)] Aggregate Attributes [2]: [sum(sum_sales#114)#135, sum(number_sales#115)#136] Results [6]: [channel#58, i_brand_id#44, null AS i_class_id#137, null AS i_category_id#138, sum(sum_sales#114)#135 AS sum(sum_sales)#139, sum(number_sales#115)#136 AS sum(number_sales)#140] -(113) ReusedExchange [Reuses operator id: 101] +(111) ReusedExchange [Reuses operator id: 99] Output [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] -(114) HashAggregate [codegen id : 322] +(112) HashAggregate [codegen id : 322] Input [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] Keys [4]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: [sum(sales#59), sum(number_sales#60)] Aggregate Attributes [2]: [sum(sales#59)#112, sum(number_sales#60)#113] Results [3]: [channel#58, sum(sales#59)#112 AS sum_sales#114, sum(number_sales#60)#113 AS number_sales#115] -(115) HashAggregate [codegen id : 322] +(113) HashAggregate [codegen id : 322] Input [3]: [channel#58, sum_sales#114, number_sales#115] Keys [1]: [channel#58] Functions [2]: [partial_sum(sum_sales#114), partial_sum(number_sales#115)] Aggregate Attributes [3]: [sum#141, isEmpty#142, sum#143] Results [4]: [channel#58, sum#144, isEmpty#145, sum#146] -(116) Exchange +(114) Exchange Input [4]: [channel#58, sum#144, isEmpty#145, sum#146] Arguments: hashpartitioning(channel#58, 5), ENSURE_REQUIREMENTS, [id=#147] -(117) HashAggregate [codegen id : 323] +(115) HashAggregate [codegen id : 323] Input [4]: [channel#58, sum#144, isEmpty#145, sum#146] Keys [1]: [channel#58] Functions [2]: [sum(sum_sales#114), sum(number_sales#115)] Aggregate Attributes [2]: [sum(sum_sales#114)#148, sum(number_sales#115)#149] Results [6]: [channel#58, null AS i_brand_id#150, null AS i_class_id#151, null AS i_category_id#152, sum(sum_sales#114)#148 AS sum(sum_sales)#153, sum(number_sales#115)#149 AS sum(number_sales)#154] -(118) ReusedExchange [Reuses operator id: 101] +(116) ReusedExchange [Reuses operator id: 99] Output [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] -(119) HashAggregate [codegen id : 403] +(117) HashAggregate [codegen id : 403] Input [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] Keys [4]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46] Functions [2]: [sum(sales#59), sum(number_sales#60)] Aggregate Attributes [2]: 
[sum(sales#59)#112, sum(number_sales#60)#113] Results [2]: [sum(sales#59)#112 AS sum_sales#114, sum(number_sales#60)#113 AS number_sales#115] -(120) HashAggregate [codegen id : 403] +(118) HashAggregate [codegen id : 403] Input [2]: [sum_sales#114, number_sales#115] Keys: [] Functions [2]: [partial_sum(sum_sales#114), partial_sum(number_sales#115)] Aggregate Attributes [3]: [sum#155, isEmpty#156, sum#157] Results [3]: [sum#158, isEmpty#159, sum#160] -(121) Exchange +(119) Exchange Input [3]: [sum#158, isEmpty#159, sum#160] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#161] -(122) HashAggregate [codegen id : 404] +(120) HashAggregate [codegen id : 404] Input [3]: [sum#158, isEmpty#159, sum#160] Keys: [] Functions [2]: [sum(sum_sales#114), sum(number_sales#115)] Aggregate Attributes [2]: [sum(sum_sales#114)#162, sum(number_sales#115)#163] Results [6]: [null AS channel#164, null AS i_brand_id#165, null AS i_class_id#166, null AS i_category_id#167, sum(sum_sales#114)#162 AS sum(sum_sales)#168, sum(number_sales#115)#163 AS sum(number_sales)#169] -(123) Union +(121) Union -(124) HashAggregate [codegen id : 405] +(122) HashAggregate [codegen id : 405] Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] Keys [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] Functions: [] Aggregate Attributes: [] Results [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] -(125) Exchange +(123) Exchange Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] Arguments: hashpartitioning(channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115, 5), ENSURE_REQUIREMENTS, [id=#170] -(126) HashAggregate [codegen id : 406] +(124) HashAggregate [codegen id : 406] Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] Keys [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] Functions: [] Aggregate Attributes: [] Results [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] -(127) TakeOrderedAndProject +(125) TakeOrderedAndProject Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] Arguments: 100, [channel#58 ASC NULLS FIRST, i_brand_id#44 ASC NULLS FIRST, i_class_id#45 ASC NULLS FIRST, i_category_id#46 ASC NULLS FIRST], [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] ===== Subqueries ===== -Subquery:1 Hosting operator id = 68 Hosting Expression = Subquery scalar-subquery#61, [id=#62] -* HashAggregate (146) -+- Exchange (145) - +- * HashAggregate (144) - +- Union (143) - :- * Project (132) - : +- * BroadcastHashJoin Inner BuildRight (131) - : :- * ColumnarToRow (129) - : : +- Scan parquet default.store_sales (128) - : +- ReusedExchange (130) - :- * Project (137) - : +- * BroadcastHashJoin Inner BuildRight (136) - : :- * ColumnarToRow (134) - : : +- Scan parquet default.catalog_sales (133) - : +- ReusedExchange (135) - +- * Project (142) - +- * BroadcastHashJoin Inner BuildRight (141) - :- * ColumnarToRow (139) - : +- Scan parquet default.web_sales (138) - +- ReusedExchange (140) - - -(128) Scan parquet default.store_sales +Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#61, [id=#62] +* HashAggregate (144) ++- Exchange 
(143) + +- * HashAggregate (142) + +- Union (141) + :- * Project (130) + : +- * BroadcastHashJoin Inner BuildRight (129) + : :- * ColumnarToRow (127) + : : +- Scan parquet default.store_sales (126) + : +- ReusedExchange (128) + :- * Project (135) + : +- * BroadcastHashJoin Inner BuildRight (134) + : :- * ColumnarToRow (132) + : : +- Scan parquet default.catalog_sales (131) + : +- ReusedExchange (133) + +- * Project (140) + +- * BroadcastHashJoin Inner BuildRight (139) + :- * ColumnarToRow (137) + : +- Scan parquet default.web_sales (136) + +- ReusedExchange (138) + + +(126) Scan parquet default.store_sales Output [3]: [ss_quantity#171, ss_list_price#172, ss_sold_date_sk#173] Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ss_sold_date_sk#173), dynamicpruningexpression(ss_sold_date_sk#173 IN dynamicpruning#12)] ReadSchema: struct -(129) ColumnarToRow [codegen id : 2] +(127) ColumnarToRow [codegen id : 2] Input [3]: [ss_quantity#171, ss_list_price#172, ss_sold_date_sk#173] -(130) ReusedExchange [Reuses operator id: 161] +(128) ReusedExchange [Reuses operator id: 159] Output [1]: [d_date_sk#174] -(131) BroadcastHashJoin [codegen id : 2] +(129) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#173] Right keys [1]: [d_date_sk#174] Join condition: None -(132) Project [codegen id : 2] +(130) Project [codegen id : 2] Output [2]: [ss_quantity#171 AS quantity#175, ss_list_price#172 AS list_price#176] Input [4]: [ss_quantity#171, ss_list_price#172, ss_sold_date_sk#173, d_date_sk#174] -(133) Scan parquet default.catalog_sales +(131) Scan parquet default.catalog_sales Output [3]: [cs_quantity#177, cs_list_price#178, cs_sold_date_sk#179] Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(cs_sold_date_sk#179), dynamicpruningexpression(cs_sold_date_sk#179 IN dynamicpruning#180)] ReadSchema: struct -(134) ColumnarToRow [codegen id : 4] +(132) ColumnarToRow [codegen id : 4] Input [3]: [cs_quantity#177, cs_list_price#178, cs_sold_date_sk#179] -(135) ReusedExchange [Reuses operator id: 151] +(133) ReusedExchange [Reuses operator id: 149] Output [1]: [d_date_sk#181] -(136) BroadcastHashJoin [codegen id : 4] +(134) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#179] Right keys [1]: [d_date_sk#181] Join condition: None -(137) Project [codegen id : 4] +(135) Project [codegen id : 4] Output [2]: [cs_quantity#177 AS quantity#182, cs_list_price#178 AS list_price#183] Input [4]: [cs_quantity#177, cs_list_price#178, cs_sold_date_sk#179, d_date_sk#181] -(138) Scan parquet default.web_sales +(136) Scan parquet default.web_sales Output [3]: [ws_quantity#184, ws_list_price#185, ws_sold_date_sk#186] Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ws_sold_date_sk#186), dynamicpruningexpression(ws_sold_date_sk#186 IN dynamicpruning#180)] ReadSchema: struct -(139) ColumnarToRow [codegen id : 6] +(137) ColumnarToRow [codegen id : 6] Input [3]: [ws_quantity#184, ws_list_price#185, ws_sold_date_sk#186] -(140) ReusedExchange [Reuses operator id: 151] +(138) ReusedExchange [Reuses operator id: 149] Output [1]: [d_date_sk#187] -(141) BroadcastHashJoin [codegen id : 6] +(139) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_sold_date_sk#186] Right keys [1]: [d_date_sk#187] Join condition: None -(142) Project [codegen id : 6] +(140) Project [codegen id : 6] Output [2]: [ws_quantity#184 AS quantity#188, ws_list_price#185 AS list_price#189] Input [4]: [ws_quantity#184, ws_list_price#185, ws_sold_date_sk#186, 
d_date_sk#187] -(143) Union +(141) Union -(144) HashAggregate [codegen id : 7] +(142) HashAggregate [codegen id : 7] Input [2]: [quantity#175, list_price#176] Keys: [] Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [2]: [sum#190, count#191] Results [2]: [sum#192, count#193] -(145) Exchange +(143) Exchange Input [2]: [sum#192, count#193] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#194] -(146) HashAggregate [codegen id : 8] +(144) HashAggregate [codegen id : 8] Input [2]: [sum#192, count#193] Keys: [] Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2)))#195] Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2)))#195 AS average_sales#196] -Subquery:2 Hosting operator id = 128 Hosting Expression = ss_sold_date_sk#173 IN dynamicpruning#12 +Subquery:2 Hosting operator id = 126 Hosting Expression = ss_sold_date_sk#173 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 133 Hosting Expression = cs_sold_date_sk#179 IN dynamicpruning#180 -BroadcastExchange (151) -+- * Project (150) - +- * Filter (149) - +- * ColumnarToRow (148) - +- Scan parquet default.date_dim (147) +Subquery:3 Hosting operator id = 131 Hosting Expression = cs_sold_date_sk#179 IN dynamicpruning#180 +BroadcastExchange (149) ++- * Project (148) + +- * Filter (147) + +- * ColumnarToRow (146) + +- Scan parquet default.date_dim (145) -(147) Scan parquet default.date_dim +(145) Scan parquet default.date_dim Output [2]: [d_date_sk#181, d_year#197] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct -(148) ColumnarToRow [codegen id : 1] +(146) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#181, d_year#197] -(149) Filter [codegen id : 1] +(147) Filter [codegen id : 1] Input [2]: [d_date_sk#181, d_year#197] Condition : (((isnotnull(d_year#197) AND (d_year#197 >= 1998)) AND (d_year#197 <= 2000)) AND isnotnull(d_date_sk#181)) -(150) Project [codegen id : 1] +(148) Project [codegen id : 1] Output [1]: [d_date_sk#181] Input [2]: [d_date_sk#181, d_year#197] -(151) BroadcastExchange +(149) BroadcastExchange Input [1]: [d_date_sk#181] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#198] -Subquery:4 Hosting operator id = 138 Hosting Expression = ws_sold_date_sk#186 IN dynamicpruning#180 +Subquery:4 Hosting operator id = 136 Hosting Expression = ws_sold_date_sk#186 IN dynamicpruning#180 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 -BroadcastExchange (156) -+- * Project (155) - +- * Filter (154) - +- * ColumnarToRow (153) - +- Scan parquet default.date_dim (152) +BroadcastExchange (154) ++- * Project (153) + +- * Filter (152) + +- * ColumnarToRow (151) + +- Scan parquet default.date_dim (150) -(152) Scan parquet default.date_dim +(150) Scan parquet default.date_dim Output [3]: [d_date_sk#48, d_year#199, d_moy#200] 
Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct -(153) ColumnarToRow [codegen id : 1] +(151) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#48, d_year#199, d_moy#200] -(154) Filter [codegen id : 1] +(152) Filter [codegen id : 1] Input [3]: [d_date_sk#48, d_year#199, d_moy#200] Condition : ((((isnotnull(d_year#199) AND isnotnull(d_moy#200)) AND (d_year#199 = 2000)) AND (d_moy#200 = 11)) AND isnotnull(d_date_sk#48)) -(155) Project [codegen id : 1] +(153) Project [codegen id : 1] Output [1]: [d_date_sk#48] Input [3]: [d_date_sk#48, d_year#199, d_moy#200] -(156) BroadcastExchange +(154) BroadcastExchange Input [1]: [d_date_sk#48] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#201] Subquery:6 Hosting operator id = 7 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#12 -BroadcastExchange (161) -+- * Project (160) - +- * Filter (159) - +- * ColumnarToRow (158) - +- Scan parquet default.date_dim (157) +BroadcastExchange (159) ++- * Project (158) + +- * Filter (157) + +- * ColumnarToRow (156) + +- Scan parquet default.date_dim (155) -(157) Scan parquet default.date_dim +(155) Scan parquet default.date_dim Output [2]: [d_date_sk#27, d_year#202] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(158) ColumnarToRow [codegen id : 1] +(156) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#27, d_year#202] -(159) Filter [codegen id : 1] +(157) Filter [codegen id : 1] Input [2]: [d_date_sk#27, d_year#202] Condition : (((isnotnull(d_year#202) AND (d_year#202 >= 1999)) AND (d_year#202 <= 2001)) AND isnotnull(d_date_sk#27)) -(160) Project [codegen id : 1] +(158) Project [codegen id : 1] Output [1]: [d_date_sk#27] Input [2]: [d_date_sk#27, d_year#202] -(161) BroadcastExchange +(159) BroadcastExchange Input [1]: [d_date_sk#27] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#203] @@ -947,12 +931,12 @@ Subquery:7 Hosting operator id = 13 Hosting Expression = cs_sold_date_sk#18 IN d Subquery:8 Hosting operator id = 36 Hosting Expression = ws_sold_date_sk#33 IN dynamicpruning#12 -Subquery:9 Hosting operator id = 83 Hosting Expression = ReusedSubquery Subquery scalar-subquery#61, [id=#62] +Subquery:9 Hosting operator id = 81 Hosting Expression = ReusedSubquery Subquery scalar-subquery#61, [id=#62] -Subquery:10 Hosting operator id = 69 Hosting Expression = cs_sold_date_sk#66 IN dynamicpruning#5 +Subquery:10 Hosting operator id = 67 Hosting Expression = cs_sold_date_sk#66 IN dynamicpruning#5 -Subquery:11 Hosting operator id = 98 Hosting Expression = ReusedSubquery Subquery scalar-subquery#61, [id=#62] +Subquery:11 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#61, [id=#62] -Subquery:12 Hosting operator id = 84 Hosting Expression = ws_sold_date_sk#87 IN dynamicpruning#5 +Subquery:12 Hosting operator id = 82 Hosting Expression = ws_sold_date_sk#87 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt index f800a80a4e636..086c36864ebdb 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt @@ -94,77 +94,75 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter BroadcastExchange #6 WholeStageCodegen (10) - HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #7 - WholeStageCodegen (6) - HashAggregate [brand_id,class_id,category_id] - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] - SubqueryBroadcast [d_date_sk] #2 - BroadcastExchange #8 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen (4) - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (3) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #2 - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (1) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - ReusedExchange [d_date_sk] #8 - InputAdapter - ReusedExchange [d_date_sk] #8 - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (9) + InputAdapter + Exchange [brand_id,class_id,category_id] #7 + WholeStageCodegen (6) + HashAggregate [brand_id,class_id,category_id] Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Filter [ws_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] - ReusedSubquery [d_date_sk] #2 + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] + SubqueryBroadcast [d_date_sk] #2 + BroadcastExchange #8 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + BroadcastExchange #9 + WholeStageCodegen (4) + BroadcastHashJoin 
[i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (3) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] + ReusedSubquery [d_date_sk] #2 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [d_date_sk] #8 InputAdapter ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] + ReusedSubquery [d_date_sk] #2 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + InputAdapter + ReusedExchange [d_date_sk] #8 InputAdapter BroadcastExchange #13 WholeStageCodegen (23) From f6c463453b80178bc172ff6df9decd688e1e1101 Mon Sep 17 00:00:00 2001 From: pralabhkumar Date: Mon, 14 Mar 2022 10:32:47 -0700 Subject: [PATCH 494/513] [SPARK-37491][PYTHON] Fix Series.asof for unsorted values ### What changes were proposed in this pull request? Fix Series.asof when values of the series is not sorted #### Before ```python import pandas as pd from pyspark import pandas as ps import numpy as np pser = pd.Series([2, 1, np.nan, 4], index=[10, 20, 30, 40], name="Koalas") psser = ps.from_pandas(pser) psser.asof([5, 25]) 5 NaN 25 2.0 Name: Koalas, dtype: float64 pser = pd.Series([4, np.nan, np.nan, 2], index=[10, 20, 30, 40], name="Koalas") psser = ps.from_pandas(pser) psser.asof([5, 100]) 5 NaN 100 4.0 ``` #### After ```python import pandas as pd from pyspark import pandas as ps import numpy as np pser = pd.Series([2, 1, np.nan, 4], index=[10, 20, 30, 40], name="Koalas") psser = ps.from_pandas(pser) psser.asof([5, 25]) 5 NaN 25 1.0 Name: Koalas, dtype: float64 pser = pd.Series([4, np.nan, np.nan, 2], index=[10, 20, 30, 40], name="Koalas") psser = ps.from_pandas(pser) psser.asof([5, 100]) 5 NaN 100 2.0 ``` ### Why are the changes needed? There is a bug in ps.as_of, when the series is not sorted ### Does this PR introduce any user-facing change? Yes user will be able to see the behavior exactly matching to pandas ### How was this patch tested? unit tests Closes #35191 from pralabhkumar/rk_spark_asof_series. 
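A minimal standalone sketch of the idea behind the fix (not the patched `Series.asof` code itself): instead of taking a plain `max` over the candidate values, the lookup keys a `max_by` on a monotonically increasing row id, so the last non-null value at or before the requested index wins even when the values themselves are unsorted. The toy frame, the column names, and the cutoff `25` below are illustrative assumptions only, and the snippet presumes a Spark build where `pyspark.sql.functions.max_by` is available, as it is for this patch.

```python
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()

# Toy frame mirroring pd.Series([2, 1, np.nan, 4], index=[10, 20, 30, 40]).
df = (
    spark.createDataFrame([(10, 2.0), (20, 1.0), (30, None), (40, 4.0)], ["idx", "value"])
    .withColumn("row_id", F.monotonically_increasing_id())
)

# asof(25): among rows with idx <= 25 and a non-null value, take the value from the
# row with the largest row_id, i.e. the last valid observation (1.0). A plain max
# over the raw values would wrongly return 2.0 here.
df.select(
    F.max_by(
        F.col("value"),
        F.when((F.col("idx") <= F.lit(25)) & F.col("value").isNotNull(), F.col("row_id")),
    ).alias("asof_25")
).show()
```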
Lead-authored-by: pralabhkumar Co-authored-by: Kumar, Pralabh Signed-off-by: Takuya UESHIN --- python/pyspark/pandas/series.py | 42 ++++++++++++++++++---- python/pyspark/pandas/tests/test_series.py | 42 ++++++++++++++++++++++ 2 files changed, 77 insertions(+), 7 deletions(-) diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py index e6f68678317eb..705ba599e2788 100644 --- a/python/pyspark/pandas/series.py +++ b/python/pyspark/pandas/series.py @@ -66,6 +66,7 @@ NumericType, Row, StructType, + TimestampType, ) from pyspark.sql.window import Window @@ -5270,13 +5271,33 @@ def asof(self, where: Union[Any, List]) -> Union[Scalar, "Series"]: if not is_list_like(where): should_return_series = False where = [where] - index_scol = self._internal.index_spark_columns[0] - index_type = self._internal.spark_type_for(index_scol) + internal = self._internal.resolved_copy + index_scol = internal.index_spark_columns[0] + index_type = internal.spark_type_for(index_scol) + spark_column = internal.data_spark_columns[0] + monotonically_increasing_id_column = verify_temp_column_name( + internal.spark_frame, "__monotonically_increasing_id__" + ) cond = [ - F.max(F.when(index_scol <= SF.lit(index).cast(index_type), self.spark.column)) + F.max_by( + spark_column, + F.when( + (index_scol <= SF.lit(index).cast(index_type)) & spark_column.isNotNull() + if pd.notna(index) + # If index is nan and the value of the col is not null + # then return monotonically_increasing_id .This will let max by + # to return last index value , which is the behaviour of pandas + else spark_column.isNotNull(), + monotonically_increasing_id_column, + ), + ) for index in where ] - sdf = self._internal.spark_frame.select(cond) + + sdf = internal.spark_frame.withColumn( + monotonically_increasing_id_column, F.monotonically_increasing_id() + ).select(cond) + if not should_return_series: with sql_conf({SPARK_CONF_ARROW_ENABLED: False}): # Disable Arrow to keep row ordering. @@ -5285,9 +5306,16 @@ def asof(self, where: Union[Any, List]) -> Union[Scalar, "Series"]: # The data is expected to be small so it's fine to transpose/use default index. with ps.option_context("compute.default_index_type", "distributed", "compute.max_rows", 1): - psdf: DataFrame = DataFrame(sdf) - psdf.columns = pd.Index(where) - return first_series(psdf.transpose()).rename(self.name) + if len(where) == len(set(where)) and not isinstance(index_type, TimestampType): + psdf: DataFrame = DataFrame(sdf) + psdf.columns = pd.Index(where) + return first_series(psdf.transpose()).rename(self.name) + else: + # If `where` has duplicate items, leverage the pandas directly + # since pandas API on Spark doesn't support the duplicate column name. 
+ pdf: pd.DataFrame = sdf.limit(1).toPandas() + pdf.columns = pd.Index(where) + return first_series(DataFrame(pdf.transpose())).rename(self.name) def mad(self) -> float: """ diff --git a/python/pyspark/pandas/tests/test_series.py b/python/pyspark/pandas/tests/test_series.py index eeadb060c6a3e..4cfd7c63e312d 100644 --- a/python/pyspark/pandas/tests/test_series.py +++ b/python/pyspark/pandas/tests/test_series.py @@ -2111,6 +2111,48 @@ def test_asof(self): with ps.option_context("compute.eager_check", False): self.assert_eq(psser.asof(20), 4.0) + pser = pd.Series([2, 1, np.nan, 4], index=[10, 20, 30, 40], name="Koalas") + psser = ps.from_pandas(pser) + self.assert_eq(psser.asof([5, 20]), pser.asof([5, 20])) + + pser = pd.Series([4, np.nan, np.nan, 2], index=[10, 20, 30, 40], name="Koalas") + psser = ps.from_pandas(pser) + self.assert_eq(psser.asof([5, 100]), pser.asof([5, 100])) + + pser = pd.Series([np.nan, 4, 1, 2], index=[10, 20, 30, 40], name="Koalas") + psser = ps.from_pandas(pser) + self.assert_eq(psser.asof([5, 35]), pser.asof([5, 35])) + + pser = pd.Series([2, 1, np.nan, 4], index=[10, 20, 30, 40], name="Koalas") + psser = ps.from_pandas(pser) + self.assert_eq(psser.asof([25, 25]), pser.asof([25, 25])) + + pser = pd.Series([2, 1, np.nan, 4], index=["a", "b", "c", "d"], name="Koalas") + psser = ps.from_pandas(pser) + self.assert_eq(psser.asof(["a", "d"]), pser.asof(["a", "d"])) + + pser = pd.Series( + [2, 1, np.nan, 4], + index=[ + pd.Timestamp(2020, 1, 1), + pd.Timestamp(2020, 2, 2), + pd.Timestamp(2020, 3, 3), + pd.Timestamp(2020, 4, 4), + ], + name="Koalas", + ) + psser = ps.from_pandas(pser) + self.assert_eq( + psser.asof([pd.Timestamp(2020, 1, 1)]), + pser.asof([pd.Timestamp(2020, 1, 1)]), + ) + + pser = pd.Series([2, np.nan, 1, 4], index=[10, 20, 30, 40], name="Koalas") + psser = ps.from_pandas(pser) + self.assert_eq(psser.asof(np.nan), pser.asof(np.nan)) + self.assert_eq(psser.asof([np.nan, np.nan]), pser.asof([np.nan, np.nan])) + self.assert_eq(psser.asof([10, np.nan]), pser.asof([10, np.nan])) + def test_squeeze(self): # Single value pser = pd.Series([90]) From a30575e91e82e9f4394add40f1ba9265eb1f7819 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 14 Mar 2022 12:49:11 -0700 Subject: [PATCH 495/513] [SPARK-38544][BUILD] Upgrade log4j2 to 2.17.2 ### What changes were proposed in this pull request? This pr aims to upgrade log4j2 to 2.17.2. ### Why are the changes needed? This version brings a lot of fixes released to log1.x support, the release notes and change report as follows: - https://logging.apache.org/log4j/2.x/index.html#News - https://logging.apache.org/log4j/2.x/changes-report.html#a2.17.2 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35839 from LuciferYang/log4j2-2172. 
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 8 ++++---- dev/deps/spark-deps-hadoop-3-hive-2.3 | 8 ++++---- pom.xml | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index bcbf8b9908ae5..d87f8034a8b42 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -187,10 +187,10 @@ lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar -log4j-1.2-api/2.17.1//log4j-1.2-api-2.17.1.jar -log4j-api/2.17.1//log4j-api-2.17.1.jar -log4j-core/2.17.1//log4j-core-2.17.1.jar -log4j-slf4j-impl/2.17.1//log4j-slf4j-impl-2.17.1.jar +log4j-1.2-api/2.17.2//log4j-1.2-api-2.17.2.jar +log4j-api/2.17.2//log4j-api-2.17.2.jar +log4j-core/2.17.2//log4j-core-2.17.2.jar +log4j-slf4j-impl/2.17.2//log4j-slf4j-impl-2.17.2.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 8ca7880c7a34d..d92152666a2bd 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -172,10 +172,10 @@ lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar -log4j-1.2-api/2.17.1//log4j-1.2-api-2.17.1.jar -log4j-api/2.17.1//log4j-api-2.17.1.jar -log4j-core/2.17.1//log4j-core-2.17.1.jar -log4j-slf4j-impl/2.17.1//log4j-slf4j-impl-2.17.1.jar +log4j-1.2-api/2.17.2//log4j-1.2-api-2.17.2.jar +log4j-api/2.17.2//log4j-api-2.17.2.jar +log4j-core/2.17.2//log4j-core-2.17.2.jar +log4j-slf4j-impl/2.17.2//log4j-slf4j-impl-2.17.2.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar diff --git a/pom.xml b/pom.xml index d8b5b87c7d97b..a751bdd3462fe 100644 --- a/pom.xml +++ b/pom.xml @@ -119,7 +119,7 @@ 1.6.0 spark 1.7.32 - 2.17.1 + 2.17.2 3.3.2 2.5.0 From 1d4e917d5c36ed53d5d97d28e2a16cb2f9a3b81c Mon Sep 17 00:00:00 2001 From: jackylee-ch Date: Mon, 14 Mar 2022 20:17:38 -0700 Subject: [PATCH 496/513] [SPARK-38521][SQL] Change `partitionOverwriteMode` from string to variable in Scala ### Why are the changes needed? We use `partitionOverwriteMode` in several places, but there is no unified variable definition. This PR is to change all this related modifications to a unified `DataSourceUtils. PARTITION_OVERWRITE_MODE` definition. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Origin UT Closes #35843 from jackylee-ch/change_parameter_string. 
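For context, the user-facing option key itself does not change; a dynamic partition overwrite is still requested roughly as in the sketch below, where `df` is any partitioned DataFrame and `path` a writable location (both placeholders, not taken from this patch). The patch only replaces the hard-coded "partitionOverwriteMode" string literals inside Spark's Scala sources with the shared `DataSourceUtils.PARTITION_OVERWRITE_MODE` constant, as the diff below shows.

```python
# Illustrative only: overwrite just the partitions present in `df`, leaving others intact.
(
    df.write
    .partitionBy("part")
    .mode("overwrite")
    .option("partitionOverwriteMode", "dynamic")  # the option key this patch centralizes
    .parquet(path)
)
```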
Authored-by: jackylee-ch Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/datasources/DataSourceUtils.scala | 6 ++++++ .../datasources/InsertIntoHadoopFsRelationCommand.scala | 2 +- .../scala/org/apache/spark/sql/sources/InsertSuite.scala | 7 +++++-- .../org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 4 +++- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala index 6ceb44ab15020..15d40a78f2346 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala @@ -47,6 +47,12 @@ object DataSourceUtils extends PredicateHelper { */ val PARTITIONING_COLUMNS_KEY = "__partition_columns" + /** + * The key to use for specifying partition overwrite mode when + * INSERT OVERWRITE a partitioned data source table. + */ + val PARTITION_OVERWRITE_MODE = "partitionOverwriteMode" + /** * Utility methods for converting partitionBy columns to options and back. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala index 267b360b474ca..74be483cd7c37 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala @@ -62,7 +62,7 @@ case class InsertIntoHadoopFsRelationCommand( private lazy val parameters = CaseInsensitiveMap(options) private[sql] lazy val dynamicPartitionOverwrite: Boolean = { - val partitionOverwriteMode = parameters.get("partitionOverwriteMode") + val partitionOverwriteMode = parameters.get(DataSourceUtils.PARTITION_OVERWRITE_MODE) // scalastyle:off caselocale .map(mode => PartitionOverwriteMode.withName(mode.toUpperCase)) // scalastyle:on caselocale diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index b553e6ed566b5..1fb4737c45a61 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.execution.datasources.DataSourceUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode import org.apache.spark.sql.test.SharedSparkSession @@ -807,13 +808,15 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { Seq((1, 2), (1, 3)).toDF("i", "part") .write.partitionBy("part").mode("overwrite") - .option("partitionOverwriteMode", "dynamic").parquet(path.getAbsolutePath) + .option(DataSourceUtils.PARTITION_OVERWRITE_MODE, PartitionOverwriteMode.DYNAMIC.toString) + .parquet(path.getAbsolutePath) checkAnswer(spark.read.parquet(path.getAbsolutePath), Row(1, 1) :: Row(1, 2) :: Row(1, 3) :: Nil) Seq((1, 2), (1, 3)).toDF("i", "part") 
.write.partitionBy("part").mode("overwrite") - .option("partitionOverwriteMode", "static").parquet(path.getAbsolutePath) + .option(DataSourceUtils.PARTITION_OVERWRITE_MODE, PartitionOverwriteMode.STATIC.toString) + .parquet(path.getAbsolutePath) checkAnswer(spark.read.parquet(path.getAbsolutePath), Row(1, 2) :: Row(1, 3) :: Nil) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index b6f06f5989d2f..12b570e818650 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetOptions} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.HiveCaseSensitiveInferenceMode._ +import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode import org.apache.spark.sql.types._ /** @@ -248,7 +249,8 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log // Spark SQL's data source table now support static and dynamic partition insert. Source // table converted from Hive table should always use dynamic. - val enableDynamicPartition = hiveOptions.updated("partitionOverwriteMode", "dynamic") + val enableDynamicPartition = hiveOptions.updated(DataSourceUtils.PARTITION_OVERWRITE_MODE, + PartitionOverwriteMode.DYNAMIC.toString) val fsRelation = HadoopFsRelation( location = fileIndex, partitionSchema = partitionSchema, From 8b5ec779ecf05580c0b911934175fb1cd342d2b8 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 14 Mar 2022 21:28:41 -0700 Subject: [PATCH 497/513] [SPARK-38549][SS] Add `numRowsDroppedByWatermark` to `SessionWindowStateStoreRestoreExec` ### What changes were proposed in this pull request? This patch adds `numRowsDroppedByWatermark` metric to `SessionWindowStateStoreRestoreExec` operator. ### Why are the changes needed? `SessionWindowStateStoreRestoreExec` filters out outdated inputs by watermark but it doesn't provide corresponding `numRowsDroppedByWatermark` metric. It makes users confused about stream query result. ### Does this PR introduce _any_ user-facing change? Yes, adding a metric. ### How was this patch tested? Existing tests. Closes #35854 from viirya/watermark_metric. 
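For context, `numRowsDroppedByWatermark` is the same counter name already exposed by other stateful operators, and per-operator metrics are commonly inspected through the query progress. A rough sketch of that inspection follows; the `query` handle is an assumption, and whether this restore operator's counter is folded into the per-operator progress or only shown in the SQL metrics UI is not spelled out here:

```
import org.apache.spark.sql.streaming.StreamingQuery

// Prints the dropped-by-watermark counter reported for each stateful operator
// in the most recent progress update, if any progress has been reported yet.
def dumpDroppedByWatermark(query: StreamingQuery): Unit = {
  Option(query.lastProgress).foreach { progress =>
    progress.stateOperators.zipWithIndex.foreach { case (op, i) =>
      println(s"stateOperator[$i] numRowsDroppedByWatermark=${op.numRowsDroppedByWatermark}")
    }
  }
}
```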
Authored-by: Liang-Chi Hsieh Signed-off-by: Liang-Chi Hsieh --- .../sql/execution/streaming/statefulOperators.scala | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index 45c6430f96423..df44714ee5270 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -535,6 +535,12 @@ case class SessionWindowStateStoreRestoreExec( child: SparkPlan) extends UnaryExecNode with StateStoreReader with WatermarkSupport { + override lazy val metrics = Map( + "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), + "numRowsDroppedByWatermark" -> SQLMetrics.createMetric(sparkContext, + "number of rows which are dropped by watermark") + ) + override def keyExpressions: Seq[Attribute] = keyWithoutSessionExpressions assert(keyExpressions.nonEmpty, "Grouping key must be specified when using sessionWindow") @@ -555,7 +561,11 @@ case class SessionWindowStateStoreRestoreExec( // We need to filter out outdated inputs val filteredIterator = watermarkPredicateForData match { - case Some(predicate) => iter.filter((row: InternalRow) => !predicate.eval(row)) + case Some(predicate) => iter.filter((row: InternalRow) => { + val shouldKeep = !predicate.eval(row) + if (!shouldKeep) longMetric("numRowsDroppedByWatermark") += 1 + shouldKeep + }) case None => iter } From f17f07860e9d38ff99e74af2181d4608953204f0 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Mon, 14 Mar 2022 22:28:57 -0700 Subject: [PATCH 498/513] [SPARK-38513][K8S][FOLLWUP] Cleanup executor-podgroup-template.yml ### What changes were proposed in this pull request? Cleanup executor-podgroup-template.yml ### Why are the changes needed? A follwup cleanup for https://github.com/apache/spark/pull/35809 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT passed Closes #35857 from Yikun/SPARK-38551. Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- .../resources/executor-podgroup-template.yml | 25 ------------------- 1 file changed, 25 deletions(-) delete mode 100644 resource-managers/kubernetes/core/src/test/resources/executor-podgroup-template.yml diff --git a/resource-managers/kubernetes/core/src/test/resources/executor-podgroup-template.yml b/resource-managers/kubernetes/core/src/test/resources/executor-podgroup-template.yml deleted file mode 100644 index f0f7b35f191a1..0000000000000 --- a/resource-managers/kubernetes/core/src/test/resources/executor-podgroup-template.yml +++ /dev/null @@ -1,25 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -apiVersion: scheduling.volcano.sh/v1beta1 -kind: PodGroup -spec: - minMember: 1000 - minResources: - cpu: "4" - memory: "16Gi" - priorityClassName: executor-priority - queue: executor-queue From 58c21e59288a1078fcdd59fb7444172637d55b49 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 15 Mar 2022 00:02:41 -0700 Subject: [PATCH 499/513] [SPARK-38527][K8S][DOCS][FOLLOWUP] Use v1.5.0 tag instead of release-1.5 ### What changes were proposed in this pull request? This PR uses the `v1.5.0` tag instead of the branch name. ### Why are the changes needed? It was a mistake to use `branch` name. It should be an immutable tag. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual review. Closes #35859 from dongjoon-hyun/SPARK-38527-2. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- resource-managers/kubernetes/integration-tests/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md index 93dbc18554ce5..265962dab518e 100644 --- a/resource-managers/kubernetes/integration-tests/README.md +++ b/resource-managers/kubernetes/integration-tests/README.md @@ -317,10 +317,10 @@ Volcano integration is experimental in Aapche Spark 3.3.0 and the test coverage ## Installation # x86_64 - kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/release-1.5/installer/volcano-development.yaml + kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.5.0/installer/volcano-development.yaml # arm64: - kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/release-1.5/installer/volcano-development-arm64.yaml + kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.5.0/installer/volcano-development-arm64.yaml ## Run tests From 2a63fea139ec7e5d0883ccd5b33080bdad21e009 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 15 Mar 2022 21:47:57 +0800 Subject: [PATCH 500/513] Revert "[SPARK-38544][BUILD] Upgrade log4j2 to 2.17.2" This reverts commit a30575e --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 8 ++++---- dev/deps/spark-deps-hadoop-3-hive-2.3 | 8 ++++---- pom.xml | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index d87f8034a8b42..bcbf8b9908ae5 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -187,10 +187,10 @@ lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar -log4j-1.2-api/2.17.2//log4j-1.2-api-2.17.2.jar -log4j-api/2.17.2//log4j-api-2.17.2.jar -log4j-core/2.17.2//log4j-core-2.17.2.jar -log4j-slf4j-impl/2.17.2//log4j-slf4j-impl-2.17.2.jar +log4j-1.2-api/2.17.1//log4j-1.2-api-2.17.1.jar +log4j-api/2.17.1//log4j-api-2.17.1.jar +log4j-core/2.17.1//log4j-core-2.17.1.jar +log4j-slf4j-impl/2.17.1//log4j-slf4j-impl-2.17.1.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index d92152666a2bd..8ca7880c7a34d 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -172,10 +172,10 @@ 
lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar -log4j-1.2-api/2.17.2//log4j-1.2-api-2.17.2.jar -log4j-api/2.17.2//log4j-api-2.17.2.jar -log4j-core/2.17.2//log4j-core-2.17.2.jar -log4j-slf4j-impl/2.17.2//log4j-slf4j-impl-2.17.2.jar +log4j-1.2-api/2.17.1//log4j-1.2-api-2.17.1.jar +log4j-api/2.17.1//log4j-api-2.17.1.jar +log4j-core/2.17.1//log4j-core-2.17.1.jar +log4j-slf4j-impl/2.17.1//log4j-slf4j-impl-2.17.1.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar diff --git a/pom.xml b/pom.xml index a751bdd3462fe..d8b5b87c7d97b 100644 --- a/pom.xml +++ b/pom.xml @@ -119,7 +119,7 @@ 1.6.0 spark 1.7.32 - 2.17.2 + 2.17.1 3.3.2 2.5.0 From c00942dc3ce3389cbbcb9a4b1bbf4cfecf28965c Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Tue, 15 Mar 2022 09:18:35 -0700 Subject: [PATCH 501/513] [SPARK-38524][SPARK-38553][K8S] Bump `Volcano` to v1.5.1 and fix Volcano `weight` to be positive integer and use `cpu` capability instead ### What changes were proposed in this pull request? - Bump Volcano to v1.5.1: https://github.com/volcano-sh/volcano/issues/2090 - Use capability to limit disable queue ### Why are the changes needed? In Volcano, weight should be [a positive integer](https://github.com/volcano-sh/volcano/blob/c5bed286211af4f9d94503968c1c6b06baa882d1/pkg/webhooks/admission/queues/validate/validate_queue.go#L188), so weight 0 is a wrong usage. As [description](https://volcano.sh/en/docs/queue/) for queue - weight is a soft constraint. - capability is a hard constraint. We better to use capability to limit disable queue. This also fix the error `requestBody.spec.weight: Invalid value: 0: queue weight must be a positive integer` when running latest volcano image. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? ``` # arm64 kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.5.1/installer/volcano-development-arm64.yaml # x86 kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.5.1/installer/volcano-development.yaml build/sbt -Pvolcano -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=minikube,r -Dtest.include.tags=volcano -Dspark.kubernetes.test.namespace=default "kubernetes-integration-tests/testOnly" ``` Closes #35819 from Yikun/SPARK-38524. Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- .../kubernetes/integration-tests/README.md | 19 ++++++++++++++++--- .../test/resources/volcano/disable-queue.yml | 4 ++-- .../volcano/disable-queue0-enable-queue1.yml | 4 +++- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md index 265962dab518e..748664cf41b74 100644 --- a/resource-managers/kubernetes/integration-tests/README.md +++ b/resource-managers/kubernetes/integration-tests/README.md @@ -312,15 +312,15 @@ Volcano integration is experimental in Aapche Spark 3.3.0 and the test coverage ## Requirements - A minimum of 6 CPUs and 9G of memory is required to complete all Volcano test cases. -- Volcano v1.5.0. +- Volcano v1.5.1. 
## Installation # x86_64 - kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.5.0/installer/volcano-development.yaml + kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.5.1/installer/volcano-development.yaml # arm64: - kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.5.0/installer/volcano-development-arm64.yaml + kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.5.1/installer/volcano-development-arm64.yaml ## Run tests @@ -338,3 +338,16 @@ You can also specify `volcano` tag to only run Volcano test: -Dtest.exclude.tags=minikube \ -Dspark.kubernetes.test.deployMode=docker-desktop \ 'kubernetes-integration-tests/test' + +## Cleanup Volcano + + # x86_64 + kubectl delete -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.5.1/installer/volcano-development.yaml + + # arm64: + kubectl delete -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.5.1/installer/volcano-development-arm64.yaml + + # Cleanup Volcano webhook + kubectl delete validatingwebhookconfigurations volcano-admission-service-jobs-validate volcano-admission-service-pods-validate volcano-admission-service-queues-validate + kubectl delete mutatingwebhookconfigurations volcano-admission-service-jobs-mutate volcano-admission-service-podgroups-mutate volcano-admission-service-pods-mutate volcano-admission-service-queues-mutate + diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue.yml index 909102d7c90c1..d9f8c36471ec8 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue.yml +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue.yml @@ -19,6 +19,6 @@ kind: Queue metadata: name: queue spec: - weight: 0 + weight: 1 capability: - cpu: "1" + cpu: "0.001" diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue0-enable-queue1.yml b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue0-enable-queue1.yml index 2281e2e8226a2..82e479478ccd9 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue0-enable-queue1.yml +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/volcano/disable-queue0-enable-queue1.yml @@ -19,7 +19,9 @@ kind: Queue metadata: name: queue0 spec: - weight: 0 + weight: 1 + capability: + cpu: "0.001" --- apiVersion: scheduling.volcano.sh/v1beta1 kind: Queue From 21db91633912752de603e1113b61e0828e00bd11 Mon Sep 17 00:00:00 2001 From: Yihong He Date: Tue, 15 Mar 2022 10:45:40 -0700 Subject: [PATCH 502/513] [SPARK-38484][PYTHON] Move usage logging instrumentation util functions from pandas module to pyspark.util module ### What changes were proposed in this pull request? Move usage logging instrumentation util functions from pandas module to pyspark.util module ### Why are the changes needed? It will be helpful to attach the usage logger to other modules (e.g. sql) besides Pandas but other modules should not depend on Pandas modules to use the instrumentation utils (e.g. _wrap_function, _wrap_property ...). ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? 
- Existing unit tests - Manual test by running `./bin/pyspark` and verified the output: ``` >>> sc.setLogLevel("info") >>> import pyspark.pandas as ps 22/03/15 17:17:16 INFO Log4jUsageLogger: pandasOnSparkImported=1.0, tags=List(), blob= 22/03/15 17:18:21 INFO Log4jUsageLogger: initialConfigLogging=1.0, tags=List(sparkApplicationId=local-1647360920525, sparkExecutionId=null, sparkJobGroupId=null), blob={"spark.sql.warehouse.dir":"file:/Users/yihong.he/spark/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"10.120.131.148","spark.serializer.objectStreamReset":"100","spark.driver.port":"61338","spark.rdd.compress":"True","spark.app.name":"PySparkShell","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1647360919721","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1647360920525"} 22/03/15 17:18:25 INFO Log4jUsageLogger: pandasOnSparkFunctionCalled=1.0, tags=List(pandasOnSparkFunction=__init__(self, data=None, index=None, columns=None, dtype=None, copy=False), className=DataFrame, status=success), blob={"duration": 3781.477633999998} >>> psdf.columns 22/03/15 17:18:49 INFO Log4jUsageLogger: pandasOnSparkFunctionCalled=1.0, tags=List(pandasOnSparkProperty=columns, className=DataFrame, status=success), blob={"duration": 0.24742499999774736} Index(['a', 'b'], dtype='object') ``` Closes #35790 from heyihong/SPARK-38484. 
Authored-by: Yihong He Signed-off-by: Takuya UESHIN --- python/pyspark/instrumentation_utils.py | 183 ++++++++++++++++++ .../pyspark/pandas/usage_logging/__init__.py | 152 +-------------- 2 files changed, 187 insertions(+), 148 deletions(-) create mode 100644 python/pyspark/instrumentation_utils.py diff --git a/python/pyspark/instrumentation_utils.py b/python/pyspark/instrumentation_utils.py new file mode 100644 index 0000000000000..908f5cbb3d473 --- /dev/null +++ b/python/pyspark/instrumentation_utils.py @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import functools +import inspect +import threading +import importlib +import time +from types import ModuleType +from typing import Tuple, Union, List, Callable, Any, Type + + +__all__: List[str] = [] + +_local = threading.local() + + +def _wrap_function(class_name: str, function_name: str, func: Callable, logger: Any) -> Callable: + + signature = inspect.signature(func) + + @functools.wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + if hasattr(_local, "logging") and _local.logging: + # no need to log since this should be internal call. + return func(*args, **kwargs) + _local.logging = True + try: + start = time.perf_counter() + try: + res = func(*args, **kwargs) + logger.log_success( + class_name, function_name, time.perf_counter() - start, signature + ) + return res + except Exception as ex: + logger.log_failure( + class_name, function_name, ex, time.perf_counter() - start, signature + ) + raise + finally: + _local.logging = False + + return wrapper + + +def _wrap_property(class_name: str, property_name: str, prop: Any, logger: Any) -> Any: + @property # type: ignore[misc] + def wrapper(self: Any) -> Any: + if hasattr(_local, "logging") and _local.logging: + # no need to log since this should be internal call. 
+ return prop.fget(self) + _local.logging = True + try: + start = time.perf_counter() + try: + res = prop.fget(self) + logger.log_success(class_name, property_name, time.perf_counter() - start) + return res + except Exception as ex: + logger.log_failure(class_name, property_name, ex, time.perf_counter() - start) + raise + finally: + _local.logging = False + + wrapper.__doc__ = prop.__doc__ + + if prop.fset is not None: + wrapper = wrapper.setter( # type: ignore[attr-defined] + _wrap_function(class_name, prop.fset.__name__, prop.fset, logger) + ) + + return wrapper + + +def _wrap_missing_function( + class_name: str, function_name: str, func: Callable, original: Any, logger: Any +) -> Any: + + if not hasattr(original, function_name): + return func + + signature = inspect.signature(getattr(original, function_name)) + + is_deprecated = func.__name__ == "deprecated_function" + + @functools.wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + try: + return func(*args, **kwargs) + finally: + logger.log_missing(class_name, function_name, is_deprecated, signature) + + return wrapper + + +def _wrap_missing_property(class_name: str, property_name: str, prop: Any, logger: Any) -> Any: + + is_deprecated = prop.fget.__name__ == "deprecated_property" + + @property # type: ignore[misc] + def wrapper(self: Any) -> Any: + try: + return prop.fget(self) + finally: + logger.log_missing(class_name, property_name, is_deprecated) + + return wrapper + + +def _attach( + logger_module: Union[str, ModuleType], + modules: List[ModuleType], + classes: List[Type[Any]], + missings: List[Tuple[Type[Any], Type[Any]]], +) -> None: + if isinstance(logger_module, str): + logger_module = importlib.import_module(logger_module) + + logger = getattr(logger_module, "get_logger")() + + special_functions = set( + [ + "__init__", + "__repr__", + "__str__", + "_repr_html_", + "__len__", + "__getitem__", + "__setitem__", + "__getattr__", + "__enter__", + "__exit__", + ] + ) + + # Modules + for target_module in modules: + target_name = target_module.__name__.split(".")[-1] + for name in getattr(target_module, "__all__"): + func = getattr(target_module, name) + if not inspect.isfunction(func): + continue + setattr(target_module, name, _wrap_function(target_name, name, func, logger)) + + # Classes + for target_class in classes: + for name, func in inspect.getmembers(target_class, inspect.isfunction): + if name.startswith("_") and name not in special_functions: + continue + setattr(target_class, name, _wrap_function(target_class.__name__, name, func, logger)) + + for name, prop in inspect.getmembers(target_class, lambda o: isinstance(o, property)): + if name.startswith("_"): + continue + setattr(target_class, name, _wrap_property(target_class.__name__, name, prop, logger)) + + # Missings + for original, missing in missings: + for name, func in inspect.getmembers(missing, inspect.isfunction): + setattr( + missing, + name, + _wrap_missing_function(original.__name__, name, func, original, logger), + ) + + for name, prop in inspect.getmembers(missing, lambda o: isinstance(o, property)): + setattr(missing, name, _wrap_missing_property(original.__name__, name, prop, logger)) diff --git a/python/pyspark/pandas/usage_logging/__init__.py b/python/pyspark/pandas/usage_logging/__init__.py index 10fe616264fb6..a6f1470b9f4e4 100644 --- a/python/pyspark/pandas/usage_logging/__init__.py +++ b/python/pyspark/pandas/usage_logging/__init__.py @@ -15,11 +15,6 @@ # limitations under the License. 
# -import functools -import importlib -import inspect -import threading -import time from types import ModuleType from typing import Union @@ -60,6 +55,7 @@ ) from pyspark.pandas.strings import StringMethods from pyspark.pandas.window import Expanding, ExpandingGroupby, Rolling, RollingGroupby +from pyspark.instrumentation_utils import _attach def attach(logger_module: Union[str, ModuleType]) -> None: @@ -76,10 +72,6 @@ def attach(logger_module: Union[str, ModuleType]) -> None: -------- usage_logger : the reference implementation of the usage logger. """ - if isinstance(logger_module, str): - logger_module = importlib.import_module(logger_module) - - logger = getattr(logger_module, "get_logger")() modules = [config, namespace] classes = [ @@ -116,44 +108,7 @@ def attach(logger_module: Union[str, ModuleType]) -> None: sql_formatter._CAPTURE_SCOPES = 4 modules.append(sql_formatter) - # Modules - for target_module in modules: - target_name = target_module.__name__.split(".")[-1] - for name in getattr(target_module, "__all__"): - func = getattr(target_module, name) - if not inspect.isfunction(func): - continue - setattr(target_module, name, _wrap_function(target_name, name, func, logger)) - - special_functions = set( - [ - "__init__", - "__repr__", - "__str__", - "_repr_html_", - "__len__", - "__getitem__", - "__setitem__", - "__getattr__", - "__enter__", - "__exit__", - ] - ) - - # Classes - for target_class in classes: - for name, func in inspect.getmembers(target_class, inspect.isfunction): - if name.startswith("_") and name not in special_functions: - continue - setattr(target_class, name, _wrap_function(target_class.__name__, name, func, logger)) - - for name, prop in inspect.getmembers(target_class, lambda o: isinstance(o, property)): - if name.startswith("_"): - continue - setattr(target_class, name, _wrap_property(target_class.__name__, name, prop, logger)) - - # Missings - for original, missing in [ + missings = [ (pd.DataFrame, _MissingPandasLikeDataFrame), (pd.Series, MissingPandasLikeSeries), (pd.Index, MissingPandasLikeIndex), @@ -165,105 +120,6 @@ def attach(logger_module: Union[str, ModuleType]) -> None: (pd.core.window.Rolling, MissingPandasLikeRolling), (pd.core.window.ExpandingGroupby, MissingPandasLikeExpandingGroupby), (pd.core.window.RollingGroupby, MissingPandasLikeRollingGroupby), - ]: - for name, func in inspect.getmembers(missing, inspect.isfunction): - setattr( - missing, - name, - _wrap_missing_function(original.__name__, name, func, original, logger), - ) - - for name, prop in inspect.getmembers(missing, lambda o: isinstance(o, property)): - setattr(missing, name, _wrap_missing_property(original.__name__, name, prop, logger)) - - -_local = threading.local() - - -def _wrap_function(class_name, function_name, func, logger): - - signature = inspect.signature(func) - - @functools.wraps(func) - def wrapper(*args, **kwargs): - if hasattr(_local, "logging") and _local.logging: - # no need to log since this should be internal call. 
- return func(*args, **kwargs) - _local.logging = True - try: - start = time.perf_counter() - try: - res = func(*args, **kwargs) - logger.log_success( - class_name, function_name, time.perf_counter() - start, signature - ) - return res - except Exception as ex: - logger.log_failure( - class_name, function_name, ex, time.perf_counter() - start, signature - ) - raise - finally: - _local.logging = False - - return wrapper - - -def _wrap_property(class_name, property_name, prop, logger): - @property - def wrapper(self): - if hasattr(_local, "logging") and _local.logging: - # no need to log since this should be internal call. - return prop.fget(self) - _local.logging = True - try: - start = time.perf_counter() - try: - res = prop.fget(self) - logger.log_success(class_name, property_name, time.perf_counter() - start) - return res - except Exception as ex: - logger.log_failure(class_name, property_name, ex, time.perf_counter() - start) - raise - finally: - _local.logging = False - - wrapper.__doc__ = prop.__doc__ - - if prop.fset is not None: - wrapper = wrapper.setter(_wrap_function(class_name, prop.fset.__name__, prop.fset, logger)) - - return wrapper - - -def _wrap_missing_function(class_name, function_name, func, original, logger): - - if not hasattr(original, function_name): - return func - - signature = inspect.signature(getattr(original, function_name)) - - is_deprecated = func.__name__ == "deprecated_function" - - @functools.wraps(func) - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - finally: - logger.log_missing(class_name, function_name, is_deprecated, signature) - - return wrapper - - -def _wrap_missing_property(class_name, property_name, prop, logger): - - is_deprecated = prop.fget.__name__ == "deprecated_property" - - @property - def wrapper(self): - try: - return prop.fget(self) - finally: - logger.log_missing(class_name, property_name, is_deprecated) + ] - return wrapper + _attach(logger_module, modules, classes, missings) From 4e31000d86f7610e283cbc124bdfff5e11f62038 Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Tue, 15 Mar 2022 13:28:46 -0700 Subject: [PATCH 503/513] [SPARK-38204][SS] Use StatefulOpClusteredDistribution for stateful operators with respecting backward compatibility ### What changes were proposed in this pull request? This PR proposes to use StatefulOpClusteredDistribution for stateful operators which requires exact order of clustering keys without allowing sub-clustering keys, so that stateful operators will have consistent partitioning across lifetime of the query. (It doesn't cover the case grouping keys are changed. We have state schema checker verifying on the changes, but changing name is allowed so swapping keys with same data type is still allowed. So there are still grey areas.) The change will break the existing queries having checkpoint in prior to Spark 3.3 and bring silent correctness issues. To remedy the problem, we introduce a new internal config `spark.sql.streaming.statefulOperator.useStrictDistribution`, which defaults to true for new queries but defaults to false for queries starting from checkpoint in prior to Spark 3.3. If the new config is set to false, stateful operator will use ClusteredDistribution which retains the old requirement of child distribution. Note that in this change we don't fix the root problem against old checkpoints. Long-term fix should be crafted carefully, after collecting evidence on the impact of SPARK-38204. (e.g. how many queries on end users would encounter SPARK-38204.) 
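To make the partitioning hazard concrete, here is a minimal sketch of a query shape that triggers it; the column names and `MemoryStream` setup are illustrative assumptions, not excerpts from the new test suites:

```
import org.apache.spark.sql.{SQLContext, SparkSession}
import org.apache.spark.sql.execution.streaming.MemoryStream

val spark = SparkSession.builder().master("local[2]").getOrCreate()
import spark.implicits._
implicit val sqlCtx: SQLContext = spark.sqlContext

val input = MemoryStream[(Int, Int)]

// The aggregation groups by ("a", "b") while the child is repartitioned on "a"
// alone. ClusteredDistribution(a, b) is satisfied by hash partitioning on the
// subset ("a"), so no extra exchange is inserted and state rows can land on
// different partitions than in a plan without the repartition. The stricter
// StatefulOpClusteredDistribution requires hash partitioning on exactly
// ("a", "b"), which forces a shuffle here and keeps state partitioning stable.
val counts = input.toDS().toDF("a", "b")
  .repartition($"a")
  .groupBy($"a", $"b")
  .count()
```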
This PR adds E2E tests for the cases which trigger SPARK-38204, and verify the behavior with new query (3.3) & old query (in prior to 3.3). ### Why are the changes needed? Please refer the description of JIRA issue [SPARK-38024](https://issues.apache.org/jira/browse/SPARK-38204) for details, since the description is quite long to include here. ### Does this PR introduce _any_ user-facing change? Yes, stateful operators no longer accept the child output partitioning having subset of grouping keys and trigger additional shuffle. This will ensure consistent partitioning with stateful operators across lifetime of the query. ### How was this patch tested? New UTs including backward compatibility are added. Closes #35673 from HeartSaVioR/SPARK-38204-short-term-fix. Authored-by: Jungtaek Lim Signed-off-by: Yuanjian Li --- docs/ss-migration-guide.md | 4 + .../apache/spark/sql/internal/SQLConf.scala | 17 + .../sql/execution/aggregate/AggUtils.scala | 64 ++- .../aggregate/BaseAggregateExec.scala | 23 +- .../aggregate/HashAggregateExec.scala | 2 + .../aggregate/MergingSessionsExec.scala | 15 +- .../aggregate/ObjectHashAggregateExec.scala | 2 + .../aggregate/SortAggregateExec.scala | 2 + .../aggregate/UpdatingSessionsExec.scala | 18 +- .../FlatMapGroupsWithStateExec.scala | 13 +- .../streaming/IncrementalExecution.scala | 17 + .../sql/execution/streaming/OffsetSeq.scala | 5 +- .../StatefulOperatorPartitioning.scala | 53 ++ .../streaming/statefulOperators.scala | 37 +- .../commits/0 | 2 + .../metadata | 1 + .../offsets/0 | 3 + .../state/0/0/1.delta | Bin 0 -> 46 bytes .../state/0/0/_metadata/schema | Bin 0 -> 407 bytes .../state/0/1/1.delta | Bin 0 -> 96 bytes .../state/0/2/1.delta | Bin 0 -> 46 bytes .../state/0/3/1.delta | Bin 0 -> 46 bytes .../state/0/4/1.delta | Bin 0 -> 46 bytes .../commits/.0.crc | Bin 0 -> 12 bytes .../commits/.1.crc | Bin 0 -> 12 bytes .../commits/0 | 2 + .../commits/1 | 2 + .../metadata | 1 + .../offsets/.0.crc | Bin 0 -> 16 bytes .../offsets/.1.crc | Bin 0 -> 16 bytes .../offsets/0 | 3 + .../offsets/1 | 3 + .../state/0/0/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/0/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/0/1.delta | Bin 0 -> 46 bytes .../state/0/0/2.delta | Bin 0 -> 46 bytes .../state/0/0/_metadata/.schema.crc | Bin 0 -> 12 bytes .../state/0/0/_metadata/schema | Bin 0 -> 393 bytes .../state/0/1/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/1/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/1/1.delta | Bin 0 -> 46 bytes .../state/0/1/2.delta | Bin 0 -> 75 bytes .../state/0/2/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/2/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/2/1.delta | Bin 0 -> 46 bytes .../state/0/2/2.delta | Bin 0 -> 75 bytes .../state/0/3/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/3/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/3/1.delta | Bin 0 -> 74 bytes .../state/0/3/2.delta | Bin 0 -> 46 bytes .../state/0/4/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/4/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/4/1.delta | Bin 0 -> 75 bytes .../state/0/4/2.delta | Bin 0 -> 46 bytes .../commits/.0.crc | Bin 0 -> 12 bytes .../commits/0 | 2 + .../metadata | 1 + .../offsets/.0.crc | Bin 0 -> 16 bytes .../offsets/0 | 3 + .../state/0/0/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/0/1.delta | Bin 0 -> 46 bytes .../state/0/0/_metadata/.schema.crc | Bin 0 -> 12 bytes .../state/0/0/_metadata/schema | Bin 0 -> 415 bytes .../state/0/1/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/1/1.delta | Bin 0 -> 138 bytes .../state/0/2/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/2/1.delta | Bin 
0 -> 46 bytes .../state/0/3/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/3/1.delta | Bin 0 -> 46 bytes .../state/0/4/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/4/1.delta | Bin 0 -> 46 bytes .../commits/.0.crc | Bin 0 -> 12 bytes .../commits/0 | 2 + .../metadata | 1 + .../offsets/.0.crc | Bin 0 -> 16 bytes .../offsets/0 | 3 + .../state/0/0/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/0/1.delta | Bin 0 -> 46 bytes .../state/0/0/_metadata/.schema.crc | Bin 0 -> 12 bytes .../state/0/0/_metadata/schema | Bin 0 -> 415 bytes .../state/0/1/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/1/1.delta | Bin 0 -> 96 bytes .../state/0/2/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/2/1.delta | Bin 0 -> 46 bytes .../state/0/3/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/3/1.delta | Bin 0 -> 46 bytes .../state/0/4/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/4/1.delta | Bin 0 -> 46 bytes .../commits/.0.crc | Bin 0 -> 12 bytes .../commits/.1.crc | Bin 0 -> 12 bytes .../commits/0 | 2 + .../commits/1 | 2 + .../metadata | 1 + .../offsets/.0.crc | Bin 0 -> 16 bytes .../offsets/.1.crc | Bin 0 -> 16 bytes .../offsets/0 | 3 + .../offsets/1 | 3 + .../state/0/0/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/0/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/0/1.delta | Bin 0 -> 46 bytes .../state/0/0/2.delta | Bin 0 -> 46 bytes .../state/0/0/_metadata/.schema.crc | Bin 0 -> 16 bytes .../state/0/0/_metadata/schema | Bin 0 -> 754 bytes .../state/0/1/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/1/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/1/1.delta | Bin 0 -> 259 bytes .../state/0/1/2.delta | Bin 0 -> 46 bytes .../state/0/2/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/2/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/2/1.delta | Bin 0 -> 46 bytes .../state/0/2/2.delta | Bin 0 -> 46 bytes .../state/0/3/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/3/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/3/1.delta | Bin 0 -> 230 bytes .../state/0/3/2.delta | Bin 0 -> 46 bytes .../state/0/4/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/4/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/4/1.delta | Bin 0 -> 46 bytes .../state/0/4/2.delta | Bin 0 -> 46 bytes .../commits/.0.crc | Bin 0 -> 12 bytes .../commits/.1.crc | Bin 0 -> 12 bytes .../commits/0 | 2 + .../commits/1 | 2 + .../metadata | 1 + .../offsets/.0.crc | Bin 0 -> 16 bytes .../offsets/.1.crc | Bin 0 -> 16 bytes .../offsets/0 | 3 + .../offsets/1 | 3 + .../state/0/0/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/0/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/0/1.delta | Bin 0 -> 46 bytes .../state/0/0/2.delta | Bin 0 -> 46 bytes .../state/0/0/_metadata/.schema.crc | Bin 0 -> 12 bytes .../state/0/0/_metadata/schema | Bin 0 -> 262 bytes .../state/0/1/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/1/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/1/1.delta | Bin 0 -> 82 bytes .../state/0/1/2.delta | Bin 0 -> 82 bytes .../state/0/2/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/2/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/2/1.delta | Bin 0 -> 46 bytes .../state/0/2/2.delta | Bin 0 -> 46 bytes .../state/0/3/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/3/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/3/1.delta | Bin 0 -> 46 bytes .../state/0/3/2.delta | Bin 0 -> 46 bytes .../state/0/4/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/4/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/4/1.delta | Bin 0 -> 46 bytes .../state/0/4/2.delta | Bin 0 -> 82 bytes .../execution/WholeStageCodegenSuite.scala | 2 +- ...tMapGroupsWithStateDistributionSuite.scala | 455 ++++++++++++++++++ 
...treamingAggregationDistributionSuite.scala | 223 +++++++++ .../streaming/StreamingAggregationSuite.scala | 37 +- ...eamingDeduplicationDistributionSuite.scala | 148 ++++++ ...eamingSessionWindowDistributionSuite.scala | 225 +++++++++ ...fulOpClusteredDistributionTestHelper.scala | 80 +++ 157 files changed, 1407 insertions(+), 86 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulOperatorPartitioning.scala create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/commits/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/metadata create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/offsets/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/0/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/0/_metadata/schema create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/1/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/2/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/3/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/4/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/.0.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/.1.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/1 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/metadata create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/.0.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/.1.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/1 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/1.delta create mode 100644 
sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/_metadata/.schema.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/_metadata/schema create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/1/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/1/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/1/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/1/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/2/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/2/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/2/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/2/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/3/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/3/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/3/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/3/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/4/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/4/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/4/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/4/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/commits/.0.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/commits/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/metadata create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/offsets/.0.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/offsets/0 create mode 100644 
sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/0/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/0/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/0/_metadata/.schema.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/0/_metadata/schema create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/1/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/1/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/2/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/2/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/3/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/3/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/4/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/4/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/commits/.0.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/commits/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/metadata create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/offsets/.0.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/offsets/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/0/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/0/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/0/_metadata/.schema.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/0/_metadata/schema create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/1/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/1/1.delta create mode 100644 
sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/2/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/2/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/3/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/3/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/4/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/4/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/.0.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/.1.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/1 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/metadata create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/.0.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/.1.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/1 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/_metadata/.schema.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/_metadata/schema create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/1/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/1/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/1/1.delta create mode 100644 
sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/1/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/2/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/2/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/2/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/2/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/3/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/3/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/3/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/3/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/4/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/4/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/4/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/4/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/.0.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/.1.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/1 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/metadata create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/offsets/.0.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/offsets/.1.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/offsets/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/offsets/1 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/.2.delta.crc create mode 100644 
sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/_metadata/.schema.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/_metadata/schema create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/1/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/1/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/1/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/1/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/2/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/2/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/2/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/2/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/3/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/3/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/3/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/3/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/4/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/4/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/4/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/4/2.delta create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateDistributionSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationDistributionSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationDistributionSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowDistributionSuite.scala create mode 100644 
sql/core/src/test/scala/org/apache/spark/sql/streaming/util/StatefulOpClusteredDistributionTestHelper.scala diff --git a/docs/ss-migration-guide.md b/docs/ss-migration-guide.md index 480e5e2695a16..c28724576bc41 100644 --- a/docs/ss-migration-guide.md +++ b/docs/ss-migration-guide.md @@ -26,6 +26,10 @@ Note that this migration guide describes the items specific to Structured Stream Many items of SQL migration can be applied when migrating Structured Streaming to higher versions. Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide.html). +## Upgrading from Structured Streaming 3.2 to 3.3 + +- Since Spark 3.3, all stateful operators require hash partitioning with exact grouping keys. In previous versions, all stateful operators except stream-stream join required looser partitioning criteria, which opened the possibility of correctness issues. (See [SPARK-38204](https://issues.apache.org/jira/browse/SPARK-38204) for more details.) To ensure backward compatibility, the old behavior is retained for checkpoints built with older versions. + ## Upgrading from Structured Streaming 3.0 to 3.1 - In Spark 3.0 and before, for the queries that have stateful operation which can emit rows older than the current watermark plus allowed late record delay, which are "late rows" in downstream stateful operations and these rows can be discarded, Spark only prints a warning message. Since Spark 3.1, Spark will check for such queries with possible correctness issue and throw AnalysisException for it by default. For the users who understand the possible risk of correctness issue and still decide to run the query, please disable this check by setting the config `spark.sql.streaming.statefulOperator.checkCorrectness.enabled` to false. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 33919f3acaa0e..3314dd1916498 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1782,6 +1782,23 @@ object SQLConf { .booleanConf .createWithDefault(true) + val STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION = + buildConf("spark.sql.streaming.statefulOperator.useStrictDistribution") + .internal() + .doc("The purpose of this config is only compatibility; DO NOT MANUALLY CHANGE THIS!!! " + + "When true, stateful operators in a streaming query will use " + + "StatefulOpClusteredDistribution, which guarantees stable state partitioning as long as " + + "the operator provides consistent grouping keys across the lifetime of the query. " + + "When false, stateful operators in a streaming query will use ClusteredDistribution, " + + "which is not sufficient to guarantee stable state partitioning even if the operator " + + "provides consistent grouping keys across the lifetime of the query. " + + "This config will be set to true for new streaming queries to guarantee stable state " + + "partitioning, and set to false for existing streaming queries so as not to break queries " + + "which are restored from existing checkpoints. 
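For illustration only (not part of the patch): the flag defined here is an internal conf that defaults to true, and its doc explicitly warns against changing it by hand. A minimal, hedged sketch of inspecting its effective value from a running session, assuming an active `spark: SparkSession`:

```
// Hedged sketch: read the internal flag's effective value (do not set it manually).
val strict = spark.conf
  .get("spark.sql.streaming.statefulOperator.useStrictDistribution", "true")
  .toBoolean
// true  -> stateful operators request hash partitioning on the exact grouping keys
// false -> legacy, looser ClusteredDistribution behavior (kept for old checkpoints)
```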
Please refer SPARK-38204 for details.") + .version("3.3.0") + .booleanConf + .createWithDefault(true) + val FILESTREAM_SINK_METADATA_IGNORED = buildConf("spark.sql.streaming.fileStreamSink.ignoreMetadata") .internal() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala index 32db622c9f931..26161acae30b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala @@ -45,8 +45,28 @@ object AggUtils { } } + private def createStreamingAggregate( + requiredChildDistributionExpressions: Option[Seq[Expression]] = None, + groupingExpressions: Seq[NamedExpression] = Nil, + aggregateExpressions: Seq[AggregateExpression] = Nil, + aggregateAttributes: Seq[Attribute] = Nil, + initialInputBufferOffset: Int = 0, + resultExpressions: Seq[NamedExpression] = Nil, + child: SparkPlan): SparkPlan = { + createAggregate( + requiredChildDistributionExpressions, + isStreaming = true, + groupingExpressions = groupingExpressions, + aggregateExpressions = aggregateExpressions, + aggregateAttributes = aggregateAttributes, + initialInputBufferOffset = initialInputBufferOffset, + resultExpressions = resultExpressions, + child = child) + } + private def createAggregate( requiredChildDistributionExpressions: Option[Seq[Expression]] = None, + isStreaming: Boolean = false, groupingExpressions: Seq[NamedExpression] = Nil, aggregateExpressions: Seq[AggregateExpression] = Nil, aggregateAttributes: Seq[Attribute] = Nil, @@ -60,6 +80,8 @@ object AggUtils { if (useHash && !forceSortAggregate) { HashAggregateExec( requiredChildDistributionExpressions = requiredChildDistributionExpressions, + isStreaming = isStreaming, + numShufflePartitions = None, groupingExpressions = groupingExpressions, aggregateExpressions = mayRemoveAggFilters(aggregateExpressions), aggregateAttributes = aggregateAttributes, @@ -73,6 +95,8 @@ object AggUtils { if (objectHashEnabled && useObjectHash && !forceSortAggregate) { ObjectHashAggregateExec( requiredChildDistributionExpressions = requiredChildDistributionExpressions, + isStreaming = isStreaming, + numShufflePartitions = None, groupingExpressions = groupingExpressions, aggregateExpressions = mayRemoveAggFilters(aggregateExpressions), aggregateAttributes = aggregateAttributes, @@ -82,6 +106,8 @@ object AggUtils { } else { SortAggregateExec( requiredChildDistributionExpressions = requiredChildDistributionExpressions, + isStreaming = isStreaming, + numShufflePartitions = None, groupingExpressions = groupingExpressions, aggregateExpressions = mayRemoveAggFilters(aggregateExpressions), aggregateAttributes = aggregateAttributes, @@ -290,7 +316,7 @@ object AggUtils { val partialAggregate: SparkPlan = { val aggregateExpressions = functionsWithoutDistinct.map(_.copy(mode = Partial)) val aggregateAttributes = aggregateExpressions.map(_.resultAttribute) - createAggregate( + createStreamingAggregate( groupingExpressions = groupingExpressions, aggregateExpressions = aggregateExpressions, aggregateAttributes = aggregateAttributes, @@ -302,7 +328,7 @@ object AggUtils { val partialMerged1: SparkPlan = { val aggregateExpressions = functionsWithoutDistinct.map(_.copy(mode = PartialMerge)) val aggregateAttributes = aggregateExpressions.map(_.resultAttribute) - createAggregate( + createStreamingAggregate( requiredChildDistributionExpressions = Some(groupingAttributes), 
groupingExpressions = groupingAttributes, @@ -320,7 +346,7 @@ object AggUtils { val partialMerged2: SparkPlan = { val aggregateExpressions = functionsWithoutDistinct.map(_.copy(mode = PartialMerge)) val aggregateAttributes = aggregateExpressions.map(_.resultAttribute) - createAggregate( + createStreamingAggregate( requiredChildDistributionExpressions = Some(groupingAttributes), groupingExpressions = groupingAttributes, @@ -348,7 +374,7 @@ object AggUtils { // projection: val finalAggregateAttributes = finalAggregateExpressions.map(_.resultAttribute) - createAggregate( + createStreamingAggregate( requiredChildDistributionExpressions = Some(groupingAttributes), groupingExpressions = groupingAttributes, aggregateExpressions = finalAggregateExpressions, @@ -407,7 +433,7 @@ object AggUtils { val partialAggregate: SparkPlan = { val aggregateExpressions = functionsWithoutDistinct.map(_.copy(mode = Partial)) val aggregateAttributes = aggregateExpressions.map(_.resultAttribute) - createAggregate( + createStreamingAggregate( groupingExpressions = groupingExpressions, aggregateExpressions = aggregateExpressions, aggregateAttributes = aggregateAttributes, @@ -424,7 +450,8 @@ object AggUtils { // this is to reduce amount of rows to shuffle MergingSessionsExec( requiredChildDistributionExpressions = None, - requiredChildDistributionOption = None, + isStreaming = true, + numShufflePartitions = None, groupingExpressions = groupingAttributes, sessionExpression = sessionExpression, aggregateExpressions = aggregateExpressions, @@ -447,8 +474,10 @@ object AggUtils { val aggregateExpressions = functionsWithoutDistinct.map(_.copy(mode = PartialMerge)) val aggregateAttributes = aggregateExpressions.map(_.resultAttribute) MergingSessionsExec( - requiredChildDistributionExpressions = None, - requiredChildDistributionOption = Some(restored.requiredChildDistribution), + requiredChildDistributionExpressions = Some(groupingWithoutSessionAttributes), + isStreaming = true, + // This will be replaced with actual value in state rule. + numShufflePartitions = None, groupingExpressions = groupingAttributes, sessionExpression = sessionExpression, aggregateExpressions = aggregateExpressions, @@ -476,8 +505,8 @@ object AggUtils { // projection: val finalAggregateAttributes = finalAggregateExpressions.map(_.resultAttribute) - createAggregate( - requiredChildDistributionExpressions = Some(groupingAttributes), + createStreamingAggregate( + requiredChildDistributionExpressions = Some(groupingWithoutSessionAttributes), groupingExpressions = groupingAttributes, aggregateExpressions = finalAggregateExpressions, aggregateAttributes = finalAggregateAttributes, @@ -491,10 +520,15 @@ object AggUtils { private def mayAppendUpdatingSessionExec( groupingExpressions: Seq[NamedExpression], - maybeChildPlan: SparkPlan): SparkPlan = { + maybeChildPlan: SparkPlan, + isStreaming: Boolean = false): SparkPlan = { groupingExpressions.find(_.metadata.contains(SessionWindow.marker)) match { case Some(sessionExpression) => UpdatingSessionsExec( + isStreaming = isStreaming, + // numShufflePartitions will be set to None, and replaced to the actual value in the + // state rule if the query is streaming. 
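The comments above describe the pattern used throughout this change: streaming aggregate and session-window nodes are planned with `numShufflePartitions = None`, and a later streaming-specific rule fills in the real number of state store partitions. A rough, self-contained sketch of that pattern, using illustrative stand-in names rather than the actual Spark operators:

```
// Illustrative stand-in, not a real Spark physical operator.
case class FakeStreamingAgg(isStreaming: Boolean, numShufflePartitions: Option[Int])

// The planner leaves the partition count unset...
val planned = FakeStreamingAgg(isStreaming = true, numShufflePartitions = None)

// ...and a later rule (IncrementalExecution in this patch) copies the node with the
// actual number of state store partitions before distributions are computed.
def fillIn(node: FakeStreamingAgg, numStateStores: Int): FakeStreamingAgg =
  if (node.isStreaming) node.copy(numShufflePartitions = Some(numStateStores)) else node

val ready = fillIn(planned, numStateStores = 5) // FakeStreamingAgg(true, Some(5))
```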
+ numShufflePartitions = None, groupingExpressions.map(_.toAttribute), sessionExpression.toAttribute, maybeChildPlan) @@ -506,7 +540,8 @@ object AggUtils { private def mayAppendMergingSessionExec( groupingExpressions: Seq[NamedExpression], aggregateExpressions: Seq[AggregateExpression], - partialAggregate: SparkPlan): SparkPlan = { + partialAggregate: SparkPlan, + isStreaming: Boolean = false): SparkPlan = { groupingExpressions.find(_.metadata.contains(SessionWindow.marker)) match { case Some(sessionExpression) => val aggExpressions = aggregateExpressions.map(_.copy(mode = PartialMerge)) @@ -519,7 +554,10 @@ object AggUtils { MergingSessionsExec( requiredChildDistributionExpressions = Some(groupingWithoutSessionsAttributes), - requiredChildDistributionOption = None, + isStreaming = isStreaming, + // numShufflePartitions will be set to None, and replaced to the actual value in the + // state rule if the query is streaming. + numShufflePartitions = None, groupingExpressions = groupingAttributes, sessionExpression = sessionExpression, aggregateExpressions = aggExpressions, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala index b709c8092e46d..756b5eb09d0b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala @@ -21,12 +21,15 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Final, PartialMerge} import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, UnspecifiedDistribution} import org.apache.spark.sql.execution.{AliasAwareOutputPartitioning, ExplainUtils, UnaryExecNode} +import org.apache.spark.sql.execution.streaming.StatefulOperatorPartitioning /** * Holds common logic for aggregate operators */ trait BaseAggregateExec extends UnaryExecNode with AliasAwareOutputPartitioning { def requiredChildDistributionExpressions: Option[Seq[Expression]] + def isStreaming: Boolean + def numShufflePartitions: Option[Int] def groupingExpressions: Seq[NamedExpression] def aggregateExpressions: Seq[AggregateExpression] def aggregateAttributes: Seq[Attribute] @@ -92,7 +95,20 @@ trait BaseAggregateExec extends UnaryExecNode with AliasAwareOutputPartitioning override def requiredChildDistribution: List[Distribution] = { requiredChildDistributionExpressions match { case Some(exprs) if exprs.isEmpty => AllTuples :: Nil - case Some(exprs) => ClusteredDistribution(exprs) :: Nil + case Some(exprs) => + if (isStreaming) { + numShufflePartitions match { + case Some(parts) => + StatefulOperatorPartitioning.getCompatibleDistribution( + exprs, parts, conf) :: Nil + + case _ => + throw new IllegalStateException("Expected to set the number of partitions before " + + "constructing required child distribution!") + } + } else { + ClusteredDistribution(exprs) :: Nil + } case None => UnspecifiedDistribution :: Nil } } @@ -102,7 +118,8 @@ trait BaseAggregateExec extends UnaryExecNode with AliasAwareOutputPartitioning */ def toSortAggregate: SortAggregateExec = { SortAggregateExec( - requiredChildDistributionExpressions, groupingExpressions, aggregateExpressions, - aggregateAttributes, initialInputBufferOffset, resultExpressions, child) + requiredChildDistributionExpressions, isStreaming, 
numShufflePartitions, groupingExpressions, + aggregateExpressions, aggregateAttributes, initialInputBufferOffset, resultExpressions, + child) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index 1b4f4be501cce..8be3a018cee58 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -45,6 +45,8 @@ import org.apache.spark.util.Utils */ case class HashAggregateExec( requiredChildDistributionExpressions: Option[Seq[Expression]], + isStreaming: Boolean, + numShufflePartitions: Option[Int], groupingExpressions: Seq[NamedExpression], aggregateExpressions: Seq[AggregateExpression], aggregateAttributes: Seq[Attribute], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/MergingSessionsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/MergingSessionsExec.scala index 08e8b59a17828..31245c5451857 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/MergingSessionsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/MergingSessionsExec.scala @@ -21,7 +21,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, Expression, MutableProjection, NamedExpression, SortOrder, UnsafeRow} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.metric.SQLMetrics @@ -41,7 +40,8 @@ import org.apache.spark.sql.execution.metric.SQLMetrics */ case class MergingSessionsExec( requiredChildDistributionExpressions: Option[Seq[Expression]], - requiredChildDistributionOption: Option[Seq[Distribution]], + isStreaming: Boolean, + numShufflePartitions: Option[Int], groupingExpressions: Seq[NamedExpression], sessionExpression: NamedExpression, aggregateExpressions: Seq[AggregateExpression], @@ -59,17 +59,6 @@ case class MergingSessionsExec( override def outputOrdering: Seq[SortOrder] = child.outputOrdering - override def requiredChildDistribution: List[Distribution] = { - requiredChildDistributionExpressions match { - case Some(exprs) if exprs.isEmpty => AllTuples :: Nil - case Some(exprs) => ClusteredDistribution(exprs) :: Nil - case None => requiredChildDistributionOption match { - case Some(distributions) => distributions.toList - case None => UnspecifiedDistribution :: Nil - } - } - } - override def requiredChildOrdering: Seq[Seq[SortOrder]] = { Seq((keyWithoutSessionExpressions ++ Seq(sessionExpression)).map(SortOrder(_, Ascending))) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala index c98c9f42e69da..9da0ca93c1819 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala @@ -59,6 +59,8 @@ import org.apache.spark.sql.execution.metric.SQLMetrics */ case class ObjectHashAggregateExec( requiredChildDistributionExpressions: Option[Seq[Expression]], + isStreaming: Boolean, 
+ numShufflePartitions: Option[Int], groupingExpressions: Seq[NamedExpression], aggregateExpressions: Seq[AggregateExpression], aggregateAttributes: Seq[Attribute], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala index a0557822795af..3cf63a5318dcf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala @@ -32,6 +32,8 @@ import org.apache.spark.sql.internal.SQLConf */ case class SortAggregateExec( requiredChildDistributionExpressions: Option[Seq[Expression]], + isStreaming: Boolean, + numShufflePartitions: Option[Int], groupingExpressions: Seq[NamedExpression], aggregateExpressions: Seq[AggregateExpression], aggregateAttributes: Seq[Attribute], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/UpdatingSessionsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/UpdatingSessionsExec.scala index f15a22403cfb4..fee7e29f8add1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/UpdatingSessionsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/UpdatingSessionsExec.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, SortOrder} import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning} import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.streaming.StatefulOperatorPartitioning /** * This node updates the session window spec of each input rows via analyzing neighbor rows and @@ -35,6 +36,8 @@ import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} * Refer [[UpdatingSessionsIterator]] for more details. 
*/ case class UpdatingSessionsExec( + isStreaming: Boolean, + numShufflePartitions: Option[Int], groupingExpression: Seq[Attribute], sessionExpression: Attribute, child: SparkPlan) extends UnaryExecNode { @@ -63,7 +66,20 @@ case class UpdatingSessionsExec( if (groupingWithoutSessionExpression.isEmpty) { AllTuples :: Nil } else { - ClusteredDistribution(groupingWithoutSessionExpression) :: Nil + if (isStreaming) { + numShufflePartitions match { + case Some(parts) => + StatefulOperatorPartitioning.getCompatibleDistribution( + groupingWithoutSessionExpression, parts, conf) :: Nil + + case _ => + throw new IllegalStateException("Expected to set the number of partitions before " + + "constructing required child distribution!") + } + + } else { + ClusteredDistribution(groupingWithoutSessionExpression) :: Nil + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala index ffacfefb552da..3ff539b9ef32b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, Expression, SortOrder, UnsafeRow} import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Distribution} +import org.apache.spark.sql.catalyst.plans.physical.Distribution import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper._ import org.apache.spark.sql.execution.streaming.state._ @@ -93,13 +93,10 @@ case class FlatMapGroupsWithStateExec( * to have the same grouping so that the data are co-lacated on the same task. */ override def requiredChildDistribution: Seq[Distribution] = { - // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution - // before making any changes. 
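Both `BaseAggregateExec` and `UpdatingSessionsExec` now fail fast if the state rule has not filled in the partition count, throwing before an under-specified distribution can be constructed. A tiny sketch of that guard, with an illustrative helper name that is not part of the patch:

```
// Illustrative helper: mirrors the guard added to the streaming operators above.
def resolvedPartitions(numShufflePartitions: Option[Int]): Int =
  numShufflePartitions.getOrElse {
    throw new IllegalStateException("Expected to set the number of partitions before " +
      "constructing required child distribution!")
  }
```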
- // TODO(SPARK-38204) - ClusteredDistribution( - groupingAttributes, requiredNumPartitions = stateInfo.map(_.numPartitions)) :: - ClusteredDistribution( - initialStateGroupAttrs, requiredNumPartitions = stateInfo.map(_.numPartitions)) :: + StatefulOperatorPartitioning.getCompatibleDistribution( + groupingAttributes, getStateInfo, conf) :: + StatefulOperatorPartitioning.getCompatibleDistribution( + initialStateGroupAttrs, getStateInfo, conf) :: Nil } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala index 3e772e104648b..9670c774a74c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.execution.{LocalLimitExec, QueryExecution, SparkPlan, SparkPlanner, UnaryExecNode} +import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, MergingSessionsExec, ObjectHashAggregateExec, SortAggregateExec, UpdatingSessionsExec} import org.apache.spark.sql.execution.exchange.ShuffleExchangeLike import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode @@ -132,6 +133,22 @@ class IncrementalExecution( } override def apply(plan: SparkPlan): SparkPlan = plan transform { + // NOTE: we should include all aggregate execs here which are used in streaming aggregations + case a: SortAggregateExec if a.isStreaming => + a.copy(numShufflePartitions = Some(numStateStores)) + + case a: HashAggregateExec if a.isStreaming => + a.copy(numShufflePartitions = Some(numStateStores)) + + case a: ObjectHashAggregateExec if a.isStreaming => + a.copy(numShufflePartitions = Some(numStateStores)) + + case a: MergingSessionsExec if a.isStreaming => + a.copy(numShufflePartitions = Some(numStateStores)) + + case a: UpdatingSessionsExec if a.isStreaming => + a.copy(numShufflePartitions = Some(numStateStores)) + case StateStoreSaveExec(keys, None, None, None, stateFormatVersion, UnaryExecNode(agg, StateStoreRestoreExec(_, None, _, child))) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala index c08a14c65b772..913805d1a074d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala @@ -98,7 +98,7 @@ object OffsetSeqMetadata extends Logging { SHUFFLE_PARTITIONS, STATE_STORE_PROVIDER_CLASS, STREAMING_MULTIPLE_WATERMARK_POLICY, FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION, STREAMING_AGGREGATION_STATE_FORMAT_VERSION, STREAMING_JOIN_STATE_FORMAT_VERSION, STATE_STORE_COMPRESSION_CODEC, - STATE_STORE_ROCKSDB_FORMAT_VERSION) + STATE_STORE_ROCKSDB_FORMAT_VERSION, STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION) /** * Default values of relevant configurations that are used for backward compatibility. 
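One more piece of the compatibility story: `OffsetSeqMetadata` records the relevant SQL confs in the offset log, and a checkpoint written by an older release has no entry for the new key, so the backward-compatibility default (registered as "false" just below) takes effect on restart. A simplified illustration, not the actual OffsetSeqMetadata code:

```
// Simplified illustration: confs restored from an old checkpoint lack the new key,
// so the compatibility default keeps the legacy (loose) distribution.
val confsFromOldCheckpoint = Map("spark.sql.shuffle.partitions" -> "5")
val useStrict = confsFromOldCheckpoint
  .getOrElse("spark.sql.streaming.statefulOperator.useStrictDistribution", "false")
  .toBoolean
// useStrict == false for restored queries; newly started queries get the conf's default of true.
```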
@@ -118,7 +118,8 @@ object OffsetSeqMetadata extends Logging { StreamingAggregationStateManager.legacyVersion.toString, STREAMING_JOIN_STATE_FORMAT_VERSION.key -> SymmetricHashJoinStateManager.legacyVersion.toString, - STATE_STORE_COMPRESSION_CODEC.key -> "lz4" + STATE_STORE_COMPRESSION_CODEC.key -> "lz4", + STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION.key -> "false" ) def apply(json: String): OffsetSeqMetadata = Serialization.read[OffsetSeqMetadata](json) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulOperatorPartitioning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulOperatorPartitioning.scala new file mode 100644 index 0000000000000..527349201574e --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulOperatorPartitioning.scala @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming + +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Distribution, StatefulOpClusteredDistribution} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION + +/** + * This object provides clustered distribution for stateful operators while ensuring backward + * compatibility. Please read through the NOTE on the classdoc of + * [[StatefulOpClusteredDistribution]] before making any changes. Please refer to SPARK-38204 + * for details. + * + * Do not use methods in this object for stateful operators which already use + * [[StatefulOpClusteredDistribution]] as their required child distribution. 
+ */ +object StatefulOperatorPartitioning { + + def getCompatibleDistribution( + expressions: Seq[Expression], + stateInfo: StatefulOperatorStateInfo, + conf: SQLConf): Distribution = { + getCompatibleDistribution(expressions, stateInfo.numPartitions, conf) + } + + def getCompatibleDistribution( + expressions: Seq[Expression], + numPartitions: Int, + conf: SQLConf): Distribution = { + if (conf.getConf(STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION)) { + StatefulOpClusteredDistribution(expressions, numPartitions) + } else { + ClusteredDistribution(expressions, requiredNumPartitions = Some(numPartitions)) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index df44714ee5270..e367637671cc8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark -import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning} +import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, Distribution, Partitioning} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution._ @@ -334,14 +334,11 @@ case class StateStoreRestoreExec( override def outputPartitioning: Partitioning = child.outputPartitioning override def requiredChildDistribution: Seq[Distribution] = { - // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution - // before making any changes. - // TODO(SPARK-38204) if (keyExpressions.isEmpty) { AllTuples :: Nil } else { - ClusteredDistribution(keyExpressions, - requiredNumPartitions = stateInfo.map(_.numPartitions)) :: Nil + StatefulOperatorPartitioning.getCompatibleDistribution( + keyExpressions, getStateInfo, conf) :: Nil } } @@ -497,14 +494,11 @@ case class StateStoreSaveExec( override def outputPartitioning: Partitioning = child.outputPartitioning override def requiredChildDistribution: Seq[Distribution] = { - // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution - // before making any changes. - // TODO(SPARK-38204) if (keyExpressions.isEmpty) { AllTuples :: Nil } else { - ClusteredDistribution(keyExpressions, - requiredNumPartitions = stateInfo.map(_.numPartitions)) :: Nil + StatefulOperatorPartitioning.getCompatibleDistribution( + keyExpressions, getStateInfo, conf) :: Nil } } @@ -591,11 +585,8 @@ case class SessionWindowStateStoreRestoreExec( } override def requiredChildDistribution: Seq[Distribution] = { - // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution - // before making any changes. 
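To make the helper's contract concrete, here is a hedged sketch (assuming the Spark SQL modules touched above are on the classpath) of how the returned distribution differs depending on the flag:

```
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, StatefulOpClusteredDistribution}
import org.apache.spark.sql.execution.streaming.StatefulOperatorPartitioning
import org.apache.spark.sql.internal.SQLConf

// Sketch only: describe which distribution the helper hands back for the given keys/partitions.
def describe(keys: Seq[Attribute], parts: Int, conf: SQLConf): String =
  StatefulOperatorPartitioning.getCompatibleDistribution(keys, parts, conf) match {
    case _: StatefulOpClusteredDistribution => "strict: hash partitioning on the exact grouping keys"
    case _: ClusteredDistribution => "legacy: loose clustering, pre-3.3 behavior"
    case other => other.toString
  }
```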
- // TODO(SPARK-38204) - ClusteredDistribution(keyWithoutSessionExpressions, - requiredNumPartitions = stateInfo.map(_.numPartitions)) :: Nil + StatefulOperatorPartitioning.getCompatibleDistribution( + keyWithoutSessionExpressions, getStateInfo, conf) :: Nil } override def requiredChildOrdering: Seq[Seq[SortOrder]] = { @@ -706,11 +697,8 @@ case class SessionWindowStateStoreSaveExec( override def outputPartitioning: Partitioning = child.outputPartitioning override def requiredChildDistribution: Seq[Distribution] = { - // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution - // before making any changes. - // TODO(SPARK-38204) - ClusteredDistribution(keyExpressions, - requiredNumPartitions = stateInfo.map(_.numPartitions)) :: Nil + StatefulOperatorPartitioning.getCompatibleDistribution( + keyWithoutSessionExpressions, getStateInfo, conf) :: Nil } override def shouldRunAnotherBatch(newMetadata: OffsetSeqMetadata): Boolean = { @@ -768,11 +756,8 @@ case class StreamingDeduplicateExec( /** Distribute by grouping attributes */ override def requiredChildDistribution: Seq[Distribution] = { - // NOTE: Please read through the NOTE on the classdoc of StatefulOpClusteredDistribution - // before making any changes. - // TODO(SPARK-38204) - ClusteredDistribution(keyExpressions, - requiredNumPartitions = stateInfo.map(_.numPartitions)) :: Nil + StatefulOperatorPartitioning.getCompatibleDistribution( + keyExpressions, getStateInfo, conf) :: Nil } override protected def doExecute(): RDD[InternalRow] = { diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/metadata new file mode 100644 index 0000000000000..019111c307024 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/metadata @@ -0,0 +1 @@ +{"id":"dc9af96e-870c-4dc6-ad09-1b84b62caac3"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/offsets/0 new file mode 100644 index 0000000000000..d00e8a5a4134a --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/offsets/0 @@ -0,0 +1,3 @@ +v1 
+{"batchWatermarkMs":0,"batchTimestampMs":1000,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/0/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/0/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/0/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/0/_metadata/schema new file mode 100644 index 0000000000000000000000000000000000000000..d3948722c3258db43d84208c2f5f8020f675c815 GIT binary patch literal 407 zcmb7xB=Ff-x&SpZ2d!4>C`?9gxC%)Da8ErUx=?jV|iq!xB literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/1/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/1/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..2639d3211decfcd934de6fcf901cd89d35e3bec9 GIT binary patch literal 96 zcmeZ?GI7euPtH~~V_;y20b=3BY%IY*T7!X+A(#=!kl-QP!U8%B dia)d@7+9GY7+e?{fN~7~fxri9lqg651_0u952OG9 literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/2/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/2/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/3/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/3/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/4/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/state/0/4/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/.0.crc new file mode 100644 index 0000000000000000000000000000000000000000..1aee7033161ecac53eda98ef9b64746c31483c89 GIT binary patch literal 12 TcmYc;N@ieSU}E^Jwf`;v6eR=n literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/.1.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/.1.crc new file mode 100644 index 0000000000000000000000000000000000000000..1aee7033161ecac53eda98ef9b64746c31483c89 GIT binary patch literal 12 TcmYc;N@ieSU}E^Jwf`;v6eR=n literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/1 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/commits/1 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/metadata new file mode 100644 index 0000000000000..81acb4439e8f5 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/metadata @@ -0,0 +1 @@ +{"id":"9538ada3-a233-4697-8b02-cc66250189a3"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/.0.crc new file mode 100644 index 0000000000000000000000000000000000000000..b8a99765858110437c95d456464b6b21a3c28db0 GIT binary patch literal 16 XcmYc;N@ieSU}D(f{xeo8=XV?cC8Pyw literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/.1.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/.1.crc new file mode 100644 index 0000000000000000000000000000000000000000..81716485cf023ce7abcd6d2b0896e76473408e48 GIT binary patch literal 16 XcmYc;N@ieSU}CTrnlGK3efk6d9(Dw= literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/0 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/0 new file mode 100644 index 0000000000000..852130a526e08 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/0 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1645693797622,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/1 new file mode 100644 index 0000000000000..2d894644897bf --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/offsets/1 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1645693802625,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5"}} +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/2.delta 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/_metadata/.schema.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/_metadata/.schema.crc new file mode 100644 index 0000000000000000000000000000000000000000..f03866c573c155f33e06d90185dca395b7c9f87e GIT binary patch literal 12 TcmYc;N@ieSU}8wxb5;rf67T~+ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/0/_metadata/schema new file mode 100644 index 0000000000000000000000000000000000000000..e4695f58d7de925a9fee4e2df40edb2ce2b8a191 GIT binary patch literal 393 zcmZQzDl=kWU|?jJQ>|1|S&*t^rBqx}RGM6(q@$FUnVOSQtYj5kt(2FT3zEuCtuzEm zKow@@m87Pp76AqFN^^1&lX8Gc(h_ruQ+1SbQ%e$45=#=5tg35kb&&NNk*+_s7VN^i RgV%)!AC@KNl%|s7MF6K)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/1/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/1/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..dc5c3a4905a5bf8b9e079bf625e0f22d36d1f5b6 GIT binary patch literal 12 TcmYc;N@ieSU}9j^J6Q$*5GDex literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/1/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/1/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/1/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/1/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..00c03b0f2aaa5ceb2b3c0dd187d8852a2812081a GIT binary patch literal 75 zcmeZ?GI7euPtI0VWME)00ph3&tC@m)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/2/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/2/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..0df89359466b4f1ffd0ced30d722a24651f59ff2 GIT binary patch literal 12 TcmYc;N@ieSU}D%lFD@Ja6G;O_ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/2/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/2/1.delta new file mode 100644 index 
0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/2/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/2/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..0a0f74c944036bd7228c4705916584588fe73287 GIT binary patch literal 75 zcmeZ?GI7euPtI0VWME)00pfkRQ~82{v;+eqgD?vRgD?jNLm-eZz{t+P%ES=B@E-_# Lpt?kX5^w+j6Il&X literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/3/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/3/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..fcb13666a0ad8be5ca8afb3ef6670ef2408a85ab GIT binary patch literal 12 TcmYc;N@ieSU}6yKuoeOU5BLHm literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/3/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/3/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/3/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/3/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..4e033f8786aa8e9f933d741b005271f36f1d9ce4 GIT binary patch literal 74 zcmeZ?GI7euPtI0VU|?V{0pi9x4_JeNv;+eqgD@ipgD?{ZgA0%^Ai}`R#1O#n9|(M) KnnZ!(Z~y=!(+#-* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/3/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/3/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/4/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/4/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..eb2b6be4e5e55eb4f7e12c6f1c42513180ececc9 GIT binary patch literal 12 TcmYc;N@ieSU}Erp-}D3k6LACI literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/4/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/4/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/4/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/state/0/4/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..7b6e9c175b8cfd4601bf0e8b40cf82d769494763 GIT binary patch literal 75 zcmeZ?GI7euPtI0VWME)00piDJ9hrlHv;+eqgD?{ZgD?vRLm-eZz{t+P%ES=B@E-_# Lpt?kX5^w+j8()5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/0/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/0/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/0/_metadata/.schema.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/0/_metadata/.schema.crc new file mode 100644 index 0000000000000000000000000000000000000000..4d339e472ac250928ed93b4e11e8cd99a7d53481 GIT binary patch literal 12 TcmYc;N@ieSU}A9767~WB4~7B= literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/0/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/0/_metadata/schema new file mode 100644 index 0000000000000000000000000000000000000000..bf902e50cf260884fba56285b5718c0635c30029 GIT binary patch literal 415 zcmb7hnRPaH*=y)dii~Yq0+Sz_i69}j@V0JDMJ#lcu7;U6kM}-td^AFc zgLoz2r&rKI;5qgT5=O0P2+&>=STu`tJC*bLrtMUA8;S5vMK4Ar!6?Q78KF%%%hlWL y^>+E&>|N7rnqBl~Mppln8e8w8T~L2a)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/2/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/2/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/3/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/3/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/3/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/3/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/4/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/4/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/4/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/state/0/4/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/commits/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/commits/.0.crc new file mode 100644 index 0000000000000000000000000000000000000000..1aee7033161ecac53eda98ef9b64746c31483c89 GIT binary patch literal 12 TcmYc;N@ieSU}E^Jwf`;v6eR=n literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/metadata new file mode 100644 index 0000000000000..fa78985cb8778 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/metadata @@ -0,0 +1 @@ +{"id":"f4795695-2b3e-4864-983a-f7bf52c0e29d"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/offsets/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/offsets/.0.crc new file mode 100644 index 0000000000000000000000000000000000000000..04523a6882fdb364e6ba5ba20e4c2001ace31841 GIT binary patch literal 16 XcmYc;N@ieSU}8AE;st*|jkqEJCK?2( literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/offsets/0 new file mode 100644 index 0000000000000..321a56f4d3707 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/offsets/0 @@ -0,0 +1,3 @@ +v1 
+{"batchWatermarkMs":0,"batchTimestampMs":1000,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/0/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/0/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/0/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/0/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/0/_metadata/.schema.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/0/_metadata/.schema.crc new file mode 100644 index 0000000000000000000000000000000000000000..4d339e472ac250928ed93b4e11e8cd99a7d53481 GIT binary patch literal 12 TcmYc;N@ieSU}A9767~WB4~7B= literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/0/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/0/_metadata/schema new file mode 100644 index 0000000000000000000000000000000000000000..bf902e50cf260884fba56285b5718c0635c30029 GIT binary patch literal 415 zcmb7hnRPaH*=y)dii~Yq0+Sz_i69}j@V0JDMJ#lcu7;U6kM}-td^AFc zgLoz2r&rKI;5qgT5=O0P2+&>=STu`tJC*bLrtMUA8;S5vMK4Ar!6?Q78KF%%%hlWL y^>+E&>|N7rnqBl~Mppln8e8w8T~L2aEqU literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/1/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/1/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..2639d3211decfcd934de6fcf901cd89d35e3bec9 GIT binary patch literal 96 zcmeZ?GI7euPtH~~V_;y20b=3BY%IY*T7!X+A(#=!kl-QP!U8%B dia)d@7+9GY7+e?{fN~7~fxri9lqg651_0u952OG9 literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/2/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/2/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT 
binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/2/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/2/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/3/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/3/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/3/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/3/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/4/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/4/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/4/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/state/0/4/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/.0.crc new file mode 100644 index 0000000000000000000000000000000000000000..ba56986ebd219e3f265814f8b2e0b86a95fcd8bc GIT binary patch literal 12 TcmYc;N@ieSU}9kQ(g+0r4-Wzb literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/.1.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/.1.crc new file mode 100644 index 0000000000000000000000000000000000000000..ba56986ebd219e3f265814f8b2e0b86a95fcd8bc GIT binary patch literal 12 TcmYc;N@ieSU}9kQ(g+0r4-Wzb literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/0 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/0 new file mode 100644 index 0000000000000..00b8a64995dde --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":11000} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/1 new file mode 100644 index 0000000000000..00b8a64995dde --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/commits/1 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":11000} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/metadata new file mode 100644 index 0000000000000..879dac88e351a --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/metadata @@ -0,0 +1 @@ +{"id":"c3d27d93-536b-49ce-a62f-f2777855a1fb"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/.0.crc new file mode 100644 index 0000000000000000000000000000000000000000..0d6e0a4778504ac9f87854b73ebb090567f8afe9 GIT binary patch literal 16 XcmYc;N@ieSU}A8nns7)e=XV?cBJ2ew literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/.1.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/.1.crc new file mode 100644 index 0000000000000000000000000000000000000000..24dcb52ef6098b88f03dac0b07a4f2ef2fd8b023 GIT binary patch literal 16 XcmYc;N@ieSU}Es@2=6HUUn&j&BLD>L literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/0 new file mode 100644 index 0000000000000..6f149ed4ec45c --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/0 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1645760172709,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/1 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/1 new file mode 100644 index 0000000000000..4a6194c2002bd --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/offsets/1 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":11000,"batchTimestampMs":1645760174214,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/_metadata/.schema.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/_metadata/.schema.crc new file mode 100644 index 0000000000000000000000000000000000000000..3f3804f1999c0b7f84f065ee169450dbcc99801b GIT binary patch literal 16 XcmYc;N@ieSU}BhbP_*#S#1IDnB$)*J literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/_metadata/schema 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/0/_metadata/schema new file mode 100644 index 0000000000000000000000000000000000000000..871586884066bdd12f88cf8d0270b3fa1cff343b GIT binary patch literal 754 zcmbtSv1-FG5LNOiH=7We&2yIynOewDOu5FFA|lI1cdju;pPaFKFNty8B$O0u((&#| z@7_Bz|9mjU%*>$!X)A&hSVFxONML8l?VY}9#S4%xf5BElI&U2qa b9p~7?Q=*)@3fr>wBY$0av4_`_$C$-$L$m^K literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/1/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/1/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..9e684a6792e54424a7622403192896d7aac9b37e GIT binary patch literal 12 TcmYc;N@ieSU}AWFSCIt(6Zr!q literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/1/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/1/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/1/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/1/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..73c35f68c2f28e85319089ef2a20864615954df3 GIT binary patch literal 259 zcmeZ?GI7euPtI1o$-uzi!okI~Yt{DvMg{8?QrA4VJ%?unGjJz{I3i)R+$N-hFGDtHr0=-bf$iIV;(V;xQ aC}%PQkZH*s!Vtjl9|(M)z7++!4GsWkH!$1) literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/1/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/1/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/2/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/2/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/2/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/2/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/2/1.delta 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/2/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/2/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/2/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/3/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/3/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..816cff99cd156a2c6845ba349cf89c924aa1b085 GIT binary patch literal 12 TcmYc;N@ieSU}Bh+n8pJD5#s`= literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/3/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/3/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/3/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/3/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..3c6d389f04264b94b85d3bbc7bdaa5bc1f32bf20 GIT binary patch literal 230 zcmeZ?GI7euPtI1|!@$5`!opPn>7|1qYU}U(!3S=k%F$)lDh%gvLB{8ux9LPw` z$;k(bWT#dd3NSbVSpjSe&Ok~+hk?O{LD<2DAoGBQJjp0|#R>0|U^VIgI=(7#ZWs^NVs)6c}x|Ll^=W{sVyz)I*{`2fzUU D(+Mkl literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/3/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/3/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/4/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/4/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/4/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/4/.2.delta.crc new file mode 100644 index 
0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/4/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/4/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/4/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/state/0/4/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/.0.crc new file mode 100644 index 0000000000000000000000000000000000000000..1aee7033161ecac53eda98ef9b64746c31483c89 GIT binary patch literal 12 TcmYc;N@ieSU}E^Jwf`;v6eR=n literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/.1.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/.1.crc new file mode 100644 index 0000000000000000000000000000000000000000..1aee7033161ecac53eda98ef9b64746c31483c89 GIT binary patch literal 12 TcmYc;N@ieSU}E^Jwf`;v6eR=n literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/1 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/commits/1 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/metadata new file mode 100644 index 0000000000000..0831489d9d02d --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/metadata @@ -0,0 +1 @@ 
+{"id":"cd462130-c8fb-4212-8b08-4e1b9e10dbcf"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/offsets/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/offsets/.0.crc new file mode 100644 index 0000000000000000000000000000000000000000..b1cf4d310b245ad42c3eaa39d64b408635a480d1 GIT binary patch literal 16 XcmYc;N@ieSU}7luJpE28=XV?cC5r`< literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/offsets/.1.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/offsets/.1.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf958a5259df31fde9f158a2a780a6d1d1e0344e GIT binary patch literal 16 XcmYc;N@ieSU}BisyM0}5_URJ)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/_metadata/.schema.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/_metadata/.schema.crc new file mode 100644 index 0000000000000000000000000000000000000000..701a0a87ad48a326a5672b487ec746b721e51bae GIT binary patch literal 12 TcmYc;N@ieSU}9+2t5gF35iSCY literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/0/_metadata/schema new file mode 100644 index 0000000000000000000000000000000000000000..08ee320ef24219b3505baa948e7560028a66afac GIT binary patch literal 262 zcmb8pI}U>|3QUJDS<{2x=ESzB!L}HY}Anv`joT#{8#_~ Wo4gC_Uxe)(b+aEtnRWQ;Yw{bD!&O57 literal 0 HcmV?d00001 diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/1/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/1/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..f712e4290ad3781454938359710258058daedad6 GIT binary patch literal 12 TcmYc;N@ieSU}DJseU%pg6XXMj literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/1/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/1/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..2a9f3595f24b553a5f96cacb9af52fa95ecbaef6 GIT binary patch literal 12 TcmYc;N@ieSU}6Xo?Ar+d5YPhU literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/1/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/1/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..f5faf01f4dc5cdd730b5e129c1e43c07e84a4908 GIT binary patch literal 82 zcmeZ?GI7euPtI1=U|?V{0b=RwE8M|AT7rR*L716?L70t$!4b$8U}N|Iq=2Fj@E-_# LpqfRYVqgjYJQEFd literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/1/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/1/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..ec3f1af46bd49d5558cb485a300615e7d1ae4f51 GIT binary patch literal 82 zcmeZ?GI7euPtI1=U|?V{0pfWv3ar6ET7rR*L716?L70t$!4b$8U}N|Iq!^e$1Q7fO N0w1VmQHT(P1OO}d4KM%z literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/2/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/2/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/2/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/2/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/2/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/2/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/2/2.delta 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/2/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/3/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/3/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/3/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/3/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/3/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/3/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/3/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/3/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/4/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/4/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/4/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/4/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..3ffbb7a9133b5142685e82b055e424661a36a396 GIT binary patch literal 12 TcmYc;N@ieSU}8A)SM)pp6rcm> literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/4/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/4/1.delta new file mode 100644 index 
0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/4/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/state/0/4/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..7c8834f659bd90881edf1834130bdef0f557c605 GIT binary patch literal 82 zcmeZ?GI7euPtI1=U|?V{0b=z<6L^Dxv;+eqgD?{ZgD?vRgCmeHz{c true + HashAggregateExec(_, _, _, _, _, _, _, _, _: LocalTableScanExec)) => true case _ => false }, "LocalTableScanExec should be within a WholeStageCodegen domain.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateDistributionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateDistributionSuite.scala new file mode 100644 index 0000000000000..f1578ae5df97d --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateDistributionSuite.scala @@ -0,0 +1,455 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.streaming + +import java.io.File + +import org.apache.commons.io.FileUtils + +import org.apache.spark.sql.catalyst.streaming.InternalOutputModes.Update +import org.apache.spark.sql.execution.streaming.{FlatMapGroupsWithStateExec, MemoryStream} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.GroupStateTimeout.ProcessingTimeTimeout +import org.apache.spark.sql.streaming.util.{StatefulOpClusteredDistributionTestHelper, StreamManualClock} +import org.apache.spark.util.Utils + +class FlatMapGroupsWithStateDistributionSuite extends StreamTest + with StatefulOpClusteredDistributionTestHelper { + + import testImplicits._ + + test("SPARK-38204: flatMapGroupsWithState should require StatefulOpClusteredDistribution " + + "from children - with initial state") { + // function will return -1 on timeout and returns count of the state otherwise + val stateFunc = + (key: (String, String), values: Iterator[(String, String, Long)], + state: GroupState[RunningCount]) => { + + if (state.hasTimedOut) { + state.remove() + Iterator((key, "-1")) + } else { + val count = state.getOption.map(_.count).getOrElse(0L) + values.size + state.update(RunningCount(count)) + state.setTimeoutDuration("10 seconds") + Iterator((key, count.toString)) + } + } + + val clock = new StreamManualClock + val inputData = MemoryStream[(String, String, Long)] + val initialState = Seq(("c", "c", new RunningCount(2))) + .toDS() + .repartition($"_2") + .groupByKey(a => (a._1, a._2)).mapValues(_._3) + val result = + inputData.toDF().toDF("key1", "key2", "time") + .selectExpr("key1", "key2", "timestamp_seconds(time) as timestamp") + .withWatermark("timestamp", "10 second") + .as[(String, String, Long)] + .repartition($"_1") + .groupByKey(x => (x._1, x._2)) + .flatMapGroupsWithState(Update, ProcessingTimeTimeout(), initialState)(stateFunc) + .select($"_1._1".as("key1"), $"_1._2".as("key2"), $"_2".as("cnt")) + + testStream(result, Update)( + StartStream(Trigger.ProcessingTime("1 second"), triggerClock = clock), + AddData(inputData, ("a", "a", 1L)), + AdvanceManualClock(1 * 1000), // a and c are processed here for the first time. 
+ CheckNewAnswer(("a", "a", "1"), ("c", "c", "2")), + Execute { query => + val numPartitions = query.lastExecution.numStateStores + + val flatMapGroupsWithStateExecs = query.lastExecution.executedPlan.collect { + case f: FlatMapGroupsWithStateExec => f + } + + assert(flatMapGroupsWithStateExecs.length === 1) + assert(requireStatefulOpClusteredDistribution( + flatMapGroupsWithStateExecs.head, Seq(Seq("_1", "_2"), Seq("_1", "_2")), numPartitions)) + assert(hasDesiredHashPartitioningInChildren( + flatMapGroupsWithStateExecs.head, Seq(Seq("_1", "_2"), Seq("_1", "_2")), numPartitions)) + } + ) + } + + test("SPARK-38204: flatMapGroupsWithState should require StatefulOpClusteredDistribution " + + "from children - without initial state") { + // function will return -1 on timeout and returns count of the state otherwise + val stateFunc = + (key: (String, String), values: Iterator[(String, String, Long)], + state: GroupState[RunningCount]) => { + + if (state.hasTimedOut) { + state.remove() + Iterator((key, "-1")) + } else { + val count = state.getOption.map(_.count).getOrElse(0L) + values.size + state.update(RunningCount(count)) + state.setTimeoutDuration("10 seconds") + Iterator((key, count.toString)) + } + } + + val clock = new StreamManualClock + val inputData = MemoryStream[(String, String, Long)] + val result = + inputData.toDF().toDF("key1", "key2", "time") + .selectExpr("key1", "key2", "timestamp_seconds(time) as timestamp") + .withWatermark("timestamp", "10 second") + .as[(String, String, Long)] + .repartition($"_1") + .groupByKey(x => (x._1, x._2)) + .flatMapGroupsWithState(Update, ProcessingTimeTimeout())(stateFunc) + .select($"_1._1".as("key1"), $"_1._2".as("key2"), $"_2".as("cnt")) + + testStream(result, Update)( + StartStream(Trigger.ProcessingTime("1 second"), triggerClock = clock), + AddData(inputData, ("a", "a", 1L)), + AdvanceManualClock(1 * 1000), // a is processed here for the first time. 
+ CheckNewAnswer(("a", "a", "1")), + Execute { query => + val numPartitions = query.lastExecution.numStateStores + + val flatMapGroupsWithStateExecs = query.lastExecution.executedPlan.collect { + case f: FlatMapGroupsWithStateExec => f + } + + assert(flatMapGroupsWithStateExecs.length === 1) + assert(requireStatefulOpClusteredDistribution( + flatMapGroupsWithStateExecs.head, Seq(Seq("_1", "_2"), Seq("_1", "_2")), numPartitions)) + assert(hasDesiredHashPartitioningInChildren( + flatMapGroupsWithStateExecs.head, Seq(Seq("_1", "_2"), Seq("_1", "_2")), numPartitions)) + } + ) + } + + test("SPARK-38204: flatMapGroupsWithState should require ClusteredDistribution " + + "from children if the query starts from checkpoint in 3.2.x - with initial state") { + // function will return -1 on timeout and returns count of the state otherwise + val stateFunc = + (key: (String, String), values: Iterator[(String, String, Long)], + state: GroupState[RunningCount]) => { + + if (state.hasTimedOut) { + state.remove() + Iterator((key, "-1")) + } else { + val count = state.getOption.map(_.count).getOrElse(0L) + values.size + state.update(RunningCount(count)) + state.setTimeoutDuration("10 seconds") + Iterator((key, count.toString)) + } + } + + val clock = new StreamManualClock + val inputData = MemoryStream[(String, String, Long)] + val initialState = Seq(("c", "c", new RunningCount(2))) + .toDS() + .repartition($"_2") + .groupByKey(a => (a._1, a._2)).mapValues(_._3) + val result = + inputData.toDF().toDF("key1", "key2", "time") + .selectExpr("key1", "key2", "timestamp_seconds(time) as timestamp") + .withWatermark("timestamp", "10 second") + .as[(String, String, Long)] + .repartition($"_1") + .groupByKey(x => (x._1, x._2)) + .flatMapGroupsWithState(Update, ProcessingTimeTimeout(), initialState)(stateFunc) + .select($"_1._1".as("key1"), $"_1._2".as("key2"), $"_2".as("cnt")) + + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate1-repartition/").toURI + + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. + FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + + inputData.addData(("a", "a", 1L)) + + testStream(result, Update)( + StartStream(Trigger.ProcessingTime("1 second"), + checkpointLocation = checkpointDir.getAbsolutePath, + triggerClock = clock, + additionalConfs = Map(SQLConf.STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION.key -> "true")), + + // scalastyle:off line.size.limit + /* + Note: The checkpoint was generated using the following input in Spark version 3.2.0 + AddData(inputData, ("a", "a", 1L)), + AdvanceManualClock(1 * 1000), // a and c are processed here for the first time. + CheckNewAnswer(("a", "a", "1"), ("c", "c", "2")), + + Note2: The following is the physical plan of the query in Spark version 3.2.0. 
+ + WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@253dd5ad, org.apache.spark.sql.execution.datasources.v2.DataSourceV2Strategy$$Lambda$2214/0x0000000840ead440@6ede0d42 + +- *(6) Project [_1#58._1 AS key1#63, _1#58._2 AS key2#64, _2#59 AS cnt#65] + +- *(6) SerializeFromObject [if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1)) null else named_struct(_1, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1)._1, true, false), _2, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1)._2, true, false)) AS _1#58, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2, true, false) AS _2#59] + +- FlatMapGroupsWithState org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite$$Lambda$1067/0x0000000840770440@3f2e51a9, newInstance(class scala.Tuple2), newInstance(class scala.Tuple3), newInstance(class org.apache.spark.sql.streaming.RunningCount), [_1#52, _2#53], [_1#22, _2#23], [key1#29, key2#30, timestamp#35-T10000ms], [count#25L], obj#57: scala.Tuple2, state info [ checkpoint = file:/tmp/streaming.metadata-d4f0d156-78b5-4129-97fb-361241ab03d8/state, runId = eb107298-692d-4336-bb76-6b11b34a0753, opId = 0, ver = 0, numPartitions = 5], class[count[0]: bigint], 2, Update, ProcessingTimeTimeout, 1000, 0, true + :- *(3) Sort [_1#52 ASC NULLS FIRST, _2#53 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(_1#52, _2#53, 5), ENSURE_REQUIREMENTS, [id=#78] + : +- AppendColumns org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite$$Lambda$1751/0x0000000840ccc040@41d4c0d8, newInstance(class scala.Tuple3), [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1, true, false) AS _1#52, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2, true, false) AS _2#53] + : +- *(2) Project [key1#29, key2#30, timestamp#35-T10000ms] + : +- Exchange hashpartitioning(_1#3, 5), REPARTITION_BY_COL, [id=#73] + : +- EventTimeWatermark timestamp#35: timestamp, 10 seconds + : +- *(1) Project [_1#3 AS key1#29, _2#4 AS key2#30, timestamp_seconds(_3#5L) AS timestamp#35, _1#3] + : +- MicroBatchScan[_1#3, _2#4, _3#5L] MemoryStreamDataSource + +- *(5) Sort [_1#22 ASC NULLS FIRST, _2#23 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(_1#22, _2#23, 5), ENSURE_REQUIREMENTS, [id=#85] + +- *(4) Project [count#25L, _1#22, _2#23] + +- AppendColumns org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite$$Lambda$1686/0x0000000840c9b840@6bb881d0, newInstance(class scala.Tuple3), [knownnotnull(assertnotnull(input[0, org.apache.spark.sql.streaming.RunningCount, true])).count AS count#25L] + +- AppendColumns org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite$$Lambda$1681/0x0000000840c98840@11355c7b, newInstance(class scala.Tuple3), [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1, true, false) AS _1#22, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2, true, false) AS _2#23] + +- 
Exchange hashpartitioning(_1#9, 5), REPARTITION_BY_COL, [id=#43] + +- LocalTableScan [_1#9, _2#10, _3#11] + */ + // scalastyle:on line.size.limit + + AddData(inputData, ("a", "b", 1L)), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(("a", "b", "1")), + + Execute { query => + val numPartitions = query.lastExecution.numStateStores + + val flatMapGroupsWithStateExecs = query.lastExecution.executedPlan.collect { + case f: FlatMapGroupsWithStateExec => f + } + + assert(flatMapGroupsWithStateExecs.length === 1) + assert(requireClusteredDistribution(flatMapGroupsWithStateExecs.head, + Seq(Seq("_1", "_2"), Seq("_1", "_2")), Some(numPartitions))) + assert(hasDesiredHashPartitioningInChildren( + flatMapGroupsWithStateExecs.head, Seq(Seq("_1", "_2"), Seq("_1", "_2")), numPartitions)) + } + ) + } + + test("SPARK-38204: flatMapGroupsWithState should require ClusteredDistribution " + + "from children if the query starts from checkpoint in 3.2.x - without initial state") { + // function will return -1 on timeout and returns count of the state otherwise + val stateFunc = + (key: (String, String), values: Iterator[(String, String, Long)], + state: GroupState[RunningCount]) => { + + if (state.hasTimedOut) { + state.remove() + Iterator((key, "-1")) + } else { + val count = state.getOption.map(_.count).getOrElse(0L) + values.size + state.update(RunningCount(count)) + state.setTimeoutDuration("10 seconds") + Iterator((key, count.toString)) + } + } + + val clock = new StreamManualClock + val inputData = MemoryStream[(String, String, Long)] + val result = + inputData.toDF().toDF("key1", "key2", "time") + .selectExpr("key1", "key2", "timestamp_seconds(time) as timestamp") + .withWatermark("timestamp", "10 second") + .as[(String, String, Long)] + .repartition($"_1") + .groupByKey(x => (x._1, x._2)) + .flatMapGroupsWithState(Update, ProcessingTimeTimeout())(stateFunc) + .select($"_1._1".as("key1"), $"_1._2".as("key2"), $"_2".as("cnt")) + + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-3.2.0-flatmapgroupswithstate2-repartition/").toURI + + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. + FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + + inputData.addData(("a", "a", 1L)) + + testStream(result, Update)( + StartStream(Trigger.ProcessingTime("1 second"), + checkpointLocation = checkpointDir.getAbsolutePath, + triggerClock = clock, + additionalConfs = Map(SQLConf.STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION.key -> "true")), + + // scalastyle:off line.size.limit + /* + Note: The checkpoint was generated using the following input in Spark version 3.2.0 + AddData(inputData, ("a", "a", 1L)), + AdvanceManualClock(1 * 1000), // a is processed here for the first time. 
+ CheckNewAnswer(("a", "a", "1")), + + Note2: The following is the physical plan of the query in Spark version 3.2.0 (convenience for checking backward compatibility) + WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@20732f1b, org.apache.spark.sql.execution.datasources.v2.DataSourceV2Strategy$$Lambda$2205/0x0000000840ea5440@48e6c016 + +- *(5) Project [_1#39._1 AS key1#44, _1#39._2 AS key2#45, _2#40 AS cnt#46] + +- *(5) SerializeFromObject [if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1)) null else named_struct(_1, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1)._1, true, false), _2, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1)._2, true, false)) AS _1#39, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2, true, false) AS _2#40] + +- FlatMapGroupsWithState org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite$$Lambda$1065/0x0000000840770040@240e41f8, newInstance(class scala.Tuple2), newInstance(class scala.Tuple3), newInstance(class scala.Tuple2), [_1#32, _2#33], [_1#32, _2#33], [key1#9, key2#10, timestamp#15-T10000ms], [key1#9, key2#10, timestamp#15-T10000ms], obj#37: scala.Tuple2, state info [ checkpoint = file:/tmp/spark-6619d285-b0ca-42ab-8284-723a564e13b6/state, runId = b3383a6c-9976-483c-a463-7fc9e9ae3e1a, opId = 0, ver = 0, numPartitions = 5], class[count[0]: bigint], 2, Update, ProcessingTimeTimeout, 1000, 0, false + :- *(3) Sort [_1#32 ASC NULLS FIRST, _2#33 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(_1#32, _2#33, 5), ENSURE_REQUIREMENTS, [id=#62] + : +- AppendColumns org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite$$Lambda$1709/0x0000000840ca7040@351810cb, newInstance(class scala.Tuple3), [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1, true, false) AS _1#32, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2, true, false) AS _2#33] + : +- *(2) Project [key1#9, key2#10, timestamp#15-T10000ms] + : +- Exchange hashpartitioning(_1#3, 5), REPARTITION_BY_COL, [id=#57] + : +- EventTimeWatermark timestamp#15: timestamp, 10 seconds + : +- *(1) Project [_1#3 AS key1#9, _2#4 AS key2#10, timestamp_seconds(_3#5L) AS timestamp#15, _1#3] + : +- MicroBatchScan[_1#3, _2#4, _3#5L] MemoryStreamDataSource + +- *(4) !Sort [_1#32 ASC NULLS FIRST, _2#33 ASC NULLS FIRST], false, 0 + +- !Exchange hashpartitioning(_1#32, _2#33, 5), ENSURE_REQUIREMENTS, [id=#46] + +- LocalTableScan , [count#38L] + */ + // scalastyle:on line.size.limit + + AddData(inputData, ("a", "b", 1L)), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(("a", "b", "1")), + + Execute { query => + val numPartitions = query.lastExecution.numStateStores + + val flatMapGroupsWithStateExecs = query.lastExecution.executedPlan.collect { + case f: FlatMapGroupsWithStateExec => f + } + + assert(flatMapGroupsWithStateExecs.length === 1) + assert(requireClusteredDistribution(flatMapGroupsWithStateExecs.head, + Seq(Seq("_1", "_2"), Seq("_1", "_2")), Some(numPartitions))) + assert(hasDesiredHashPartitioningInChildren( + flatMapGroupsWithStateExecs.head, 
Seq(Seq("_1", "_2"), Seq("_1", "_2")), numPartitions)) + } + ) + } + + test("SPARK-38204: flatMapGroupsWithState should require ClusteredDistribution " + + "from children if the query starts from checkpoint in prior to 3.2") { + // function will return -1 on timeout and returns count of the state otherwise + val stateFunc = + (key: (String, String), values: Iterator[(String, String, Long)], + state: GroupState[RunningCount]) => { + + if (state.hasTimedOut) { + state.remove() + Iterator((key, "-1")) + } else { + val count = state.getOption.map(_.count).getOrElse(0L) + values.size + state.update(RunningCount(count)) + state.setTimeoutDuration("10 seconds") + Iterator((key, count.toString)) + } + } + + val clock = new StreamManualClock + val inputData = MemoryStream[(String, String, Long)] + val result = + inputData.toDF().toDF("key1", "key2", "time") + .selectExpr("key1", "key2", "timestamp_seconds(time) as timestamp") + .withWatermark("timestamp", "10 second") + .as[(String, String, Long)] + .repartition($"_1") + .groupByKey(x => (x._1, x._2)) + .flatMapGroupsWithState(Update, ProcessingTimeTimeout())(stateFunc) + .select($"_1._1".as("key1"), $"_1._2".as("key2"), $"_2".as("cnt")) + + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-3.1.0-flatmapgroupswithstate-repartition/").toURI + + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. + FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + + inputData.addData(("a", "a", 1L)) + + testStream(result, Update)( + StartStream(Trigger.ProcessingTime("1 second"), + checkpointLocation = checkpointDir.getAbsolutePath, + triggerClock = clock, + additionalConfs = Map(SQLConf.STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION.key -> "true")), + + // scalastyle:off line.size.limit + /* + Note: The checkpoint was generated using the following input in Spark version 3.2.0 + AddData(inputData, ("a", "a", 1L)), + AdvanceManualClock(1 * 1000), // a is processed here for the first time. + CheckNewAnswer(("a", "a", "1")), + + Note2: The following plans are the physical plans of the query in older Spark versions + The physical plans around FlatMapGroupsWithStateExec are quite similar, especially + shuffles being injected are same. That said, verifying with checkpoint being built with + Spark 3.1.0 would verify the following versions as well. + + A. 
Spark 3.1.0 + WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@4505821b + +- *(3) Project [_1#38._1 AS key1#43, _1#38._2 AS key2#44, _2#39 AS cnt#45] + +- *(3) SerializeFromObject [if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1)) null else named_struct(_1, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1)._1, true, false), _2, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1)._2, true, false)) AS _1#38, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2, true, false) AS _2#39] + +- FlatMapGroupsWithState org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite$$Lambda$1035/0x0000000840721840@64351072, newInstance(class scala.Tuple2), newInstance(class scala.Tuple3), [_1#32, _2#33], [key1#9, key2#10, timestamp#15-T10000ms], obj#37: scala.Tuple2, state info [ checkpoint = file:/tmp/spark-56397379-d014-48e0-a002-448c0621cfe8/state, runId = 4f9a129f-2b0c-4838-9d26-18171d94be7d, opId = 0, ver = 0, numPartitions = 5], class[count[0]: bigint], 2, Update, ProcessingTimeTimeout, 1000, 0 + +- *(2) Sort [_1#32 ASC NULLS FIRST, _2#33 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(_1#32, _2#33, 5), ENSURE_REQUIREMENTS, [id=#54] + +- AppendColumns org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite$$Lambda$1594/0x0000000840bc8840@857c80d, newInstance(class scala.Tuple3), [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1, true, false) AS _1#32, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2, true, false) AS _2#33] + +- Exchange hashpartitioning(key1#9, 5), REPARTITION, [id=#52] + +- EventTimeWatermark timestamp#15: timestamp, 10 seconds + +- *(1) Project [_1#3 AS key1#9, _2#4 AS key2#10, timestamp_seconds(_3#5L) AS timestamp#15] + +- *(1) Project [_1#3, _2#4, _3#5L] + +- MicroBatchScan[_1#3, _2#4, _3#5L] MemoryStreamDataSource + + B. 
Spark 3.0.0 + WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@32ae8206 + +- *(3) Project [_1#38._1 AS key1#43, _1#38._2 AS key2#44, _2#39 AS cnt#45] + +- *(3) SerializeFromObject [if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1)) null else named_struct(_1, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1)._1, true, false), _2, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1)._2, true, false)) AS _1#38, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2, true, false) AS _2#39] + +- FlatMapGroupsWithState org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite$$Lambda$972/0x0000000840721c40@3e8c825d, newInstance(class scala.Tuple2), newInstance(class scala.Tuple3), [_1#32, _2#33], [key1#9, key2#10, timestamp#15-T10000ms], obj#37: scala.Tuple2, state info [ checkpoint = file:/tmp/spark-dcd6753e-54c7-481c-aa21-f7fc677a29a4/state, runId = 4854d427-436c-4f4e-9e1d-577bcd9cc890, opId = 0, ver = 0, numPartitions = 5], class[count[0]: bigint], 2, Update, ProcessingTimeTimeout, 1000, 0 + +- *(2) Sort [_1#32 ASC NULLS FIRST, _2#33 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(_1#32, _2#33, 5), true, [id=#54] + +- AppendColumns org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite$$Lambda$1477/0x0000000840bb6040@627623e, newInstance(class scala.Tuple3), [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1, true, false) AS _1#32, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2, true, false) AS _2#33] + +- Exchange hashpartitioning(key1#9, 5), false, [id=#52] + +- EventTimeWatermark timestamp#15: timestamp, 10 seconds + +- *(1) Project [_1#3 AS key1#9, _2#4 AS key2#10, cast(_3#5L as timestamp) AS timestamp#15] + +- *(1) Project [_1#3, _2#4, _3#5L] + +- MicroBatchScan[_1#3, _2#4, _3#5L] MemoryStreamDataSource + + C. 
Spark 2.4.0 + *(3) Project [_1#32._1 AS key1#35, _1#32._2 AS key2#36, _2#33 AS cnt#37] + +- *(3) SerializeFromObject [if (isnull(assertnotnull(input[0, scala.Tuple2, true])._1)) null else named_struct(_1, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, assertnotnull(assertnotnull(input[0, scala.Tuple2, true])._1)._1, true, false), _2, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, assertnotnull(assertnotnull(input[0, scala.Tuple2, true])._1)._2, true, false)) AS _1#32, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, assertnotnull(input[0, scala.Tuple2, true])._2, true, false) AS _2#33] + +- FlatMapGroupsWithState , newInstance(class scala.Tuple2), newInstance(class scala.Tuple3), [_1#26, _2#27], [key1#9, key2#10, timestamp#15-T10000ms], obj#31: scala.Tuple2, state info [ checkpoint = file:/tmp/spark-634482c9-a55a-4f4e-b352-babec98fb4fc/state, runId = dd65fff0-d901-4e0b-a1ad-8c09b69f33ba, opId = 0, ver = 0, numPartitions = 5], class[count[0]: bigint], 2, Update, ProcessingTimeTimeout, 1000, 0 + +- *(2) Sort [_1#26 ASC NULLS FIRST, _2#27 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(_1#26, _2#27, 5) + +- AppendColumns , newInstance(class scala.Tuple3), [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, assertnotnull(input[0, scala.Tuple2, true])._1, true, false) AS _1#26, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, assertnotnull(input[0, scala.Tuple2, true])._2, true, false) AS _2#27] + +- Exchange hashpartitioning(key1#9, 5) + +- EventTimeWatermark timestamp#15: timestamp, interval 10 seconds + +- *(1) Project [_1#56 AS key1#9, _2#57 AS key2#10, cast(_3#58L as timestamp) AS timestamp#15] + +- *(1) Project [_1#56, _2#57, _3#58L] + +- *(1) ScanV2 MemoryStreamDataSource$[_1#56, _2#57, _3#58L] + */ + // scalastyle:on line.size.limit + + AddData(inputData, ("a", "b", 1L)), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(("a", "b", "1")), + + Execute { query => + val numPartitions = query.lastExecution.numStateStores + + val flatMapGroupsWithStateExecs = query.lastExecution.executedPlan.collect { + case f: FlatMapGroupsWithStateExec => f + } + + assert(flatMapGroupsWithStateExecs.length === 1) + assert(requireClusteredDistribution(flatMapGroupsWithStateExecs.head, + Seq(Seq("_1", "_2"), Seq("_1", "_2")), Some(numPartitions))) + assert(hasDesiredHashPartitioningInChildren( + flatMapGroupsWithStateExecs.head, Seq(Seq("_1", "_2"), Seq("_1", "_2")), numPartitions)) + } + ) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationDistributionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationDistributionSuite.scala new file mode 100644 index 0000000000000..615434f2edad9 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationDistributionSuite.scala @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming + +import java.io.File + +import org.apache.commons.io.FileUtils +import org.scalatest.Assertions + +import org.apache.spark.sql.catalyst.plans.physical.UnspecifiedDistribution +import org.apache.spark.sql.execution.aggregate.BaseAggregateExec +import org.apache.spark.sql.execution.streaming.{MemoryStream, StateStoreRestoreExec, StateStoreSaveExec} +import org.apache.spark.sql.functions.count +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.OutputMode.Update +import org.apache.spark.sql.streaming.util.StatefulOpClusteredDistributionTestHelper +import org.apache.spark.util.Utils + +class StreamingAggregationDistributionSuite extends StreamTest + with StatefulOpClusteredDistributionTestHelper with Assertions { + + import testImplicits._ + + test("SPARK-38204: streaming aggregation should require StatefulOpClusteredDistribution " + + "from children") { + + val input = MemoryStream[Int] + val df1 = input.toDF().select('value as 'key1, 'value * 2 as 'key2, 'value * 3 as 'value) + val agg = df1.repartition('key1).groupBy('key1, 'key2).agg(count('*)) + + testStream(agg, OutputMode.Update())( + AddData(input, 1, 1, 2, 3, 4), + CheckAnswer((1, 2, 2), (2, 4, 1), (3, 6, 1), (4, 8, 1)), + Execute { query => + val numPartitions = query.lastExecution.numStateStores + + // verify state store restore/save + val stateStoreOps = query.lastExecution.executedPlan.collect { + case s: StateStoreRestoreExec => s + case s: StateStoreSaveExec => s + } + + assert(stateStoreOps.nonEmpty) + stateStoreOps.foreach { stateOp => + assert(requireStatefulOpClusteredDistribution(stateOp, Seq(Seq("key1", "key2")), + numPartitions)) + assert(hasDesiredHashPartitioningInChildren(stateOp, Seq(Seq("key1", "key2")), + numPartitions)) + } + + // verify aggregations in between, except partial aggregation + val allAggregateExecs = query.lastExecution.executedPlan.collect { + case a: BaseAggregateExec => a + } + + val aggregateExecsWithoutPartialAgg = allAggregateExecs.filter { + _.requiredChildDistribution.head != UnspecifiedDistribution + } + + // We expect single partial aggregation - remaining agg execs should have child producing + // expected output partitioning. + assert(allAggregateExecs.length - 1 === aggregateExecsWithoutPartialAgg.length) + + // For aggregate execs, we make sure output partitioning of the children is same as + // we expect, HashPartitioning with clustering keys & number of partitions. 
+ aggregateExecsWithoutPartialAgg.foreach { aggr => + assert(hasDesiredHashPartitioningInChildren(aggr, Seq(Seq("key1", "key2")), + numPartitions)) + } + } + ) + } + + test("SPARK-38204: streaming aggregation should require ClusteredDistribution " + + "from children if the query starts from checkpoint in prior to 3.3") { + + val inputData = MemoryStream[Int] + val df1 = inputData.toDF().select('value as 'key1, 'value * 2 as 'key2, 'value * 3 as 'value) + val agg = df1.repartition('key1).groupBy('key1, 'key2).agg(count('*)) + + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-3.2.0-streaming-aggregate-with-repartition/").toURI + + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. + FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + + inputData.addData(3) + inputData.addData(3, 2) + + testStream(agg, Update)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath, + additionalConfs = Map(SQLConf.STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION.key -> "true")), + + // scalastyle:off line.size.limit + /* + Note: The checkpoint was generated using the following input in Spark version 3.2.0 + AddData(inputData, 3), + CheckLastBatch((3, 6, 1)), + AddData(inputData, 3, 2), + CheckLastBatch((3, 6, 2), (2, 4, 1)) + + Note2: The following plans are the physical plans of the query in older Spark versions + The physical plans around StateStoreRestore and StateStoreSave are quite similar, + especially shuffles being injected are same. That said, verifying with checkpoint being + built with Spark 3.2.0 would verify the following versions as well. + + A. Spark 3.2.0 + WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@61a581c0, org.apache.spark.sql.execution.datasources.v2.DataSourceV2Strategy$$Lambda$1968/1468582588@325b0006 + +- *(4) HashAggregate(keys=[key1#3, key2#4], functions=[count(1)], output=[key1#3, key2#4, count(1)#13L]) + +- StateStoreSave [key1#3, key2#4], state info [ checkpoint = file:/blabla/state, runId = 2bd7d18c-73b2-49a2-b2aa-1835162f9186, opId = 0, ver = 1, numPartitions = 5], Update, 0, 2 + +- *(3) HashAggregate(keys=[key1#3, key2#4], functions=[merge_count(1)], output=[key1#3, key2#4, count#47L]) + +- StateStoreRestore [key1#3, key2#4], state info [ checkpoint = file:/blabla/state, runId = 2bd7d18c-73b2-49a2-b2aa-1835162f9186, opId = 0, ver = 1, numPartitions = 5], 2 + +- *(2) HashAggregate(keys=[key1#3, key2#4], functions=[merge_count(1)], output=[key1#3, key2#4, count#47L]) + +- *(2) HashAggregate(keys=[key1#3, key2#4], functions=[partial_count(1)], output=[key1#3, key2#4, count#47L]) + +- Exchange hashpartitioning(key1#3, 5), REPARTITION_BY_COL, [id=#220] + +- *(1) Project [value#1 AS key1#3, (value#1 * 2) AS key2#4] + +- MicroBatchScan[value#1] MemoryStreamDataSource + + B. 
Spark 3.1.0 + WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@53602363 + +- *(4) HashAggregate(keys=[key1#3, key2#4], functions=[count(1)], output=[key1#3, key2#4, count(1)#13L]) + +- StateStoreSave [key1#3, key2#4], state info [ checkpoint = file:/tmp/spark-178e9eaf-b527-499c-8eb6-c9e734f9fdfc/state, runId = 9c7e8635-41ab-4141-9f46-7ab473c58560, opId = 0, ver = 1, numPartitions = 5], Update, 0, 2 + +- *(3) HashAggregate(keys=[key1#3, key2#4], functions=[merge_count(1)], output=[key1#3, key2#4, count#47L]) + +- StateStoreRestore [key1#3, key2#4], state info [ checkpoint = file:/tmp/spark-178e9eaf-b527-499c-8eb6-c9e734f9fdfc/state, runId = 9c7e8635-41ab-4141-9f46-7ab473c58560, opId = 0, ver = 1, numPartitions = 5], 2 + +- *(2) HashAggregate(keys=[key1#3, key2#4], functions=[merge_count(1)], output=[key1#3, key2#4, count#47L]) + +- *(2) HashAggregate(keys=[key1#3, key2#4], functions=[partial_count(1)], output=[key1#3, key2#4, count#47L]) + +- Exchange hashpartitioning(key1#3, 5), REPARTITION, [id=#222] + +- *(1) Project [value#1 AS key1#3, (value#1 * 2) AS key2#4] + +- *(1) Project [value#1] + +- MicroBatchScan[value#1] MemoryStreamDataSource + + C. Spark 3.0.0 + WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@33379044 + +- *(4) HashAggregate(keys=[key1#3, key2#4], functions=[count(1)], output=[key1#3, key2#4, count(1)#13L]) + +- StateStoreSave [key1#3, key2#4], state info [ checkpoint = file:/tmp/spark-83497e04-657c-4cad-b532-f433b1532302/state, runId = 1a650994-486f-4f32-92d9-f7c05d49d0a0, opId = 0, ver = 1, numPartitions = 5], Update, 0, 2 + +- *(3) HashAggregate(keys=[key1#3, key2#4], functions=[merge_count(1)], output=[key1#3, key2#4, count#47L]) + +- StateStoreRestore [key1#3, key2#4], state info [ checkpoint = file:/tmp/spark-83497e04-657c-4cad-b532-f433b1532302/state, runId = 1a650994-486f-4f32-92d9-f7c05d49d0a0, opId = 0, ver = 1, numPartitions = 5], 2 + +- *(2) HashAggregate(keys=[key1#3, key2#4], functions=[merge_count(1)], output=[key1#3, key2#4, count#47L]) + +- *(2) HashAggregate(keys=[key1#3, key2#4], functions=[partial_count(1)], output=[key1#3, key2#4, count#47L]) + +- Exchange hashpartitioning(key1#3, 5), false, [id=#104] + +- *(1) Project [value#1 AS key1#3, (value#1 * 2) AS key2#4] + +- *(1) Project [value#1] + +- MicroBatchScan[value#1] MemoryStreamDataSource + + D. 
Spark 2.4.0 + *(4) HashAggregate(keys=[key1#3, key2#4], functions=[count(1)], output=[key1#3, key2#4, count(1)#13L]) + +- StateStoreSave [key1#3, key2#4], state info [ checkpoint = file:/tmp/spark-c4fd5b1f-18e0-4433-ac7a-00df93464b49/state, runId = 89bfe27b-da33-4a75-9f36-97717c137b2a, opId = 0, ver = 1, numPartitions = 5], Update, 0, 2 + +- *(3) HashAggregate(keys=[key1#3, key2#4], functions=[merge_count(1)], output=[key1#3, key2#4, count#42L]) + +- StateStoreRestore [key1#3, key2#4], state info [ checkpoint = file:/tmp/spark-c4fd5b1f-18e0-4433-ac7a-00df93464b49/state, runId = 89bfe27b-da33-4a75-9f36-97717c137b2a, opId = 0, ver = 1, numPartitions = 5], 2 + +- *(2) HashAggregate(keys=[key1#3, key2#4], functions=[merge_count(1)], output=[key1#3, key2#4, count#42L]) + +- *(2) HashAggregate(keys=[key1#3, key2#4], functions=[partial_count(1)], output=[key1#3, key2#4, count#42L]) + +- Exchange hashpartitioning(key1#3, 5) + +- *(1) Project [value#47 AS key1#3, (value#47 * 2) AS key2#4] + +- *(1) Project [value#47] + +- *(1) ScanV2 MemoryStreamDataSource$[value#47] + */ + // scalastyle:on line.size.limit + + AddData(inputData, 3, 2, 1), + CheckLastBatch((3, 6, 3), (2, 4, 2), (1, 2, 1)), + + Execute { query => + val executedPlan = query.lastExecution.executedPlan + assert(!executedPlan.conf.getConf(SQLConf.STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION)) + + val numPartitions = query.lastExecution.numStateStores + + // verify state store restore/save + val stateStoreOps = executedPlan.collect { + case s: StateStoreRestoreExec => s + case s: StateStoreSaveExec => s + } + + assert(stateStoreOps.nonEmpty) + stateStoreOps.foreach { stateOp => + assert(requireClusteredDistribution(stateOp, Seq(Seq("key1", "key2")), + Some(numPartitions))) + assert(hasDesiredHashPartitioningInChildren(stateOp, Seq(Seq("key1")), + numPartitions)) + } + + // verify aggregations in between, except partial aggregation + val allAggregateExecs = executedPlan.collect { + case a: BaseAggregateExec => a + } + + val aggregateExecsWithoutPartialAgg = allAggregateExecs.filter { + _.requiredChildDistribution.head != UnspecifiedDistribution + } + + // We expect single partial aggregation - remaining agg execs should have child producing + // expected output partitioning. + assert(allAggregateExecs.length - 1 === aggregateExecsWithoutPartialAgg.length) + + // For aggregate execs, we make sure output partitioning of the children is same as + // we expect, HashPartitioning with sub-clustering keys & number of partitions. 
+ aggregateExecsWithoutPartialAgg.foreach { aggr => + assert(requireClusteredDistribution(aggr, Seq(Seq("key1", "key2")), + Some(numPartitions))) + assert(hasDesiredHashPartitioningInChildren(aggr, Seq(Seq("key1")), + numPartitions)) + } + } + ) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index a183e6b4e3950..64dffe7f571ac 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -30,6 +30,7 @@ import org.apache.spark.rdd.BlockRDD import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.logical.Aggregate +import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, SinglePartition} import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.exchange.Exchange @@ -542,8 +543,8 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { /** * This method verifies certain properties in the SparkPlan of a streaming aggregation. * First of all, it checks that the child of a `StateStoreRestoreExec` creates the desired - * data distribution, where the child could be an Exchange, or a `HashAggregateExec` which already - * provides the expected data distribution. + * data distribution, where the child is a `HashAggregateExec` which already provides + * the expected data distribution. * * The second thing it checks that the child provides the expected number of partitions. 
* @@ -552,7 +553,6 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { */ private def checkAggregationChain( se: StreamExecution, - expectShuffling: Boolean, expectedPartition: Int): Boolean = { val executedPlan = se.lastExecution.executedPlan val restore = executedPlan @@ -560,12 +560,17 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { .head restore.child match { case node: UnaryExecNode => - assert(node.outputPartitioning.numPartitions === expectedPartition, - "Didn't get the expected number of partitions.") - if (expectShuffling) { - assert(node.isInstanceOf[Exchange], s"Expected a shuffle, got: ${node.child}") - } else { - assert(!node.isInstanceOf[Exchange], "Didn't expect a shuffle") + node.outputPartitioning match { + case HashPartitioning(_, numPartitions) => + assert(numPartitions === expectedPartition, + "Didn't get the expected number of partitions.") + + // below case should only applied to no grouping key which leads to AllTuples + case SinglePartition if expectedPartition == 1 => // OK + + case p => + fail("Expected a hash partitioning for child output partitioning, but has " + + s"$p instead.") } case _ => fail("Expected no shuffling") @@ -605,12 +610,12 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { AddBlockData(inputSource, Seq(1)), CheckLastBatch(1), AssertOnQuery("Verify no shuffling") { se => - checkAggregationChain(se, expectShuffling = false, 1) + checkAggregationChain(se, 1) }, AddBlockData(inputSource), // create an empty trigger CheckLastBatch(1), AssertOnQuery("Verify that no exchange is required") { se => - checkAggregationChain(se, expectShuffling = false, 1) + checkAggregationChain(se, 1) }, AddBlockData(inputSource, Seq(2, 3)), CheckLastBatch(3), @@ -647,10 +652,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { AddBlockData(inputSource, Seq(1)), CheckLastBatch((0L, 1L)), AssertOnQuery("Verify addition of exchange operator") { se => - checkAggregationChain( - se, - expectShuffling = true, - spark.sessionState.conf.numShufflePartitions) + checkAggregationChain(se, spark.sessionState.conf.numShufflePartitions) }, StopStream ) @@ -661,10 +663,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { AddBlockData(inputSource, Seq(2), Seq(3), Seq(4)), CheckLastBatch((0L, 4L)), AssertOnQuery("Verify no exchange added") { se => - checkAggregationChain( - se, - expectShuffling = false, - spark.sessionState.conf.numShufflePartitions) + checkAggregationChain(se, spark.sessionState.conf.numShufflePartitions) }, AddBlockData(inputSource), CheckLastBatch((0L, 4L)), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationDistributionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationDistributionSuite.scala new file mode 100644 index 0000000000000..8dbdb3620688e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationDistributionSuite.scala @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming + +import java.io.File + +import org.apache.commons.io.FileUtils + +import org.apache.spark.sql.catalyst.streaming.InternalOutputModes.Update +import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamingDeduplicateExec} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.util.StatefulOpClusteredDistributionTestHelper +import org.apache.spark.util.Utils + +class StreamingDeduplicationDistributionSuite extends StreamTest + with StatefulOpClusteredDistributionTestHelper { + + import testImplicits._ + + test("SPARK-38204: streaming deduplication should require StatefulOpClusteredDistribution " + + "from children") { + + val input = MemoryStream[Int] + val df1 = input.toDF().select('value as 'key1, 'value * 2 as 'key2, 'value * 3 as 'value) + val dedup = df1.repartition('key1).dropDuplicates("key1", "key2") + + testStream(dedup, OutputMode.Update())( + AddData(input, 1, 1, 2, 3, 4), + CheckAnswer((1, 2, 3), (2, 4, 6), (3, 6, 9), (4, 8, 12)), + Execute { query => + val numPartitions = query.lastExecution.numStateStores + + val dedupExecs = query.lastExecution.executedPlan.collect { + case d: StreamingDeduplicateExec => d + } + + assert(dedupExecs.length === 1) + assert(requireStatefulOpClusteredDistribution( + dedupExecs.head, Seq(Seq("key1", "key2")), numPartitions)) + assert(hasDesiredHashPartitioningInChildren( + dedupExecs.head, Seq(Seq("key1", "key2")), numPartitions)) + } + ) + } + + test("SPARK-38204: streaming deduplication should require ClusteredDistribution " + + "from children if the query starts from checkpoint in prior to 3.3") { + + val inputData = MemoryStream[Int] + val df1 = inputData.toDF().select('value as 'key1, 'value * 2 as 'key2, 'value * 3 as 'value) + val dedup = df1.repartition('key1).dropDuplicates("key1", "key2") + + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-3.2.0-deduplication-with-repartition/").toURI + + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. + FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + + inputData.addData(1, 1, 2) + inputData.addData(3, 4) + + testStream(dedup, Update)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath, + additionalConfs = Map(SQLConf.STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION.key -> "true")), + + // scalastyle:off line.size.limit + /* + Note: The checkpoint was generated using the following input in Spark version 3.2.0 + AddData(inputData, 1, 1, 2), + CheckLastBatch((1, 2, 3), (2, 4, 6)), + AddData(inputData, 3, 4), + CheckLastBatch((3, 6, 9), (4, 8, 12)) + + Note2: The following plans are the physical plans of the query in older Spark versions + The physical plans around StreamingDeduplicate are quite similar, especially shuffles + being injected are same. That said, verifying with checkpoint being built with + Spark 3.2.0 would verify the following versions as well. + + A. 
Spark 3.2.0 + WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@76467fb2, org.apache.spark.sql.execution.datasources.v2.DataSourceV2Strategy$$Lambda$1900/1334867523@32b72162 + +- StreamingDeduplicate [key1#3, key2#4], state info [ checkpoint = file:/blabla/state, runId = bf82c05e-4031-4421-89e0-28fd9127eb5b, opId = 0, ver = 1, numPartitions = 5], 0 + +- Exchange hashpartitioning(key1#3, 5), REPARTITION_BY_COL, [id=#115] + +- *(1) Project [value#1 AS key1#3, (value#1 * 2) AS key2#4, (value#1 * 3) AS value#5] + +- MicroBatchScan[value#1] MemoryStreamDataSource + + B. Spark 3.1.0 + WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@133d8337 + +- StreamingDeduplicate [key1#3, key2#4], state info [ checkpoint = file:/tmp/spark-c0b73191-75ec-4a54-89b7-368fbbc4b2a8/state, runId = 9b2baaee-1147-4faf-98b4-3c3d8ee34966, opId = 0, ver = 1, numPartitions = 5], 0 + +- Exchange hashpartitioning(key1#3, 5), REPARTITION, [id=#117] + +- *(1) Project [value#1 AS key1#3, (value#1 * 2) AS key2#4, (value#1 * 3) AS value#5] + +- *(1) Project [value#1] + +- MicroBatchScan[value#1] MemoryStreamDataSource + + C. Spark 3.0.0 + WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@bb06c00 + +- StreamingDeduplicate [key1#3, key2#4], state info [ checkpoint = file:/tmp/spark-6f8a96c7-2af5-4952-a1b4-c779766334ef/state, runId = 9a208eb0-d915-46dd-a0fd-23b1df82b951, opId = 0, ver = 1, numPartitions = 5], 0 + +- Exchange hashpartitioning(key1#3, 5), false, [id=#57] + +- *(1) Project [value#1 AS key1#3, (value#1 * 2) AS key2#4, (value#1 * 3) AS value#5] + +- *(1) Project [value#1] + +- MicroBatchScan[value#1] MemoryStreamDataSource + + D. Spark 2.4.0 + StreamingDeduplicate [key1#3, key2#4], state info [ checkpoint = file:/tmp/spark-d8a684a0-5623-4739-85e8-e45b99768aa7/state, runId = 85bd75bd-3d45-4d42-aeac-9e45fc559ee9, opId = 0, ver = 1, numPartitions = 5], 0 + +- Exchange hashpartitioning(key1#3, 5) + +- *(1) Project [value#37 AS key1#3, (value#37 * 2) AS key2#4, (value#37 * 3) AS value#5] + +- *(1) Project [value#37] + +- *(1) ScanV2 MemoryStreamDataSource$[value#37] + */ + // scalastyle:on line.size.limit + + AddData(inputData, 2, 3, 4, 5), + CheckLastBatch((5, 10, 15)), + Execute { query => + val executedPlan = query.lastExecution.executedPlan + assert(!executedPlan.conf.getConf(SQLConf.STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION)) + + val numPartitions = query.lastExecution.numStateStores + + val dedupExecs = executedPlan.collect { + case d: StreamingDeduplicateExec => d + } + + assert(dedupExecs.length === 1) + assert(requireClusteredDistribution( + dedupExecs.head, Seq(Seq("key1", "key2")), Some(numPartitions))) + assert(hasDesiredHashPartitioningInChildren( + dedupExecs.head, Seq(Seq("key1")), numPartitions)) + } + ) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowDistributionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowDistributionSuite.scala new file mode 100644 index 0000000000000..bb7b9804105fa --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowDistributionSuite.scala @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming + +import java.io.File + +import org.apache.commons.io.FileUtils + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.catalyst.plans.physical.UnspecifiedDistribution +import org.apache.spark.sql.execution.aggregate.BaseAggregateExec +import org.apache.spark.sql.execution.streaming.{MemoryStream, SessionWindowStateStoreRestoreExec, SessionWindowStateStoreSaveExec} +import org.apache.spark.sql.functions.{count, session_window} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.util.StatefulOpClusteredDistributionTestHelper +import org.apache.spark.util.Utils + +class StreamingSessionWindowDistributionSuite extends StreamTest + with StatefulOpClusteredDistributionTestHelper with Logging { + + import testImplicits._ + + test("SPARK-38204: session window aggregation should require StatefulOpClusteredDistribution " + + "from children") { + + withSQLConf( + // exclude partial merging session to simplify test + SQLConf.STREAMING_SESSION_WINDOW_MERGE_SESSIONS_IN_LOCAL_PARTITION.key -> "false") { + + val inputData = MemoryStream[(String, String, Long)] + + // Split the lines into words, treat words as sessionId of events + val events = inputData.toDF() + .select($"_1".as("value"), $"_2".as("userId"), $"_3".as("timestamp")) + .withColumn("eventTime", $"timestamp".cast("timestamp")) + .withWatermark("eventTime", "30 seconds") + .selectExpr("explode(split(value, ' ')) AS sessionId", "userId", "eventTime") + + val sessionUpdates = events + .repartition($"userId") + .groupBy(session_window($"eventTime", "10 seconds") as 'session, 'sessionId, 'userId) + .agg(count("*").as("numEvents")) + .selectExpr("sessionId", "userId", "CAST(session.start AS LONG)", + "CAST(session.end AS LONG)", + "CAST(session.end AS LONG) - CAST(session.start AS LONG) AS durationMs", + "numEvents") + + testStream(sessionUpdates, OutputMode.Append())( + AddData(inputData, + ("hello world spark streaming", "key1", 40L), + ("world hello structured streaming", "key2", 41L) + ), + + // skip checking the result, since we focus to verify the physical plan + ProcessAllAvailable(), + Execute { query => + val numPartitions = query.lastExecution.numStateStores + + val operators = query.lastExecution.executedPlan.collect { + case s: SessionWindowStateStoreRestoreExec => s + case s: SessionWindowStateStoreSaveExec => s + } + + assert(operators.nonEmpty) + operators.foreach { stateOp => + assert(requireStatefulOpClusteredDistribution(stateOp, Seq(Seq("sessionId", "userId")), + numPartitions)) + assert(hasDesiredHashPartitioningInChildren(stateOp, Seq(Seq("sessionId", "userId")), + numPartitions)) + } + + // Verify aggregations in between, except partial aggregation. + // This includes MergingSessionsExec. 
+ val allAggregateExecs = query.lastExecution.executedPlan.collect { + case a: BaseAggregateExec => a + } + + val aggregateExecsWithoutPartialAgg = allAggregateExecs.filter { + _.requiredChildDistribution.head != UnspecifiedDistribution + } + + // We expect single partial aggregation since we disable partial merging sessions. + // Remaining agg execs should have child producing expected output partitioning. + assert(allAggregateExecs.length - 1 === aggregateExecsWithoutPartialAgg.length) + + // For aggregate execs, we make sure output partitioning of the children is same as + // we expect, HashPartitioning with clustering keys & number of partitions. + aggregateExecsWithoutPartialAgg.foreach { aggr => + assert(hasDesiredHashPartitioningInChildren(aggr, Seq(Seq("sessionId", "userId")), + numPartitions)) + } + } + ) + } + } + + test("SPARK-38204: session window aggregation should require ClusteredDistribution " + + "from children if the query starts from checkpoint in 3.2") { + + withSQLConf( + // exclude partial merging session to simplify test + SQLConf.STREAMING_SESSION_WINDOW_MERGE_SESSIONS_IN_LOCAL_PARTITION.key -> "false") { + + val inputData = MemoryStream[(String, String, Long)] + + // Split the lines into words, treat words as sessionId of events + val events = inputData.toDF() + .select($"_1".as("value"), $"_2".as("userId"), $"_3".as("timestamp")) + .withColumn("eventTime", $"timestamp".cast("timestamp")) + .withWatermark("eventTime", "30 seconds") + .selectExpr("explode(split(value, ' ')) AS sessionId", "userId", "eventTime") + + val sessionUpdates = events + .repartition($"userId") + .groupBy(session_window($"eventTime", "10 seconds") as 'session, 'sessionId, 'userId) + .agg(count("*").as("numEvents")) + .selectExpr("sessionId", "userId", "CAST(session.start AS LONG)", + "CAST(session.end AS LONG)", + "CAST(session.end AS LONG) - CAST(session.start AS LONG) AS durationMs", + "numEvents") + + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-3.2.0-session-window-with-repartition/").toURI + + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. + FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + + inputData.addData( + ("hello world spark streaming", "key1", 40L), + ("world hello structured streaming", "key2", 41L)) + + testStream(sessionUpdates, OutputMode.Append())( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath, + additionalConfs = Map(SQLConf.STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION.key -> "true")), + + // scalastyle:off line.size.limit + /* + Note: The checkpoint was generated using the following input in Spark version 3.2.0 + AddData(inputData, + ("hello world spark streaming", "key1", 40L), + ("world hello structured streaming", "key2", 41L)), + // skip checking the result, since we focus to verify the physical plan + ProcessAllAvailable() + + Note2: The following is the physical plan of the query in Spark version 3.2.0. 
+ + WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@6649ee50, org.apache.spark.sql.execution.datasources.v2.DataSourceV2Strategy$$Lambda$2209/0x0000000840ebd440@f9f45c6 + +- *(3) HashAggregate(keys=[session_window#33-T30000ms, sessionId#21, userId#10], functions=[count(1)], output=[sessionId#21, userId#10, CAST(session.start AS BIGINT)#43L, CAST(session.end AS BIGINT)#44L, durationMs#38L, numEvents#32L]) + +- SessionWindowStateStoreSave [sessionId#21, userId#10], session_window#33: struct, state info [ checkpoint = file:/tmp/spark-f8a951f5-c7c1-43b0-883d-9b893d672ee5/state, runId = 92681f36-1f0d-434e-8492-897e4e988bb3, opId = 0, ver = 1, numPartitions = 5], Append, 11000, 1 + +- MergingSessions List(ClusteredDistribution(ArrayBuffer(sessionId#21, userId#10),None)), [session_window#33-T30000ms, sessionId#21, userId#10], session_window#33: struct, [merge_count(1)], [count(1)#30L], 3, [session_window#33-T30000ms, sessionId#21, userId#10, count#58L] + +- SessionWindowStateStoreRestore [sessionId#21, userId#10], session_window#33: struct, state info [ checkpoint = file:/tmp/spark-f8a951f5-c7c1-43b0-883d-9b893d672ee5/state, runId = 92681f36-1f0d-434e-8492-897e4e988bb3, opId = 0, ver = 1, numPartitions = 5], 11000, 1 + +- *(2) Sort [sessionId#21 ASC NULLS FIRST, userId#10 ASC NULLS FIRST, session_window#33-T30000ms ASC NULLS FIRST], false, 0 + +- *(2) HashAggregate(keys=[session_window#33-T30000ms, sessionId#21, userId#10], functions=[partial_count(1)], output=[session_window#33-T30000ms, sessionId#21, userId#10, count#58L]) + +- *(2) Project [named_struct(start, precisetimestampconversion(precisetimestampconversion(eventTime#15-T30000ms, TimestampType, LongType), LongType, TimestampType), end, precisetimestampconversion(precisetimestampconversion(eventTime#15-T30000ms + 10 seconds, TimestampType, LongType), LongType, TimestampType)) AS session_window#33-T30000ms, sessionId#21, userId#10] + +- Exchange hashpartitioning(userId#10, 5), REPARTITION_BY_COL, [id=#372] + +- *(1) Project [sessionId#21, userId#10, eventTime#15-T30000ms] + +- *(1) Generate explode(split(value#9, , -1)), [userId#10, eventTime#15-T30000ms], false, [sessionId#21] + +- *(1) Filter (precisetimestampconversion(precisetimestampconversion(eventTime#15-T30000ms + 10 seconds, TimestampType, LongType), LongType, TimestampType) > precisetimestampconversion(precisetimestampconversion(eventTime#15-T30000ms, TimestampType, LongType), LongType, TimestampType)) + +- EventTimeWatermark eventTime#15: timestamp, 30 seconds + +- LocalTableScan , [value#9, userId#10, eventTime#15] + */ + // scalastyle:on line.size.limit + + AddData(inputData, ("spark streaming", "key1", 25L)), + // skip checking the result, since we focus to verify the physical plan + ProcessAllAvailable(), + + Execute { query => + val numPartitions = query.lastExecution.numStateStores + + val operators = query.lastExecution.executedPlan.collect { + case s: SessionWindowStateStoreRestoreExec => s + case s: SessionWindowStateStoreSaveExec => s + } + + assert(operators.nonEmpty) + operators.foreach { stateOp => + assert(requireClusteredDistribution(stateOp, Seq(Seq("sessionId", "userId")), + Some(numPartitions))) + assert(hasDesiredHashPartitioningInChildren(stateOp, Seq(Seq("userId")), + numPartitions)) + } + + // Verify aggregations in between, except partial aggregation. + // This includes MergingSessionsExec. 
+ val allAggregateExecs = query.lastExecution.executedPlan.collect { + case a: BaseAggregateExec => a + } + + val aggregateExecsWithoutPartialAgg = allAggregateExecs.filter { + _.requiredChildDistribution.head != UnspecifiedDistribution + } + + // We expect single partial aggregation since we disable partial merging sessions. + // Remaining agg execs should have child producing expected output partitioning. + assert(allAggregateExecs.length - 1 === aggregateExecsWithoutPartialAgg.length) + + // For aggregate execs, we make sure output partitioning of the children is same as + // we expect, HashPartitioning with sub-clustering keys & number of partitions. + aggregateExecsWithoutPartialAgg.foreach { aggr => + assert(hasDesiredHashPartitioningInChildren(aggr, Seq(Seq("userId")), + numPartitions)) + } + } + ) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/util/StatefulOpClusteredDistributionTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/util/StatefulOpClusteredDistributionTestHelper.scala new file mode 100644 index 0000000000000..f2684b8c39cd9 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/util/StatefulOpClusteredDistributionTestHelper.scala @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.streaming.util + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, HashPartitioning, StatefulOpClusteredDistribution} +import org.apache.spark.sql.execution.SparkPlan + +trait StatefulOpClusteredDistributionTestHelper extends SparkFunSuite { + protected def requireClusteredDistribution( + plan: SparkPlan, + desiredClusterColumns: Seq[Seq[String]], + desiredNumPartitions: Option[Int]): Boolean = { + assert(plan.requiredChildDistribution.length === desiredClusterColumns.length) + plan.requiredChildDistribution.zip(desiredClusterColumns).forall { + case (d: ClusteredDistribution, clusterColumns: Seq[String]) + if partitionExpressionsColumns(d.clustering) == clusterColumns && + d.requiredNumPartitions == desiredNumPartitions => true + + case _ => false + } + } + + protected def requireStatefulOpClusteredDistribution( + plan: SparkPlan, + desiredClusterColumns: Seq[Seq[String]], + desiredNumPartitions: Int): Boolean = { + assert(plan.requiredChildDistribution.length === desiredClusterColumns.length) + plan.requiredChildDistribution.zip(desiredClusterColumns).forall { + case (d: StatefulOpClusteredDistribution, clusterColumns: Seq[String]) + if partitionExpressionsColumns(d.expressions) == clusterColumns && + d._requiredNumPartitions == desiredNumPartitions => true + + case _ => false + } + } + + protected def hasDesiredHashPartitioning( + plan: SparkPlan, + desiredClusterColumns: Seq[String], + desiredNumPartitions: Int): Boolean = { + plan.outputPartitioning match { + case HashPartitioning(expressions, numPartitions) + if partitionExpressionsColumns(expressions) == desiredClusterColumns && + numPartitions == desiredNumPartitions => true + + case _ => false + } + } + + protected def hasDesiredHashPartitioningInChildren( + plan: SparkPlan, + desiredClusterColumns: Seq[Seq[String]], + desiredNumPartitions: Int): Boolean = { + plan.children.zip(desiredClusterColumns).forall { case (child, clusterColumns) => + hasDesiredHashPartitioning(child, clusterColumns, desiredNumPartitions) + } + } + + private def partitionExpressionsColumns(expressions: Seq[Expression]): Seq[String] = { + expressions.flatMap { + case ref: AttributeReference => Some(ref.name) + } + } +} From f84018a4810867afa84658fec76494aaae6d57fc Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 16 Mar 2022 12:23:00 +0900 Subject: [PATCH 504/513] [SPARK-38424][PYTHON] Warn unused casts and ignores ### What changes were proposed in this pull request? This PR adds the following options to mypy configuration ``` warn_unused_ignores = True warn_redundant_casts = True ``` ### Why are the changes needed? This ensures that no unused casts and imports are used to reduce overall noise. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35740 from zero323/SPARK-38424. 
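As a minimal illustration (not taken from this patch; the module, function, and variable names are invented), this is the kind of code the two new mypy options flag, which is why the diff below removes casts and `# type: ignore[...]` comments that no longer suppress anything rather than leaving them in place:

```
from typing import cast

def add_one(x: int) -> int:
    y = cast(int, x)  # flagged by warn_redundant_casts: Redundant cast to "int"
    return y + 1  # type: ignore  # flagged by warn_unused_ignores: unused "type: ignore" comment
```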
Authored-by: zero323 Signed-off-by: Hyukjin Kwon --- dev/lint-python | 1 - examples/src/main/python/als.py | 4 +- examples/src/main/python/kmeans.py | 5 +- .../src/main/python/logistic_regression.py | 8 ++-- .../ml/estimator_transformer_param_example.py | 2 +- .../src/main/python/ml/pipeline_example.py | 2 +- examples/src/main/python/pagerank.py | 11 +++-- examples/src/main/python/pi.py | 2 +- examples/src/main/python/sql/arrow.py | 36 ++++++++------- examples/src/main/python/sql/basic.py | 6 +-- examples/src/main/python/sql/datasource.py | 18 ++++---- .../structured_network_wordcount_windowed.py | 2 +- examples/src/main/python/status_api_demo.py | 15 +++--- .../streaming/network_wordjoinsentiments.py | 3 +- .../recoverable_network_wordcount.py | 15 ++++-- .../python/streaming/sql_network_wordcount.py | 8 ++-- .../streaming/stateful_network_wordcount.py | 3 +- .../src/main/python/transitive_closure.py | 5 +- python/mypy.ini | 2 + python/pyspark/conf.py | 4 +- python/pyspark/context.py | 46 +++++++------------ python/pyspark/ml/classification.pyi | 2 +- python/pyspark/ml/clustering.py | 2 +- python/pyspark/ml/common.py | 8 ++-- python/pyspark/ml/feature.pyi | 2 +- python/pyspark/ml/fpm.py | 2 +- python/pyspark/ml/param/__init__.py | 3 +- python/pyspark/ml/pipeline.py | 2 +- python/pyspark/ml/regression.py | 18 ++++---- python/pyspark/ml/stat.py | 4 +- python/pyspark/ml/tests/test_wrapper.py | 2 +- python/pyspark/ml/tree.py | 4 +- python/pyspark/ml/tuning.py | 8 ++-- python/pyspark/ml/util.py | 8 +--- python/pyspark/ml/wrapper.py | 2 +- python/pyspark/mllib/classification.py | 8 ++-- python/pyspark/mllib/clustering.pyi | 2 +- python/pyspark/mllib/common.py | 8 ++-- python/pyspark/mllib/feature.py | 2 +- python/pyspark/mllib/linalg/__init__.py | 2 +- python/pyspark/mllib/regression.py | 12 ++--- python/pyspark/mllib/tests/test_linalg.py | 2 +- python/pyspark/mllib/tests/test_util.py | 2 +- python/pyspark/pandas/accessors.py | 4 +- python/pyspark/pandas/base.py | 2 +- .../pandas/data_type_ops/complex_ops.py | 2 +- .../pandas/data_type_ops/datetime_ops.py | 2 +- python/pyspark/pandas/frame.py | 6 +-- python/pyspark/pandas/generic.py | 5 +- python/pyspark/pandas/groupby.py | 12 ++--- python/pyspark/pandas/indexes/base.py | 23 ++++------ python/pyspark/pandas/indexing.py | 6 +-- python/pyspark/pandas/internal.py | 2 +- python/pyspark/pandas/ml.py | 6 +-- python/pyspark/pandas/plot/core.py | 2 +- python/pyspark/pandas/series.py | 17 +++---- python/pyspark/pandas/spark/functions.py | 4 +- python/pyspark/pandas/tests/test_dataframe.py | 2 +- python/pyspark/pandas/utils.py | 2 +- python/pyspark/rdd.py | 13 ++---- python/pyspark/resource/profile.py | 12 ++--- python/pyspark/resource/requests.py | 6 +-- python/pyspark/serializers.py | 4 +- python/pyspark/shell.py | 4 +- python/pyspark/shuffle.py | 2 +- python/pyspark/sql/avro/functions.py | 2 +- python/pyspark/sql/catalog.py | 2 +- python/pyspark/sql/column.py | 22 +++------ python/pyspark/sql/conf.py | 4 +- python/pyspark/sql/context.py | 10 ++-- python/pyspark/sql/dataframe.py | 37 ++++++--------- python/pyspark/sql/functions.py | 4 +- python/pyspark/sql/group.py | 2 +- python/pyspark/sql/observation.py | 2 +- .../pyspark/sql/pandas/_typing/__init__.pyi | 2 +- python/pyspark/sql/pandas/conversion.py | 20 ++++---- python/pyspark/sql/pandas/functions.pyi | 12 +++-- python/pyspark/sql/pandas/group_ops.py | 6 +-- python/pyspark/sql/pandas/map_ops.py | 2 +- python/pyspark/sql/readwriter.py | 26 ++++------- python/pyspark/sql/session.py | 20 ++++---- 
python/pyspark/sql/sql_formatter.py | 4 +- python/pyspark/sql/streaming.py | 6 +-- python/pyspark/sql/tests/test_functions.py | 2 +- python/pyspark/sql/tests/test_types.py | 2 +- .../pyspark/sql/tests/typing/test_session.yml | 2 +- python/pyspark/sql/types.py | 12 ++--- python/pyspark/sql/udf.py | 11 ++--- python/pyspark/sql/utils.py | 28 +++++------ python/pyspark/sql/window.py | 2 +- python/pyspark/status.py | 4 +- python/pyspark/streaming/context.pyi | 2 +- python/pyspark/streaming/kinesis.py | 2 +- python/pyspark/taskcontext.py | 2 +- python/pyspark/testing/mlutils.py | 4 +- python/pyspark/testing/pandasutils.py | 4 +- python/pyspark/testing/streamingutils.py | 2 +- python/pyspark/tests/test_context.py | 2 +- python/pyspark/tests/test_serializers.py | 2 +- python/pyspark/util.py | 2 +- python/pyspark/worker.py | 2 +- 101 files changed, 330 insertions(+), 381 deletions(-) diff --git a/dev/lint-python b/dev/lint-python index 35ba7a496839b..f0ca8832be057 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -127,7 +127,6 @@ function mypy_examples_test { echo "starting mypy examples test..." MYPY_REPORT=$( (MYPYPATH=python $MYPY_BUILD \ - --allow-untyped-defs \ --config-file python/mypy.ini \ --exclude "mllib/*" \ examples/src/main/python/) 2>&1) diff --git a/examples/src/main/python/als.py b/examples/src/main/python/als.py index 73af5e1a1fd37..5bd1807cce1b0 100755 --- a/examples/src/main/python/als.py +++ b/examples/src/main/python/als.py @@ -32,12 +32,12 @@ np.random.seed(42) -def rmse(R, ms, us): +def rmse(R: np.ndarray, ms: np.ndarray, us: np.ndarray) -> np.float64: diff = R - ms * us.T return np.sqrt(np.sum(np.power(diff, 2)) / (M * U)) -def update(i, mat, ratings): +def update(i: int, mat: np.ndarray, ratings: np.ndarray) -> np.ndarray: uu = mat.shape[0] ff = mat.shape[1] diff --git a/examples/src/main/python/kmeans.py b/examples/src/main/python/kmeans.py index 022378619c97f..dc828273e8d37 100755 --- a/examples/src/main/python/kmeans.py +++ b/examples/src/main/python/kmeans.py @@ -23,16 +23,17 @@ This example requires NumPy (http://www.numpy.org/). """ import sys +from typing import List import numpy as np from pyspark.sql import SparkSession -def parseVector(line): +def parseVector(line: str) -> np.ndarray: return np.array([float(x) for x in line.split(' ')]) -def closestPoint(p, centers): +def closestPoint(p: np.ndarray, centers: List[np.ndarray]) -> int: bestIndex = 0 closest = float("+inf") for i in range(len(centers)): diff --git a/examples/src/main/python/logistic_regression.py b/examples/src/main/python/logistic_regression.py index 9de27cd9aaf44..9645af619b1e3 100755 --- a/examples/src/main/python/logistic_regression.py +++ b/examples/src/main/python/logistic_regression.py @@ -23,6 +23,8 @@ ML, as shown in examples/src/main/python/ml/logistic_regression_with_elastic_net.py. """ import sys +from typing import Iterable, List + import numpy as np from pyspark.sql import SparkSession @@ -35,7 +37,7 @@ # make further computations faster. # The data file contains lines of the form
    """.format( catalogImplementation=self.conf.get("spark.sql.catalogImplementation"), - sc_HTML=self.sparkContext._repr_html_(), # type: ignore[attr-defined] + sc_HTML=self.sparkContext._repr_html_(), ) @property @@ -512,7 +512,7 @@ def _inferSchemaFromList( """ if not data: raise ValueError("can not infer schema from empty dataset") - infer_dict_as_struct = self._jconf.inferDictAsStruct() # type: ignore[attr-defined] + infer_dict_as_struct = self._jconf.inferDictAsStruct() prefer_timestamp_ntz = is_timestamp_ntz_preferred() schema = reduce( _merge_type, @@ -547,7 +547,7 @@ def _inferSchema( if not first: raise ValueError("The first row in RDD is empty, " "can not infer schema") - infer_dict_as_struct = self._jconf.inferDictAsStruct() # type: ignore[attr-defined] + infer_dict_as_struct = self._jconf.inferDictAsStruct() prefer_timestamp_ntz = is_timestamp_ntz_preferred() if samplingRatio is None: schema = _infer_schema( @@ -662,13 +662,13 @@ def _create_shell_session() -> "SparkSession": # Try to access HiveConf, it will raise exception if Hive is not added conf = SparkConf() assert SparkContext._jvm is not None - if cast(str, conf.get("spark.sql.catalogImplementation", "hive")).lower() == "hive": + if conf.get("spark.sql.catalogImplementation", "hive").lower() == "hive": SparkContext._jvm.org.apache.hadoop.hive.conf.HiveConf() return SparkSession.builder.enableHiveSupport().getOrCreate() else: return SparkSession._getActiveSessionOrCreate() except (py4j.protocol.Py4JError, TypeError): - if cast(str, conf.get("spark.sql.catalogImplementation", "")).lower() == "hive": + if conf.get("spark.sql.catalogImplementation", "").lower() == "hive": warnings.warn( "Fall back to non-hive support because failing to access HiveConf, " "please make sure you build spark with hive" @@ -935,9 +935,7 @@ def prepare(obj: Any) -> Any: else: rdd, struct = self._createFromLocal(map(prepare, data), schema) assert self._jvm is not None - jrdd = self._jvm.SerDeUtil.toJavaArray( - rdd._to_java_object_rdd() # type: ignore[attr-defined] - ) + jrdd = self._jvm.SerDeUtil.toJavaArray(rdd._to_java_object_rdd()) jdf = self._jsparkSession.applySchemaToPythonRDD(jrdd.rdd(), struct.json()) df = DataFrame(jdf, self) df._schema = struct diff --git a/python/pyspark/sql/sql_formatter.py b/python/pyspark/sql/sql_formatter.py index 8528dd3e88352..5e79b9ff5ea98 100644 --- a/python/pyspark/sql/sql_formatter.py +++ b/python/pyspark/sql/sql_formatter.py @@ -50,9 +50,9 @@ def _convert_value(self, val: Any, field_name: str) -> Optional[str]: from pyspark.sql import Column, DataFrame if isinstance(val, Column): - assert SparkContext._gateway is not None # type: ignore[attr-defined] + assert SparkContext._gateway is not None - gw = SparkContext._gateway # type: ignore[attr-defined] + gw = SparkContext._gateway jexpr = val._jc.expr() if is_instance_of( gw, jexpr, "org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute" diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 7cff8d0e52181..7517a41337f90 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -20,7 +20,7 @@ from collections.abc import Iterator from typing import cast, overload, Any, Callable, Dict, List, Optional, TYPE_CHECKING, Union -from py4j.java_gateway import java_import, JavaObject # type: ignore[import] +from py4j.java_gateway import java_import, JavaObject from pyspark import since from pyspark.sql.column import _to_seq @@ -1196,7 +1196,7 @@ def foreach(self, f: Union[Callable[[Row], None], 
"SupportsProcess"]) -> "DataSt >>> writer = sdf.writeStream.foreach(RowPrinter()) """ - from pyspark.rdd import _wrap_function # type: ignore[attr-defined] + from pyspark.rdd import _wrap_function from pyspark.serializers import CPickleSerializer, AutoBatchedSerializer from pyspark.taskcontext import TaskContext @@ -1474,7 +1474,7 @@ def _test() -> None: import tempfile from pyspark.sql import SparkSession, SQLContext import pyspark.sql.streaming - from py4j.protocol import Py4JError # type: ignore[import] + from py4j.protocol import Py4JError os.chdir(os.environ["SPARK_HOME"]) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 5021da569fe40..5c6acaffa324b 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -20,7 +20,7 @@ import re import math -from py4j.protocol import Py4JJavaError # type: ignore[import] +from py4j.protocol import Py4JJavaError from pyspark.sql import Row, Window, types from pyspark.sql.functions import ( udf, diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 6aa8b111b4254..9ae6c3a63457e 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -48,7 +48,7 @@ BooleanType, NullType, ) -from pyspark.sql.types import ( # type: ignore +from pyspark.sql.types import ( _array_signed_int_typecode_ctype_mappings, _array_type_mappings, _array_unsigned_int_typecode_ctype_mappings, diff --git a/python/pyspark/sql/tests/typing/test_session.yml b/python/pyspark/sql/tests/typing/test_session.yml index 01a6b288aae1a..70d0001c47ca8 100644 --- a/python/pyspark/sql/tests/typing/test_session.yml +++ b/python/pyspark/sql/tests/typing/test_session.yml @@ -35,7 +35,7 @@ spark.createDataFrame(data, schema) spark.createDataFrame(data, "name string, age integer") spark.createDataFrame([(1, ("foo", "bar"))], ("_1", "_2")) - spark.createDataFrame(data, ("name", "age"), samplingRatio=0.1) # type: ignore + spark.createDataFrame(data, ("name", "age"), samplingRatio=0.1) - case: createDataFrameScalarsValid diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 52a9b89ec3df1..41db22b054049 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -43,8 +43,8 @@ TypeVar, ) -from py4j.protocol import register_input_converter # type: ignore[import] -from py4j.java_gateway import JavaClass, JavaGateway, JavaObject # type: ignore[import] +from py4j.protocol import register_input_converter +from py4j.java_gateway import JavaClass, JavaGateway, JavaObject from pyspark.serializers import CloudPickleSerializer @@ -1011,7 +1011,7 @@ def _parse_datatype_string(s: str) -> DataType: """ from pyspark import SparkContext - sc = SparkContext._active_spark_context # type: ignore[attr-defined] + sc = SparkContext._active_spark_context assert sc is not None def from_ddl_schema(type_str: str) -> DataType: @@ -1593,7 +1593,7 @@ def assert_acceptable_types(obj: Any) -> None: def verify_acceptable_types(obj: Any) -> None: # subclass of them can not be fromInternal in JVM - if type(obj) not in _acceptable_types[_type]: # type: ignore[operator] + if type(obj) not in _acceptable_types[_type]: raise TypeError( new_msg("%s can not accept object %r in type %s" % (dataType, obj, type(obj))) ) @@ -1684,9 +1684,7 @@ def verify_map(obj: Any) -> None: elif isinstance(dataType, StructType): verifiers = [] for f in dataType.fields: - verifier = _make_type_verifier( - f.dataType, f.nullable, 
name=new_name(f.name) - ) # type: ignore[arg-type] + verifier = _make_type_verifier(f.dataType, f.nullable, name=new_name(f.name)) verifiers.append((f.name, verifier)) def verify_struct(obj: Any) -> None: diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py index d98078af743c0..d8856e053faa7 100644 --- a/python/pyspark/sql/udf.py +++ b/python/pyspark/sql/udf.py @@ -22,11 +22,11 @@ import sys from typing import Callable, Any, TYPE_CHECKING, Optional, cast, Union -from py4j.java_gateway import JavaObject # type: ignore[import] +from py4j.java_gateway import JavaObject from pyspark import SparkContext from pyspark.profiler import Profiler -from pyspark.rdd import _prepare_for_python_RDD, PythonEvalType # type: ignore[attr-defined] +from pyspark.rdd import _prepare_for_python_RDD, PythonEvalType from pyspark.sql.column import Column, _to_java_column, _to_seq from pyspark.sql.types import ( StringType, @@ -53,10 +53,10 @@ def _wrap_function( bytearray(pickled_command), env, includes, - sc.pythonExec, # type: ignore[attr-defined] - sc.pythonVer, # type: ignore[attr-defined] + sc.pythonExec, + sc.pythonVer, broadcast_vars, - sc._javaAccumulator, # type: ignore[attr-defined] + sc._javaAccumulator, ) @@ -505,7 +505,6 @@ def registerJavaFunction( if returnType is not None: if not isinstance(returnType, DataType): returnType = _parse_datatype_string(returnType) - returnType = cast(DataType, returnType) jdt = self.sparkSession._jsparkSession.parseDataType(returnType.json()) self.sparkSession._jsparkSession.udf().registerJava(name, javaClassName, jdt) diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py index b5abe6891d2cb..b3219b8b9be4e 100644 --- a/python/pyspark/sql/utils.py +++ b/python/pyspark/sql/utils.py @@ -17,14 +17,14 @@ from typing import Any, Callable, Optional, Sequence, TYPE_CHECKING, cast import py4j -from py4j.java_collections import JavaArray # type: ignore[import] -from py4j.java_gateway import ( # type: ignore[import] +from py4j.java_collections import JavaArray +from py4j.java_gateway import ( JavaClass, JavaGateway, JavaObject, is_instance_of, ) -from py4j.protocol import Py4JJavaError # type: ignore[import] +from py4j.protocol import Py4JJavaError from pyspark import SparkContext from pyspark.find_spark_home import _find_spark_home @@ -61,9 +61,9 @@ def __init__( self._origin = origin def __str__(self) -> str: - assert SparkContext._jvm is not None # type: ignore[attr-defined] + assert SparkContext._jvm is not None - jvm = SparkContext._jvm # type: ignore[attr-defined] + jvm = SparkContext._jvm sql_conf = jvm.org.apache.spark.sql.internal.SQLConf.get() debug_enabled = sql_conf.pysparkJVMStacktraceEnabled() desc = self.desc @@ -72,9 +72,9 @@ def __str__(self) -> str: return str(desc) def getErrorClass(self) -> Optional[str]: - assert SparkContext._gateway is not None # type: ignore[attr-defined] + assert SparkContext._gateway is not None - gw = SparkContext._gateway # type: ignore[attr-defined] + gw = SparkContext._gateway if self._origin is not None and is_instance_of( gw, self._origin, "org.apache.spark.SparkThrowable" ): @@ -83,9 +83,9 @@ def getErrorClass(self) -> Optional[str]: return None def getSqlState(self) -> Optional[str]: - assert SparkContext._gateway is not None # type: ignore[attr-defined] + assert SparkContext._gateway is not None - gw = SparkContext._gateway # type: ignore[attr-defined] + gw = SparkContext._gateway if self._origin is not None and is_instance_of( gw, self._origin, "org.apache.spark.SparkThrowable" ): @@ -144,11 
+144,11 @@ class SparkUpgradeException(CapturedException): def convert_exception(e: Py4JJavaError) -> CapturedException: assert e is not None - assert SparkContext._jvm is not None # type: ignore[attr-defined] - assert SparkContext._gateway is not None # type: ignore[attr-defined] + assert SparkContext._jvm is not None + assert SparkContext._gateway is not None - jvm = SparkContext._jvm # type: ignore[attr-defined] - gw = SparkContext._gateway # type: ignore[attr-defined] + jvm = SparkContext._jvm + gw = SparkContext._gateway if is_instance_of(gw, e, "org.apache.spark.sql.catalyst.parser.ParseException"): return ParseException(origin=e) @@ -292,7 +292,7 @@ def is_timestamp_ntz_preferred() -> bool: """ Return a bool if TimestampNTZType is preferred according to the SQL configuration set. """ - jvm = SparkContext._jvm # type: ignore[attr-defined] + jvm = SparkContext._jvm return ( jvm is not None and getattr(getattr(jvm.org.apache.spark.sql.internal, "SQLConf$"), "MODULE$") diff --git a/python/pyspark/sql/window.py b/python/pyspark/sql/window.py index 1690c49a777fb..b8bc90f458cdf 100644 --- a/python/pyspark/sql/window.py +++ b/python/pyspark/sql/window.py @@ -21,7 +21,7 @@ from pyspark import since, SparkContext from pyspark.sql.column import _to_seq, _to_java_column -from py4j.java_gateway import JavaObject # type: ignore[import] +from py4j.java_gateway import JavaObject if TYPE_CHECKING: from pyspark.sql._typing import ColumnOrName, ColumnOrName_ diff --git a/python/pyspark/status.py b/python/pyspark/status.py index 193b9ff60f229..7e64c414403f2 100644 --- a/python/pyspark/status.py +++ b/python/pyspark/status.py @@ -19,8 +19,8 @@ from typing import List, NamedTuple, Optional -from py4j.java_collections import JavaArray # type: ignore[import] -from py4j.java_gateway import JavaObject # type: ignore[import] +from py4j.java_collections import JavaArray +from py4j.java_gateway import JavaObject class SparkJobInfo(NamedTuple): diff --git a/python/pyspark/streaming/context.pyi b/python/pyspark/streaming/context.pyi index 3eb252630934d..0d1b2aca7395f 100644 --- a/python/pyspark/streaming/context.pyi +++ b/python/pyspark/streaming/context.pyi @@ -18,7 +18,7 @@ from typing import Any, Callable, List, Optional, TypeVar -from py4j.java_gateway import JavaObject # type: ignore[import] +from py4j.java_gateway import JavaObject from pyspark.context import SparkContext from pyspark.rdd import RDD diff --git a/python/pyspark/streaming/kinesis.py b/python/pyspark/streaming/kinesis.py index e48a91e7ceb86..26d66c394ab83 100644 --- a/python/pyspark/streaming/kinesis.py +++ b/python/pyspark/streaming/kinesis.py @@ -20,7 +20,7 @@ from pyspark.storagelevel import StorageLevel from pyspark.streaming import DStream from pyspark.streaming.context import StreamingContext -from pyspark.util import _print_missing_jar # type: ignore[attr-defined] +from pyspark.util import _print_missing_jar __all__ = ["KinesisUtils", "InitialPositionInStream", "utf8_decoder"] diff --git a/python/pyspark/taskcontext.py b/python/pyspark/taskcontext.py index 627456b3744e3..c4d10aaeacc14 100644 --- a/python/pyspark/taskcontext.py +++ b/python/pyspark/taskcontext.py @@ -183,7 +183,7 @@ def _getOrCreate(cls: Type["BarrierTaskContext"]) -> "BarrierTaskContext": """ if not isinstance(cls._taskContext, BarrierTaskContext): cls._taskContext = object.__new__(cls) - return cast(BarrierTaskContext, cls._taskContext) + return cls._taskContext @classmethod def get(cls: Type["BarrierTaskContext"]) -> "BarrierTaskContext": diff --git 
a/python/pyspark/testing/mlutils.py b/python/pyspark/testing/mlutils.py index 48e3498eb0db3..503ba7c76960b 100644 --- a/python/pyspark/testing/mlutils.py +++ b/python/pyspark/testing/mlutils.py @@ -24,7 +24,7 @@ from pyspark.ml.param.shared import HasMaxIter, HasRegParam from pyspark.ml.classification import Classifier, ClassificationModel from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable -from pyspark.ml.wrapper import _java2py # type: ignore +from pyspark.ml.wrapper import _java2py from pyspark.sql import DataFrame, SparkSession from pyspark.sql.types import DoubleType from pyspark.testing.utils import ReusedPySparkTestCase as PySparkTestCase @@ -126,7 +126,7 @@ def _transform(self, dataset): class MockUnaryTransformer(UnaryTransformer, DefaultParamsReadable, DefaultParamsWritable): shift = Param( - Params._dummy(), # type: ignore + Params._dummy(), "shift", "The amount by which to shift " + "data in a DataFrame", typeConverter=TypeConverters.toFloat, diff --git a/python/pyspark/testing/pandasutils.py b/python/pyspark/testing/pandasutils.py index a5b913ec6727b..9b07a23ae1b56 100644 --- a/python/pyspark/testing/pandasutils.py +++ b/python/pyspark/testing/pandasutils.py @@ -46,7 +46,7 @@ matplotlib_requirement_message = None try: - import matplotlib # type: ignore # noqa: F401 + import matplotlib # noqa: F401 except ImportError as e: # If matplotlib requirement is not satisfied, skip related tests. matplotlib_requirement_message = str(e) @@ -54,7 +54,7 @@ plotly_requirement_message = None try: - import plotly # type: ignore # noqa: F401 + import plotly # noqa: F401 except ImportError as e: # If plotly requirement is not satisfied, skip related tests. plotly_requirement_message = str(e) diff --git a/python/pyspark/testing/streamingutils.py b/python/pyspark/testing/streamingutils.py index b44fb4c73aeb2..1860c54d31856 100644 --- a/python/pyspark/testing/streamingutils.py +++ b/python/pyspark/testing/streamingutils.py @@ -40,7 +40,7 @@ "spark-streaming-kinesis-asl-assembly_", ) if kinesis_asl_assembly_jar is None: - kinesis_requirement_message = ( # type: ignore + kinesis_requirement_message = ( "Skipping all Kinesis Python tests as the optional Kinesis project was " "not compiled into a JAR. 
To run these tests, " "you need to build Spark with 'build/sbt -Pkinesis-asl assembly/package " diff --git a/python/pyspark/tests/test_context.py b/python/pyspark/tests/test_context.py index 0f092a860b2a1..1b63869562f40 100644 --- a/python/pyspark/tests/test_context.py +++ b/python/pyspark/tests/test_context.py @@ -164,7 +164,7 @@ def test_overwrite_system_module(self): self.assertEqual("My Server", SimpleHTTPServer.__name__) def func(x): - import SimpleHTTPServer # type: ignore[import] + import SimpleHTTPServer return SimpleHTTPServer.__name__ diff --git a/python/pyspark/tests/test_serializers.py b/python/pyspark/tests/test_serializers.py index e2fb5ed894e3b..0a89861a26f8c 100644 --- a/python/pyspark/tests/test_serializers.py +++ b/python/pyspark/tests/test_serializers.py @@ -249,7 +249,7 @@ def test_chunked_stream(self): from pyspark.tests.test_serializers import * # noqa: F401 try: - import xmlrunner # type: ignore[import] + import xmlrunner testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) except ImportError: diff --git a/python/pyspark/util.py b/python/pyspark/util.py index de44ab681b74d..5abbbb919636f 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -27,7 +27,7 @@ from types import TracebackType from typing import Any, Callable, Iterator, List, Optional, TextIO, Tuple -from py4j.clientserver import ClientServer # type: ignore[import] +from py4j.clientserver import ClientServer __all__: List[str] = [] diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index b00dc75e3d6e9..8784abfb33379 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -60,7 +60,7 @@ ) from pyspark.sql.pandas.types import to_arrow_type from pyspark.sql.types import StructType -from pyspark.util import fail_on_stopiteration, try_simplify_traceback # type: ignore +from pyspark.util import fail_on_stopiteration, try_simplify_traceback from pyspark import shuffle pickleSer = CPickleSerializer() From 1acadf3cffe8e592b4000f23aa7d42f3a54a2e02 Mon Sep 17 00:00:00 2001 From: cashmand Date: Wed, 16 Mar 2022 14:13:57 +0800 Subject: [PATCH 505/513] [SPARK-38558][SQL] Remove unnecessary casts between IntegerType and IntDecimal ### What changes were proposed in this pull request? In `NTile`, the number of rows per bucket is computed as `n / buckets`, where `n` is the partition size, and `buckets` is the argument to `NTile` (number of buckets). The code currently casts the arguments to IntDecimal, then casts the result back to IntegerType. This is unnecessary, since it is equivalent to just doing integer division, i.e. `n div buckets`. This PR makes that simplifying change. ### Why are the changes needed? Simplifies the code, and avoids a couple of casts at run-time. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Relying on existing tests (specifically, org.apache.spark.sql.hive.execution.WindowQuerySuite). Closes #35863 from cashmand/remove_decimal_cast. 
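For illustration (not from this patch): the equivalence the change relies on is that, for the positive operands `NTile` works with (partition size `n` and a positive bucket count), truncating the decimal quotient gives the same answer as plain integer division. Below is a minimal sketch of that check, using Python's `decimal` module as a stand-in for Spark's `IntDecimal` arithmetic; the helper names are made up for the example.

```python
# Hypothetical check: truncated decimal division vs. integer division
# for positive operands, mirroring the before/after expression shapes.
from decimal import Decimal

def rows_per_bucket_old(n: int, buckets: int) -> int:
    # old shape: cast to decimal, divide, cast back to integer (truncation)
    return int(Decimal(n) / Decimal(buckets))

def rows_per_bucket_new(n: int, buckets: int) -> int:
    # new shape: integer division directly
    return n // buckets

assert all(
    rows_per_bucket_old(n, b) == rows_per_bucket_new(n, b)
    for n in range(1, 500)
    for b in range(1, 50)
)
```

Because the two agree on positive inputs, `n div buckets` can replace the decimal round-trip without changing results.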
Authored-by: cashmand Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/windowExpressions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 6396fde575b8f..c701d10b00b73 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -825,7 +825,7 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow zero, zero, zero, - (n.cast(DecimalType.IntDecimal) / buckets.cast(DecimalType.IntDecimal)).cast(IntegerType), + (n div buckets).cast(IntegerType), (n % buckets).cast(IntegerType) ) From 8476c8b846ffd2622a6bcf1accf9fa55ffbdc0db Mon Sep 17 00:00:00 2001 From: mcdull-zhang Date: Wed, 16 Mar 2022 14:17:18 +0800 Subject: [PATCH 506/513] [SPARK-38542][SQL] UnsafeHashedRelation should serialize numKeys out ### What changes were proposed in this pull request? UnsafeHashedRelation should serialize numKeys out ### Why are the changes needed? One case I found was this: We turned on ReusedExchange(BroadcastExchange), but the returned UnsafeHashedRelation is missing numKeys. The reason is that the current type of TorrentBroadcast._value is SoftReference, so the UnsafeHashedRelation obtained by deserialization loses numKeys, which will lead to incorrect calculation results. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a line of assert to an existing unit test Closes #35836 from mcdull-zhang/UnsafeHashed. Authored-by: mcdull-zhang Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/execution/joins/HashedRelation.scala | 4 +++- .../spark/sql/execution/joins/HashedRelationSuite.scala | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala index 698e7ed6fc57e..253f16e39d352 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala @@ -207,7 +207,7 @@ private[execution] class ValueRowWithKeyIndex { * A HashedRelation for UnsafeRow, which is backed BytesToBytesMap. 
* * It's serialized in the following format: - * [number of keys] + * [number of keys] [number of fields] * [size of key] [size of value] [key bytes] [bytes for value] */ private[joins] class UnsafeHashedRelation( @@ -364,6 +364,7 @@ private[joins] class UnsafeHashedRelation( writeInt: (Int) => Unit, writeLong: (Long) => Unit, writeBuffer: (Array[Byte], Int, Int) => Unit) : Unit = { + writeInt(numKeys) writeInt(numFields) // TODO: move these into BytesToBytesMap writeLong(binaryMap.numKeys()) @@ -397,6 +398,7 @@ private[joins] class UnsafeHashedRelation( readInt: () => Int, readLong: () => Long, readBuffer: (Array[Byte], Int, Int) => Unit): Unit = { + numKeys = readInt() numFields = readInt() resultRow = new UnsafeRow(numFields) val nKeys = readLong() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala index 2462fe31a9b66..6c87178f267c4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala @@ -93,6 +93,9 @@ class HashedRelationSuite extends SharedSparkSession { assert(hashed2.get(toUnsafe(InternalRow(10))) === null) assert(hashed2.get(unsafeData(2)).toArray === data2) + // SPARK-38542: UnsafeHashedRelation should serialize numKeys out + assert(hashed2.keys().map(_.copy()).forall(_.numFields == 1)) + val os2 = new ByteArrayOutputStream() val out2 = new ObjectOutputStream(os2) hashed2.writeExternal(out2) From 8193b405f02f867439dd2d2017bf7b3c814b5cc8 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 16 Mar 2022 18:20:50 +0900 Subject: [PATCH 507/513] [SPARK-38563][PYTHON] Upgrade to Py4J 0.10.9.4 ### What changes were proposed in this pull request? This PR upgrades Py4J to 0.10.9.4, with the relevant documentation changes. ### Why are the changes needed? Py4J 0.10.9.3 has a resource leak issue when pinned thread mode is enabled - it's enabled by default in PySpark at https://github.com/apache/spark/commit/41af409b7bcfe1b3960274c0b3085bcc1f9d1c98. We worked around this by requiring users to use `InheritableThread` or `inheritable_thread_target`. After upgrading, we don't need to require this anymore because Py4J now cleans up automatically; see also https://github.com/py4j/py4j/pull/471 ### Does this PR introduce _any_ user-facing change? Yes, users no longer have to use `InheritableThread` or `inheritable_thread_target` to avoid the resource leak. ### How was this patch tested? CI in this PR should test it out. Closes #35871 from HyukjinKwon/SPARK-38563.
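For context on the workaround mentioned above, here is a hedged sketch (not taken from this patch) of the pattern that was previously required: running a job from a separate Python thread through `pyspark.InheritableThread`, which pairs the Python thread with a JVM thread, inherits local properties such as the job group, and cleans the JVM thread up when the Python thread exits. The local master, app name, and job body are arbitrary examples.

```python
# Sketch of the pre-0.10.9.4 workaround: start a job from another Python
# thread via InheritableThread so the paired JVM thread is cleaned up and
# inheritable local properties are propagated.
from pyspark import InheritableThread
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[2]").appName("py4j-thread-demo").getOrCreate()
sc = spark.sparkContext

def background_job() -> None:
    # The job group set here applies to jobs launched from this thread.
    sc.setJobGroup("background", "job started from a Python thread")
    print(sc.parallelize(range(1000)).sum())

t = InheritableThread(target=background_job)
t.start()
t.join()
spark.stop()
```

With Py4J 0.10.9.4 the per-thread cleanup happens automatically, so this pattern becomes optional rather than required for avoiding the leak.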
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- bin/pyspark | 2 +- bin/pyspark2.cmd | 2 +- core/pom.xml | 2 +- .../apache/spark/api/python/PythonUtils.scala | 2 +- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- docs/job-scheduling.md | 2 +- python/docs/Makefile | 2 +- python/docs/make2.bat | 2 +- .../docs/source/getting_started/install.rst | 2 +- python/lib/py4j-0.10.9.3-src.zip | Bin 42021 -> 0 bytes python/lib/py4j-0.10.9.4-src.zip | Bin 0 -> 42404 bytes python/pyspark/context.py | 6 +-- python/pyspark/util.py | 35 +++--------------- python/setup.py | 2 +- sbin/spark-config.sh | 2 +- 16 files changed, 20 insertions(+), 45 deletions(-) delete mode 100644 python/lib/py4j-0.10.9.3-src.zip create mode 100644 python/lib/py4j-0.10.9.4-src.zip diff --git a/bin/pyspark b/bin/pyspark index 4840589ffb7bd..1e16c56bc9724 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -50,7 +50,7 @@ export PYSPARK_DRIVER_PYTHON_OPTS # Add the PySpark classes to the Python path: export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH" -export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.3-src.zip:$PYTHONPATH" +export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.4-src.zip:$PYTHONPATH" # Load the PySpark shell.py script when ./pyspark is used interactively: export OLD_PYTHONSTARTUP="$PYTHONSTARTUP" diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd index a19627a3b220a..f20c320494757 100644 --- a/bin/pyspark2.cmd +++ b/bin/pyspark2.cmd @@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" ( ) set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH% -set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.3-src.zip;%PYTHONPATH% +set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.4-src.zip;%PYTHONPATH% set OLD_PYTHONSTARTUP=%PYTHONSTARTUP% set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py diff --git a/core/pom.xml b/core/pom.xml index 9d3b1709af2ac..953c76b73469f 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -423,7 +423,7 @@ net.sf.py4j py4j - 0.10.9.3 + 0.10.9.4 org.apache.spark diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 8daba86758412..a9c353691b466 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -27,7 +27,7 @@ import org.apache.spark.SparkContext import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} private[spark] object PythonUtils { - val PY4J_ZIP_NAME = "py4j-0.10.9.3-src.zip" + val PY4J_ZIP_NAME = "py4j-0.10.9.4-src.zip" /** Get the PYTHONPATH for PySpark, either from SPARK_HOME, if it is set, or from our JAR */ def sparkPythonPath: String = { diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index bcbf8b9908ae5..f2db663550407 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -233,7 +233,7 @@ parquet-hadoop/1.12.2//parquet-hadoop-1.12.2.jar parquet-jackson/1.12.2//parquet-jackson-1.12.2.jar pickle/1.2//pickle-1.2.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9.3//py4j-0.10.9.3.jar +py4j/0.10.9.4//py4j-0.10.9.4.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar rocksdbjni/6.20.3//rocksdbjni-6.20.3.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 8ca7880c7a34d..c56b4c9bb6826 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -221,7 +221,7 @@ parquet-hadoop/1.12.2//parquet-hadoop-1.12.2.jar parquet-jackson/1.12.2//parquet-jackson-1.12.2.jar pickle/1.2//pickle-1.2.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9.3//py4j-0.10.9.3.jar +py4j/0.10.9.4//py4j-0.10.9.4.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar rocksdbjni/6.20.3//rocksdbjni-6.20.3.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar diff --git a/docs/job-scheduling.md b/docs/job-scheduling.md index 4ed2aa9112224..f44ed8245e286 100644 --- a/docs/job-scheduling.md +++ b/docs/job-scheduling.md @@ -304,5 +304,5 @@ via `sc.setJobGroup` in a separate PVM thread, which also disallows to cancel th later. `pyspark.InheritableThread` is recommended to use together for a PVM thread to inherit the inheritable attributes - such as local properties in a JVM thread, and to avoid resource leak. + such as local properties in a JVM thread. diff --git a/python/docs/Makefile b/python/docs/Makefile index 9cb1a17ef584f..2628530cb20b3 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -21,7 +21,7 @@ SPHINXBUILD ?= sphinx-build SOURCEDIR ?= source BUILDDIR ?= build -export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.3-src.zip) +export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.4-src.zip) # Put it first so that "make" without argument is like "make help". help: diff --git a/python/docs/make2.bat b/python/docs/make2.bat index 2e4e2b543ab24..26ef220309c48 100644 --- a/python/docs/make2.bat +++ b/python/docs/make2.bat @@ -25,7 +25,7 @@ if "%SPHINXBUILD%" == "" ( set SOURCEDIR=source set BUILDDIR=build -set PYTHONPATH=..;..\lib\py4j-0.10.9.3-src.zip +set PYTHONPATH=..;..\lib\py4j-0.10.9.4-src.zip if "%1" == "" goto help diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 15a12403128d9..3503be03339fe 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -157,7 +157,7 @@ Package Minimum supported version Note `pandas` 1.0.5 Optional for Spark SQL `NumPy` 1.7 Required for MLlib DataFrame-based API `pyarrow` 1.0.0 Optional for Spark SQL -`Py4J` 0.10.9.3 Required +`Py4J` 0.10.9.4 Required `pandas` 1.0.5 Required for pandas API on Spark `pyarrow` 1.0.0 Required for pandas API on Spark `Numpy` 1.14 Required for pandas API on Spark diff --git a/python/lib/py4j-0.10.9.3-src.zip b/python/lib/py4j-0.10.9.3-src.zip deleted file mode 100644 index 428f3acd62b3c024c76cd331c7ddb0fbad923720..0000000000000000000000000000000000000000 GIT binary patch [base85-encoded binary payload of the deleted python/lib/py4j-0.10.9.3-src.zip omitted]
diff --git a/python/lib/py4j-0.10.9.4-src.zip b/python/lib/py4j-0.10.9.4-src.zip new file mode 100644 index 0000000000000000000000000000000000000000..51b3404d5ab3e9157bdb04df97920be616b1c855 GIT binary patch [base85-encoded binary payload of the new python/lib/py4j-0.10.9.4-src.zip omitted]
z9loMrSFlnnFVkQ@g8T4iSG@&NpHn_Yp@n#IVpPPKlo#k!3qd<=@m3e6TN`8jHpS?< zP^k1agEkJ-5|vQ|i?9lu46&@@AE_A9OjG~HTRbD6umN0T^myI`oZ!)G^v05XkwkiS z6J=P?|5Mj|Mhquc0pwMaEbc&)8!{-d=BHSo`~RB>J%p+d?H%-^j01J#Mv27QR3T_O zMe+<~k$uGAA86!fStSl;k&Rr=jm!=GO7~a{ps_fw<@AK|B zGwR{={(L`-_zR1XV6L$6?nutr$&sU{Ed;4FOya0t_G$!f%-EjqzCoyadX zloe4~F2lTS;RSHyAD;}+5P+sB{<;Gd`idh9YC}dX5E*nZ$cB8{0Ds(wGBTYBoM3HP z@Dd6JgcFu0J!350Gy`6Qv`EhIM6_SaZzTe)`^O8N^5qV9LDfU23MGDVj7deCabEUm znZ?GwH@zSIEW)T1q3l3pff(QKdBg1M&FUP>afQG=ZM2Fs@#AlVcMT@b!iN);G8hhP zqGnY_k;X|bCq8!wVJZk!9J-`kX~q{QEA9F5OE#XeQ1ci1ucg86EGCpadtp*M5AlZz zjpVs7b_6|WPp>ffgD$9{Y9lxT(1B<=RIJ5ivgI@)a_n$)ET$?C88(9&$qUnXvO^Ud zfIkzPW4ZbrSpDwP_(kFaevCsNC(uSp9-2*PlsFv%M^KW#AoQ(@i z5w*LgqpkaMaiI-0zNn=5NEdxTXNd-^3cR2Syig5;ol8ld76qXhqobHwdfVL;BcQ+3 zB_rs2QL6;|xj$~m*#}iO+OBpq+)Z|;IH4UDR$VhO1@|1m0-6uZnx=fT@Xjn#gkHf;uPsId zR7k{V04Cf_uySrxaMx`%Ov(hfy%r#JIePG)ENd;%5Rz4JnC_>&1(BZAuWXZ;J)3CA zA&2|+_(6ZiA4Pl3gkVGYq&BxLP8lZbBt?Jz{Jd28$*tZK0b!O|yG)w|hRMACbr6}u zZTEvW{d#y%5T0T1W;3Jz&6-JoA971e5`i4Q6cAsCS(gdRhc@a7CHUy2c~mC~`gBlS z;vPvpy!*Aq(jl|tStcUkPm|1=gzh?>Mx7&A5D1N2Bs8vduhkL{) zyt==g)z{+a?BMF^?n{tOoD)5qdTrwD7;0ed?DNsEF!DPApGhvy?kCrS*w;lFDWJ8e zCBCp9JSLz6_vrpMQWw^m!U}GDV*wU5Fs(^C&kwolD~^?RwKmZjHf_I7#Gd+Jl+K+( zu6$J1@;dIN-<+4GSg|!0k!^dqvM4&7Y#vdOMaM(suR4fnSMpWk zJFnLsNv97=DOxs$*4zwi5L*^BHPIx4RnSXugV&@V1=)fc0kN(61AHiRwcfsg<+#w) zFpauw*Ltviyi+8HMXU>8*p%-thedeJ-}?rs6k6mwDF*;R0MreDMx{aRDyO&4vOB;o zIir|3ARtV_k8GmJ*crJKrTiXUG^kZ2d{2FUZD-&MTcrBZ&09I|o;SpZ@Q?1&lFH(% zY$6&E83<8Se2dQ;eM!6bGXS<$<9mRGEhp93r(+PN0v zm#l2K6ns#&#yyUCI~{l*5u7z>r?Q(kY4U;Wnnv?v91K;W3uFr(aHijwWfvvAje=)3 zFW7vxdzmY)ZM4tAglVeP6r~^k8P|K%nCWGgS4tumPCysa=Hrx zlPbjdhGenLT7NH{vug%!9C_o32I$X z;O)w|OL*~sL>?*1Dg|ZB4L6j4L-N!Nvi~d6+hrIPkuA5Z%aW4*K&xB57e?)5sa6)b zf>+8&oBfWpL*5|}-_gm6&Xr%tLSd~*^t$Y$JOT<~qRnDf8dw~dGxu43%zM>g-5n%> zy6j|sX?`A@@OL@Er znkurJ@c;dGEOmG{mdi!PF+Nq!`2^8&WeLc(9TD;%+?xXfe1N#I!;CJ#*pI0kgsAjfmM^Zj`wT{#MGnZfTEYI9LILDw5+djQr*!VO>N7~an2 z+Wz8MVZn2r0k^i$ofROvl!$?QDCXdXYk7G1{*_t9mIM`At`eqX<#dREx0{{jxMEosvL$KK#Kmd8B-3SFIr!>eZ5+OYSbgjQ~|}Tkq3BHl)pfx9~nyYW3wNZqBdj zl1YJVm+0-~HEWGb7H$z@*{NPqe7>QAz4O8+7?^ow(CBNGQX;pdnuLJc6yu4;t~(7* zI7CjxxteZ01v8;flNi=2Q%suOk6pf91tVzw2%1nby_G}J+z)%5eX7~5qO*V)&0)Wb zqGoYb?qP=DC{XpyB&n5269R-sa9H~7p8f2|zZjBt1cZ)Kb?CHle)b7Vu(=hj0!5v> zp3G+}086(^(YUrotROE3M@#zUi9p&>a3iT9folWnhQImCQsh>T;*bMxra!cjCE&Qf z&{Fu3#P_GwXI=6A#-FiM$bpt5K=_w!=QD8k2DAp&N6P617v0{Jo%sv$r=9RPit+QV z+L73ZtE0#o0TlP22G@T~Dq^gYT^bgo!FznC$d{tTv@%T}@&af7KH5I+5FXb7VBml9 zD?qou@ZU0(^K{EY%!tzIboQs%kWKTyKZgqI({2wBu1znO*?VUhO#Nyos@O=>88#F` zaAKc(XTrx$p&r;tWjyoUK_5wur%s0RCD0rBYa~S`Fi^;^*g@NySvN+8VQbvttC)Xc z;$>2T2|+CohBy9E{HFwy!!9y&5dBB}eh^u}UPCAk(VhuHUA=-q4N&!%-eq5ZtiHFR z@0ne06rWiZOFk7mvsB&|dE76RzDOqfk|jG>*qTI)G0Z}hHRv~_A_{&AT$;8ll|Q#W z4uBa`3++j`uBfQHBJasFaOn@QAa?fX$?*~v3^;9Y+60!LBkRL;#~=q((=XRg<^d*# z<+|qEH7go|^9c{$^YbK7bB5cTKawpgRJY2tPCzjRXwBopBdquxz76)3vR)~js5mRp ziyHwWAl_ig3~8A`xVGy&xpw|S{k%AkjIu2%hI=DMuXgZ=E;Jj)-{vsAr3bmZ$^ofJ zIPL{WPWx}Ibv~`fdrRZ5kM3^6;Ju!FC4pgH7lfMx1K~5_+R4Z<3p*O8vv|960AzRz zM(-wDms7a#Wma9VJ?p8zHpb4rsM*modNH*KnQ20e!nG zRq8D#{Kt^Ov2sMiUL^fy)0-N5ZD;2!XF23;ytSXmt^`%EWB=4izA&%FXz+bJO~4Sd zJ?4{y@(c$dXYgo9PL9n8-rAgRITUbQ z-`hG|cSI+o<G5(_z`nk%J@uu@u>ZA)Ax({coz$M>g1}lkqDtr zfs^ezD1Tk+P7?(ibS}cBHL}|TzfwE4>AN+@3B>+Km9+bCLw`%wpMcJ+tuAlL5RXa3 zU8LfYm+6o~$oP)OSm1GRD>#|yI5rNW0ChShIO?b8y7UxDNsRhU;K`pH%VH&8!4DVd1d!Q&jP5V5JUcphOROj3& z;^?cnfln!-)v8K;FbV5}l}Wy#2R1!4X1XTF7m)wF>uC@MKRxJkWig9-?dx6D@0Ka% zZ`l%{C}HaTPtKKPk@F_DPK9@;=s_6VLEe#vf`T_WETXe&cC}8ybJj}x3b3%aT-*yXQsRIhI1lY z2HE;XR&ITW`R^fg5lLHpNUcRn&viEHHd}1JIC=3bwu%yK+sbGFsbV>^LYbEhy_4K; 
zaH6^O?P{`E>A4hrxoKGT^Y73-?3$4o(vR&CcerUyC0@Q!6&i)2b#!B$IOpSo&V&*0 zE7+B*RH8mC%o`FbAXjPHzn6aPr~MR!SmQO*osp!X(1e)S-{}n;v@2+0R+Qq!31Q< zXVYf{g50Qea>orv$wsrlHeclP=_4I=L{?dc7hlEze1a`>s`re$Hne@%HfB~rhW9D{%A23rA)QITC~ z(?AuklHSIuy?*{tIb>Z4?^_zO+6Zvt&tE`cok-yO3ftGk>|g$-L8PgQQFu(h+w1#{ zX5s|ZRz?~k18CiNL;5u7X5)rEv2cKA=`B>sn-z1Cxh!JQ3Ojk1xnK`gQoG&bGUad= z2tpLsAM0lJX2cG8@MtHa7l(5j%h)cfQbKwlpHs06{fS}8zm*ne#zH-=FnQ2W^cl%i zy3SvlwYG3R2cybXl_9<7$C0PLZN0reQ^BrnKFbpoQj@XncJ!nuKf1SFM_iBigBK~4 z2ei#l+}?EMyVs0F?sL5CUrX6j_kCuNA znB*+;PWpxI!*B-J4AbH)=lyznx#i&=HL~>q3WH|}`b199YcYcq{ImS_x>U)b{z+aaEiZOC!k3+oLgMaxzH#~`*YD3{| zZ;y7_^8_?F6?iyz$t<)^@***<@Z5KM2&lgf*YCt<{E)h~Hen%`f)^=*A)-Fi8cqG- zG9s229DlE;p|UYI;7L}FA)57P{{dY{s4 zS7`zXdo_HQ*h=#y_a-~rrn#bI!LK9PxLc?B^6|C@T7R(bQ8*I{0}%BEw~PMPFNKJW z5P)<~z0p+++*9TQofws}yxxZjN85?x)do9XXmzCu$lC8M`L=EPQ@vez?)$i-=Arhs z^KoJC{?4j_tj31%AHsG0YN&$`uI>vCg*Ov*ucfwcEY!vASym0`GUhdB{#nI@82ia<#_kN=+nw2@WV2OAHcHp9Mxxn z#_!EPbRZoh;QGW2`&CPz3)0IZ2-C02hB1HnejnZOmag`gU{PADYbW`w%znY#;XFQ( z)Ah&i?%w-#YdpVr61>lY)##>5JvH`QCZ4g%PM&ApnB360y`JL*w?E71+|Sd~@jt#d zm!F5{pFccYyc=Gwv+G-f`=5trQlR)sSpnaH>*z9hPeh{vKW@H7uRb^YVWzn#Jb@_S zvo3MwL5bx3^b6vGY>7N%z`+VhCF2`CLA{p(t9DT%Ec5s&Lc=jicH=>{4|;R^9{CB| zbS?PBn#*@JSvX{r#yL3;Kx`OAxHEZBLHE;sMhqiy#~`Y1l_`3p5OwHaQUmTp;)f1+ zaYy!E+>YBPR7Gd+Ls_W$(fvSHJn6wZ3pnn0?!Sso^aZjT>{kj9Yv|x>)+S&pGy9#S zaNiD-Yl=bYH2`s|7u{vkSxX~G$kYZc`9WIFMNg_8H=1(Z*|8PNGTnJ>(c`ZZ=DEN| z*h3wca5ah1KtXrwP@JBlcNTv7#F;ntKY3I_jAE0?^YNGJipBHgIH;LvSGnk2*sp^K z?CJ7_z5uHROw?f%i1>CKl8EDQ$O&JgE=H7`FH??QN>5TZ z+cgeg{pNXQrloRfStTkK=;^@fZA6pk_aNdy=60arQ304~!zoQPE$Oq|IWrQWLu`dx zlm%anIvp>sgsNU}6tC5B!Y*lu^TcwT5^q!-6H3wA3t~#FsOGbk9OmZxYktP@evI}f zn-4)NPxSGgH}ov_!|#rqo=&GCxN_RroX~>1?JaJ7r7{q@HuyI$b#+5+KN|C^F=rq( zr+bqMZdB`-boNoU4pEiYAAJBXET;JmOx1XffYZa?+ZWD%I&Ac$AcV_%UHEZ2c?8j& zQmz}xoou5%T1zSHXycp#2)#R)^pL{5Y`6})t6Fvj&%h3&j`0strljQsNyMulGpxaSW2!BMs27fXe`Yjf8$*lv|J}iqGHMjU`x;_Yg;8xjw7C);4P6)O3X^m1eZzX7%in+~0g@{n8Pa~;?f2x8`{1?jkp zA|MYwQt+K>Eo4l;5S!{TMCvvU#~<4_@nn+w^^M#_ooJO`i$K6{MuW40Xg)nVH7vvQk7Hd9mm)&M^7 zROC~&{Ifo@ZvV>AuPw|KcF%%1`ZiS7N=GP(n_SKsS+<6fG_v=F;x+R{mm+n{S_=s7 zqm~JSW&O?a?x#+8NNoq&-oX07e)Asy*jZqhOif z&AL8 zN!uI?#Vvcyxn58?qC6$*6}%|qX8;P-CgmZi=BSAMj=*vd*)~+tJBxCeZpuj&K+oZj zU^N|OjTbZXhG-nD3a*ZYvS|Qmhd^Aso|Nlw*9#A^dOY;i)6MkYfbd-io(jGfQib(e zP$^z4-;c|T`-|c+0#7vz28PfGz`Bglk-5BFDGLgG9Gw>b#npTPU^+T?jrD-CniM;c zt3k|;zJZfR3Ro)yB>M-3%cuVin(6`+1%X^kWJlhL0g&A4J~KB z3I#^x5V-4|v}k$`Ss{c>*AeZ)3Fk63{00oM-}SG<6NmZF0&fDrKoHi~fg&i{mf(wH z&NgC;eZJevO0l~+`PxRdS<=f?6x-mu#xrdH@5N*PcHHUbG4-?Cxue-XLXE4)CyyIWn0cR0ESP(}tnn+i1z_)7s_1g4ijjK`>Gj6@SS6R6iR2Yn>H`GkW| z<@{4%t{vnNS~F^(!;Z8=@USB<07r#%>@XwnhxFPrYjy=AM7|*1+!6vOXLWO(Vl6cT zbr^8D!IVI<(hKTeJx0ZGfOI+tPvOvQ&)q;Pj$8Sp@dPtwbsngNKv9eVie>0(8xrrb zM-aB=76R}SIY-i`g$ate1~(e$#4vtj=#GRFz3S{fW|Nrp@qExi^|5_&uca=sLN;8K ziMy-+Aye8Rt`KEH&)42-2~=x3^~hPBkW`Q3Ol7zZYkVK*UR)gn$c z!?tCqyC-Utv%6yCrIQ`8qf{telnz0G7l`1#E%w69kXnFx@Vi6#>Y@tCAgp%CLz93l z;y*h71|y3UnPz3`OC<5I_*neF^&EE4C$(qUNWNyD2oNcK1F(N{i-PSWo8qjZ2qrvo zbL&^eLE!bJ*O&$zw+3vjGlX>eTI%)5oke=!j|s5YI2 z%bKO(Y7Pd2urfH8vG1Kt$6J6W1OWOiol{<-$)G9hpoJ|hcJGss%^iWTkst+|S9ohd zCQAXy^pgJW17)G7>zkW9%s}o-Kat%j${Q`E$}s{uuyG0*D06>ypJXd=hMo3SheO~l zmYX^sBJ!s-jR65+fFJYAMV#x}Gle+!mS^j7IoYrP{X(46+#L)pLN0??pqS)uU1-*B zBrmkiS{wt+U-d$kVsX7TRCo8$lXMN;v9_Idp;m9p#gt_BzMsA*Diba~dO-%mtNLv< z#C(d{kt-Nno6Bl65sA+ZHx)aKMEO(FAfX}@o*>jZzQ~(s$N{3Pp3fdlDujx4t*J0e-~t{wxF@IVCq1`K{;4DuY=`}A{oE`t4mLPf zI3iZ7qQ$_m7ykUd$Hxzor=me27#+hvFwsY|gJm)(I8F)D{M(7K+unAy` zPGnK{LC2BnQT@S=Z`&Ev-O!Wn3Z(eN9(tX{-VK+Ce%%JWZjRk z%dkGB>Sr|vr0&TP#(&Il<8jGB8-t!G9`o#VBTo)yd-psEV+DL_1R0%tcnx%I{!<(4 
zp6Ds7iKnz!yX0xpRquVq-<)6nzylvX;NO(6NC{TJk{y1F=^W7+BCIlH&{QU?l$}ZS zsL#2B-fMRo1zI2C$QP;F&-Ts_zTf*nc>WCyL`TBEVUdff+dLgE85MyK4d>|K=)}y% z4-(y6{^`NO68sb2Hqz`TY>`YCCBx13n0t5e3{hEhqsWaz!#BN4I3UN32)etP ztBMdJCO;`k#PxzT^7@K+ub8t*(C2=dnDr8Uc);D8#-5cHGQwB18EXAGJ3~^+*d~l7 z?+hND^ZG5E?LZTrfDDp(O8kS(r5WrHVh5p0*1_tCdX;4r!Btb(kO~pUFs&~u;>Lo!J7_8q*tk{gY@)Jlj_h;kH04j+is1Th zbuS0ediUZ(qhY_iF9KG;MKh?A6q1sbxU3EqNP4R>6CsP$u%AuR-UK5^}XBNffh;fesnmd^njT0T6Lv z-@i?Zq71*A1|4dHGrooU!Kb^)pZj>9u&(pYB14>>CrM>ttp>eLg7JCea+k=!g!}*M4OMLX!CysdZ}=+H0jI&eyH*B*^FLF zK26ecKgZb&ab=QEHGCKfb$D5>$|=E6ccsl<+-syOBnlTa8R{9~!%LQ8LR#2}kobe$ zZV_rl5l2CqNUap8T*B8yRaP-drtJY*Gy4HZSz;}8+nx}cT`Vje?J%tj8Oeu8?z;Hj zzIV45v($dT^9PnSc+V1eizI6P$(LdX4z6>)H?k}Is6KEZ= zLS6{Z(YMvz#oXM3L&+E!8rJ|(EGh&K3$PbLp+ua9yiV*A3!4@+WtcW2ih+-TPiC8W z_?N&L`C$Q2BE%*Vii%*Pm=@n8ijvt*kx%w5f?Uq5bA<~O)hvoLv#Mr&DY6Py_-H<< zrWAl06hEsfBA?f*)uP$??6a$Sb-lhAm_7OI!iXP}^6E1{AO38yo=!je{PV}~)C6rn zb}&FCr{}l*WOU+f*y=EBpaR=aE<&#er78Q+x5$A^Wn084ZY*Oaiz=$lJjx)kr78Ll zDy6y0#f&T|K-=(4?D`-jC&$CHgX5>JM;Alov#tyU0Ah|afKZTs#!HEEG+_b`Ht!%} zDVTqu(8B2A!N9pH1n-OeIX-<={(Q?PIS5#|w zE_6=g(MW`dFii}V0(czcqJ^jcbTBjS4dk3Kt=Ke2v9I=PV9SQ!DP#tCRdvCM$W-WU zp#^Z)?f9guioN8`|Vnb5q%~8H~fx`I) zPDaPf@EtIlN@e4uhNZQTS}UoDwuXo9-Y0 z#fH!k^!C&hb50w<8=tgIwz+L&t=(CBCE{)lq?jdpGEhV{H7@tyIaY?H`wY ze|9?vb0>U`gJZUxgtzoz@jWIcLNbQBElgO1j}T@vswL|X;dz1Il6lPTAf}}$BBd>v zOdASWQtw{ZzImJPzvybSS9%M6nHPyo9NNOqdbf77MuvNh+2};m&~iVeENvr}jy2TC zXNqj*;j!a0vFD#b|4cCZ81awvca0-VQ}E%a6Xqm4bo$}dlbzSWq4*kIe;P*2@U7i~ z{fT$ca3@48!SjAb$&rG#xHslyR4Amv6mcNysrfDIMYjj~Y@NYp!^X>!;%#;gm*B1r zV+^G`(xFq2ALZ22R{l!o);69zxu*yF2Z!GsxcCm0cr_005M(HHr@$0H-Fi?BB%N;0 z4>WMjrz6VGGn?Pq2MvjlKpkOZ1w|A9ll2v$D8XMPN342ZFn_N3N>bexh3G*AncvM} zGt3`lla&UR$|E<-w-81lAtuup%^t5#zKm*w13sufI3$ zwYgL+R!tnTPxLF z)qZL)vAQE#NO6soKjf>(B5z;gVi~#tz$fi-k?J98JY6+eEj)8wDb;s#*Q&sabv>Qv zllN(G^yDTfI--!xk?vcYsVmb1pap-($&q+IS~v}+yNw2I53K(<7VqVy%8t{SNR9P|t+cu(%QwixM zN&fl)qtSN^h8iLqXG>iHk5smFCwZp^L+}e4Uw7jppN{5+^JN*j@SK0d(*)QBzr$Gy z4%0RVC~_UuyPHZe_D9`tJxEVRAr_)tMnO@Ikr`38qcCK^V1WQ|Buf@;b;*uBG6I5f zZFM2pMpQ_+jM!?VTr!O13=p8419t!>`P~uY;5;+(zEToPp!s)4d0!F9=<4Bnl0@PB zQN*vN3Q&n{>#51m?b$+*8Qx2h8G3+>n}m=YVB=7>b+`EWm*6WbjtKyX2f{c(HZ1urLCi54`Amg@B*WF+fe zA%lta*UHUeJl;(Ws&8(@ZZMOWCC#L`LM}0v{%SmiwsrbC_@vTWnSD_?t;}vRLWkyO znPvr&Xs++{OoM5Lk(Vy-YtV})<^}uxFQGZN(jOux%f3VmSDn!O?V0}Ti}k8P{hkj^fGFYfm6e%=0!A`uOm4Qt{6W5|=yg~!zEV_-<9DcmdmZ@k}L zVw<4(6s7tYd;5X=s_pI}mk?dF@SdgowSD>G>EUUscp)EG`m4i(=TA3gy!N*;)(W@v zl9xv0<4YwWhp{=L%^jjG#S_2d^wmh*pbj7KM8r@`i`I*YX|JmXF)&X@z3Re}g)GGT zY@{oBba4Ll$#%QA{SI!kdmp-U_ujRSvtz=#=QhVH45BEJMM|Zb&>3(4eEQpPAKI?R z{=PKc^&AT*&O_ERkMuq#nU(OoA{l8tvAW!|gY!*ib-Qz_>`))33c26WwdP_M_H=w= zZsF0F2lm>+;}H5}zIyyRjNv84H>0?_MC|a1^~t~Xb%}W=86Y_NBEQFYSD7< zv$bT*PcrA+e&9z1@xxu4iw~>AUG@YTLqd~H;iRgc)}}?Oxp_fpfCDuRZ*pcJ*R4p8 z11PZ%g0@$$S+xU14p#FslzIGW%W9FR_fYKLnw#LpItrx8NWRuetK~YS_}Yh`BE8oh zWdXx4N5wMIg5eih3Dy9<;+W0&4Fe)Ra3dqD`@n0~I%lE%9V0*i%b3xOXEosZL2MfU z1}~Qu_RxG@lKWTRYmSZhs;06$)MyAao3I|Qz*YLf!MLs@$&dg)lB0tkmpA(H?ajyo z-!DaO>=)JGYG73pBXJouH3k6tI+&KTs{w6pkcl^;1ibudw#idqXv&xiZ~ps1nlWvi z->Arm&GW;^NdShGaswE~mi(~4`7a4)Hfuaua4NJK6KdU&9%@JRE zt_079BFo+DXLDma-r%QoZHCO?ypr*3K(D6Z>7i;4#Kk{grw3m>KiEG%JUQNiABMun z4u7-v-QMu|-tn{H{`0*v6y-J>3ccgDogw*t2TR{I@SI>iw)moWLSA&_()swcV}#FM z?(auvt-R$Q^)Lp-|YIyC3ItvS@Sv zL;0cNntv{SpTldVy;o0EQPQ~#&wqGvkOflOB+rv9ZVCx^eUyJYJLO11Ydf61-fnr!}-%Mt`+cw5#O>UmSYgRQ?VmtmY0 zl1)nuVn%!$zsuo2tK;QxE6>ZwBkbh(?ELiQ{`pC}SdtXLe>Kmr`-vUz?-~{nYZf6oC%ro_-SfXha%hEl6xQ}8=0u0Us3y(^(P zP7Ma{#CLUdD{+i(&Ck^O7hil)%+2z_Uk2=))&m(CbyTLN6K?2RTWe`{V}2u5trBXz zs;7ff{4UXu`c`tMxKwyD$hwi|OQs1DHVgH~QJuP=D*H 
zK^uX>9P0t^6-kdzSfqezHdV^5o+_7vOb}xPIzL8vah-P_TUE~i+_`2KVPcz{zb=A zG4o2~l+66Kr)CCfN=jCWb?07`!dXi`yf9%Jo}9kecKCq?Zy++>N7gdDJpT3=030@{ z8Dv;#O@epMelul*+OE(v%t{5*$Nsl_&kmBuOA}3=Ub{?7^V2+VON_RR0EPXu*|bH* zXYGO37ku@>)Ek;cTOBwrc&Y6Pe(^IL)3^iZz?s7Eh{ zw3kWEJu0X)<|jv--(BjQz#!Va{H|KRAj22F#SK8e zn&laS#7UOA-J(lp?A7bu#;5zfmN!=l*6{G)`|W&$vQ$2j)!U}_>hbF+7~)SNGq;=Q z5UEA=mA!6;5eLz4ORu;;rn7YZ*e0~hFkQ3lefjap&TDg$+BnUO4Ks71p1ihJ&EJJN z)vAws9MuemmSpGNkDK%DK$z@(df5j=iu-M1IvNcFgBPu!JD>bMrmivb$b`L7uyw$Kc@c`a2t;<)iruhsU75nUuRP@pFI!GIBWy1vPB!$m zG`oj+d`k*HC!o-aYIR#7UK2A;-yLc0wp3B2pB7QmqD}*TT4M;776(8TqA*r&*`h1i z#t}WhOmYbh%{62bqNPRs%Xa;8+be}bFu)dHP=moxa)Z!H$RQoF`wm7%GWP~^#%*mo z4$wo{e*IqQh@it8+I(0tVwSPbQp^J(0PV6&N`i?NgtJ_4xe_9SHR}tmICug1AZ!>m z*Z~wGCdqXI6ydeYEd_hs2Mz=6QVh1~k0m&ci)Do*%w-JlI_}mp)=naW1E9Br`0T)@ z#%_dCOGyKCc_g{jk*I2*wCE}g{Xt|ZL%{r@k`Jx<5wd(!hwe0cHzH6VkF5T2GCwz6 zb$cVc?w%7AwPH)E=2R`~NCmiU%W_gZnqOX8+JL6MSq*#<>GmOG$b7~5yCIFix&gkX zMe2>1yif#Yq1RDS@^ke4B^J2Ka1!6a9CUi`AI`rvY|V?)lfVCv$n}jc`%vBuzSG()YN1_bz zdiJtZ6mNm=Uqj*+vW756FQ+TU>dY=kI3Tan(LUEBikTRr8drKusSPU_D!Mpg;9K_d zC0tE7BTm&XWxaMzz(x4BZq!yckPNUxScYN|iq|2#*E^z!T7u-!tcXB(kAfj^-XQP` zz*AgO0#r8lNKO>Eh++5o_;6a1d(dUC_cVex&qBy zAmd?XbQ`4Qyak;35p!r2;)6CW=bp2GK*tWIG zk&6V#G`t$oJF+9FCYaC5P=95}_`7+i*L}mweX`-}u{3$k6c~%x6Bjcat0+wE#G&&dwo`|){6i%$!{OBHUu+o0cfE10@7;z4T9N* zr!)i@YDt)qyO-3k`?lMQTHETpCR$D2tnP*;!$YE(6|iooU*kNB!LkRoi|DZOS*Ofu z-Q>YmJbPwkBmHq;dCLycp0CU>)%gsSV%zXG1I??DMEU}5#EbcS+U@nXY9gn4iqA-% zrA;rZ4oDm<*12S3*=8+d_3`;B$asgx=fjgPzd6`HA09r?&AB@u0{rL6$Nbt06KIC5078G zJRk0#9DjFkdaknNYgX#c zH+CnfRL%}g%^Kgk$G6~=Dqb3irzGdhup2v)#p$-{Nut={)B6Nwd5{50zojcGU`lLL zJ%0f)PA9$t55KcJBcI<4I1KT;zNwDt>9lU*G;^8UG4w`CdZlcFVHhA9OB~;spy5^ANToF|;HXs$ zU3`jbNg2IFq0E;Haf)GhqMBj9Y0y8Ts2J*JU~nbmLIx}PovqttxKWr1`lQ?t8HU_y zvr3Bjx;KA1nM^S*Zlg^)@*q0iSA)8{%D$v>w~6uOWrdq8dO2%oO}IH^2tg~~@9k(ZC3ye?O{;Nj{7IRbI{z${|wA_6ZmWEcce z8)^tSy2x+8F$|Y0P|cdPVdvag6s;-8G~AREjGKU)j}@$?9rW7Mdc1NOtySm)e8BKF zBPSF%Yp%6ce|xm^)!0*r2p@)N9^Tlj@M*8 zMx>_^Rs?rB^L6s7>Dle!qW#SUeKAZj`t8^PUyQuIWi7IN5QL|yT?1dTATl-bS_it0 zBX1}pAkmGLX{v-3GW6LtsPv-YHQzfu4N_I6FCv$tYkuz=?v4wGRZ2-TI><2LT~V9u zKd=&3x*NA{>%`Oi>+Y`^Y>`6>*n)uQWU-R(7k1crxTP}xo|zgACD>f4TjwbUZIX1o zep1rsfV^YO-wgaBUyR}l84Pp>#GDbZZzG;Wl(0jx-g#S!`9qsz_Ry~B`3^nNr+36p zP!_!=^LS-A=5RB^55iSu$9_)<#~h*lZ%NG1oL z$G%6rb>e7d`0slk)&GyqVos_}M=*kE8&mS4GJ#h;A^+Z6IE-bqfATYK&n~8eegA{& zva5fjL#vzdoP2+av#e9Xhn71v7+)~?Q=WKApZ(`J?y@T%cF+x1oz;=H@(tlZ=!&00 zHR*{#hm@wIJ}`UOl0v8DJ!##sB)aB@7-H?^PkC*=sj5ZBk0Q`BRqu1X8d0OhLH z=&GA;LQ_*<3t0;Cr~D6zJwWPS;KSHq5rP&FT%pfo`nfjT%eCIw!Sn3BuNv`YE)^F#!p6 z%*mzn`rq1t2fG-g8q9mU_#Qx+yO#3Z+2USe5Ygrxh}%xTvFaYG+(Lb=Z}IQVQTc|R zT2e^xEKOw>)r@8f4zkrYEO(1x@KlS=p@klGn4h^EWaOa!x>Nb$wG~KTG;%HJ=wZ0GU8Db?>pp z{Ky1dz(=cPs0q@`Bqk2eu3q5o<+7@JMG(~+bfoqyr9}(Tj^G3!P%!QP+tLWbi*g0( zPIfpq>tyIWYQG5ET~LC9cqP-}Q~CxTBSbx3AjZwbe7&-eN8m{?i%$u!is^pG3wO3! zDyXR7#V#r5`1o3PZQZb4+8Bs`{Ro;wBD9Ut*;*8BUio&vTVr7FeXmb}KQb5*VW z@sCfxn$J7taNH?%!_l4jV;_D}|9)8#W=urGFA#>^l)zwd86X_Pn{qY2HUlfKN;n>5 z`I)V1$Yx$&%}Uc>u6H|~KHYo! 
z8<3*;V%6-vdhI`;le!wtXNJkIZG?=3Dh(U+I<8YOE4qXcub5&@N<+P8_5Z1c@;y^7 z^7F%?z>mQb&UQ@D*RwC+@M<|bfRPO@S$|B}=?u`7+PPOir zFdYC@2A2cZ_Kae9ftjXv03UzoARR>l2Z*9a(1vM7Or6A>2izy}+MuGrMQRq+m`DVo zn1T4cxUO!%cq!$P(nnk_A6*_q2?o+Ra~Pk01K*l%Pk{D0?w_4G%y<3^R5tXKBYy*7 zdS!M`_yjSNj-db|JBPbiH?MVh*6r)w|WQw82v=h!ANk%2d$} zElsX_=Fz~d(C(*87(TKENAAh-uOdhVBN^0a(2Jh}OLZcd3kN61Uv4EnFQyhUg8?6A zwvIPZN<+I1<@YJx)UX-RiUmY&2O3^hgcn5LxXip*!hhw>4ZsN1E>nm=y1*LHyBGFZ z%$vcadOI@@>%aw^VF2o4K%nWGyM_&-r=AUap;^z#Hs^eTBs(O8cyejyESsuuLU!|1^ zzARXTJl?9e7{)6u!NC!Z@QC0{16LwaRyiZNFj$tR=+a{Md7D~a?&z`a6*u+OHFC%t zMY*V!(>oG@CjqC+Zpu)0O=yl7k3Bcf^$juF^^C%j#f$LYFmuD#Cr1aLUDUJBe#96{->=UVK_-;Kxj9o7o=EML zNIXnT%QI|D!0y8Rsh!)Q@E|^S%Vz-V^M&42obI7uYf1C~JZu$Y; z4j=+>fz7&@Lo_w~exC(Rkn1igcd(G#Y39Lg7T_XSp!xdhI@%t^MFu+}&Y;jegzAE9 zM{GO2ndF-4^93`E83aL)c?k-ML`{Y0g|q}AXXJLWBQCF;Q~)_0*i#Xp@abpoBuR?` zu8QGy(6t3^3Cm%6F{7R>Z>%Oy(ZSNM_0z>GrFJDyU=oMzhqH+zNcmn2vd@HQbO(*F zF^VEpwO9u31;uWwq(sr6%x5b3$O%V%&6niqCdfp=@BK(53JGocQE-w{{3k6p*`Kx) zopN9J)wN`$ekS?K{NLsigf>W)%LrWdCoh;t-#M=$N9GP);4Ov#K^868NTq#^kdmk- z6!z+K&El_uf9D1wtmLkTt>d+t2oQ@y$x+Cr{*y+i){hD?R-^8s%h;iFe2TOaAvfpma+~l$bKQTk(jT0B9xHV za;T)a5AH}gnJi(Or1;*`r>6&JXZMV}h~@D4ROIma)Y}xG@=Mb$Yzgy<%(wY0%zcHRetG&!$vjM7AFkKQI$reM| z#|dn|7b=6?6aJSk8T==TAg6%uO;O5m3^+HvMO`lGS8tjr*z#7@yKTBxrNiN8(BL!? z+%0aa^3Afk9DHy7J*%`~>|ybIZceVGx`ceI%^ZX!^UT+X;t3eCKmieeR#8NJ?Y&48 zz|#d+_xS|zuBqX*Kl_^$9`r&N7JOat)FpoU}<*PAQCG;Lmjm zGuSOoVJ4w>L){F;;A)Ea359h5;GBnEDNE8RpeNzI;R4u#%b{(OO0C7lPr~D|Eiowo zVg)FvfatrPb$bI%JIc!;dSvgi%g+D;5zVne-xhrw`|eajT50pY;Mmd8qE=jU^iP+& z>C2zbPWHbYo}Hf_>>cT41-Dw@y|~YA+_Ed3_7d_64~&33>1L-90?dB?XtiE2zfdpU z{%61VcmFRwbPOz!+%Ugevxe{!7(|jGQ{d8G0oKaU-F0Y9FKVhZlN2oa#1#!4pFrFS z_{^IIvR-xe*nNy##+Ej%#L6ZHaAK2vdpdDmQFBVglj!HizwG?{eGi!8C)UL)*5 zW$$m-3@``AsbkxqT)(J1l!~MB?xGSXUQgI6@9_;vyt2JeiJ$(P0J*h7dJXWJ-z(} z<6vi?sxd!wz1%5iVVPdfn^ndgGBba9o$V3;_01A{HwWZ>P-TqbihCDDI#Fz3QFt#x z;0J@gD9}j2m&!pK4T?S8R47Jiyij&Yfx(Hg51MtppEVRN0I=hT}V4;^oqJOVRqjT+Rh*< zDn$1NyeKCXa7c+)(|?hAy>QS}l(rQ^VA7B$eJ+ZWvge(`0tgr$e#mkckHCM}=+yxU zBHW2ocFuAFdY66y(CqXRK4R5`2rjq)(LZTtTstzzB7px-7Td^dqjW^&w!m+l@7nhA^o6o5kj0OOvxK4UA zY`uH*-aZ8-!E8NDyJD{H;C5Lqch2NRj0e#j5D8=RX#thYqJ$LR!2`N5O4Ce$!)o%>K5|AR!hKt!?Un$Y z^v(q5H@zP5)*v}=*%;akEVY}sU6C-c^gpq@h#~vHCB8pq8h#)Iy9+;k4s++H~|{9#(}zkawfw(o-=GpaD8aKlA_fm<2D1uo*Rfv2ErrFvJ5*W}x>H$}h? 
zyb-setXs|5#iR;6f?N~J3edqyBeTLx$@k>8$`q!C1-E33*uHQR(Vq`@Zw;4ILMEWa zSdM^C5QZn8oH7$_=52-B$^{Q$o1)LJ;z-b@@=3L1giQpW?MXwDX}>RhfRB2IqA(!`}6klu1JKjru6|UG~-mJF1x1j-7J#wUY^HRaYrABPE ztr~@44+Jp?-)T4C`Y|oZ96Ql#DHc`aloL!e}oloB|)77!v6PquXY|i zc^x&uXUm&Gn!Jk>&-RncJnrcKo52Dwb&Jcgo;DO+bXRz;^!gL7`DD(w7@c?W(lD=C zUy+o)rXl?zkp#h21_UVWS0|tO#gCL`^}Tmua|He=oy&>ccgPZFolGE&?9We93RU>9 zQq|6$#@mahX?B6E;sZxrlrMpXVX!txa#E+HQR5O$%MW^+7em4eYE}myiCdlUOGTI}RuGZ142Ty=Mnz zHqRm3=QZ;Yw)-%su6+aK->B0HS)8pZh`Ps~Hn+3{gjzqaZ z7+^1FZ)PYPyF+aauYy*Uo{S+S)h7_Ow|62mRHSOP^z->dgn(1X*{lhdu2Qz4@6c@E79U{raXBJm_vC+!m>5Rm?i;)}y>wGy3S`q@=$`@>@Y$8Xn?!2chz39g4ur;MS(;&c z1x$uCf@G4#w!F$o2|MUA5~+iCAQriU1{gY;idb%r1&okg^M#N2D-l_|Z-nRaItmvy z*7YkrZdyPNm@-e-A1Qm?t4YU2-cDLBFQYQ^12E=#=47-a#I+pqXC*Zq_lOEuHJX#DGz}m8fO&8+dN|57+=z*c!C(WuvO9tTJq#5XP6dEW~kZkv2X2ow2asvvPMh}d>N|BKJI1mhY;T*Bnaxf6F~?GfxREE+`R$C;61Mzd*?yL zzbIhSo%k-MV2wL>kk1@yCyhkC#El-5hHe^~iNE;;-edT(GuX6M2Zs565^14>BBO%R z`!uiw@9IkBtF^HL833C-ta3;E3SS-?1~!49BR3cu#~8^N0(e=DRfvJKaZQ(6P`tSv z@Tr+lj>!?3C`v|pp|7Nsk?U*3?rbTBJ!xhn73G+Z*j zT~B0zixr&pw$^M*)`X-62?{!ZfqMT1FcmD93%%&p%_WcT=3XK7<+g>frR?c>iE{{=*B-<}@vDE+%C`F5k$+hmm6{ zLshm^&kPK}R(|;Wo8j5Fhc85jb$D`oCcOU~a@4)8mk7;*oQM0%t7OgBW>szhR!^G1 zQs<{HSrKNa9cI`t>)J`;(HoBN{{c`-0|XQR000O8+I44C0pyj}(+vOsaYq0E7yujq zaCtOpFKS_SVP9i!Y;0v?bZKvHb1rasRa6N8152DGVzzZLVs&^6009K{0RR956aWAK z?Ob1P+c*+`_ov`FFVfa^lXf2tK~o@`b??$k(jayhD7uA8%XF+(mVBa|#8>QhznP&( zk^18}aoT2q8YpU$GsEF<=EoUQ$_{(_%TpGnlPI~|XYWR@pZ*hDI^Ax!bDB<;v0#~y zi!_nZ7`{)_42Y8}k>$X^-ta39zvEbhd6XtBjJcFTvOHzyt9*tnrn6IY_St9_Nj6u) z;aj@EZXUDoGG}qhCt||JD>e~RzKrvJr_(vg1ka%r^Gj}GZBmViv7+nMZzOVQYOk)!Mwkc5sItIRktFJ$-AO&k!ELCAjW2U+K{>WZsTH!>oJs{Z!1y z0wi3DJeUiRI#KOM=vk#K&A!OeJO#5cF4cEyIMe4$?0og>8(IoA2F!iVlS#0Eh*2$> zfZ#}iD;_Vwwwbt$B-kQ|lEo6qDcC_n5@veSW$twQ5T+Cir2H|4Ri0*EIa^1Z&3A7Kpo(UDkMnVW^lcWGj$)(7OX3(KQ4+Y?QKb1T&7KsR&mgU=YAq zj+kqk8Rv=qS=tyG7n!(%@hoYVp<)-+#VBTv&Y<+Srl?$slr4!-&nh99l6C{^W6J&6 zb%qXtBYwi>Pr5xylxJu?WTA-h4DA(t8p$siB`hQW&4e5wn!1K3c&8Us!1!#=?r*RkZlDCA=k;~xYS)7p|HX^wMI%}PaCTBKa$qU%G2 z|0Aq<2^^lo*x(^OYUs6)Sstev+WgM!FAYoFuBulGD7FTJMipsjIoGtnOC*7k7r?af zi-R)&C7una?v~L8Ilo7=<-lfY;SG=!{&X0}kdqWz0LrFPNpaMIUdr86Hg_f>wD+dj@+nEu*h*fj^l1vY~ZFX5_?=jjzEGfheyM$-;+CpNH ztc8xgjQ%G)hh-w-HEI)6CNRyaX?!bC^??I&2G8$nfN_V0tuE(LA~!IZCjF0C)h(Cb zqDBB4@&aibwxX@VM+ploxQhbaue*Qkv#)!9b^DO7&w1`4xA(=z-5tJM`MyJsN&OdC zAVejT2C1R^s*2sOUqSj`rRXkrSO0SwB_6T)ZX$pNZ3s}z&02yJx~!;#Uw4Q~@XGd} z41&Sw`RIKx9E@<+1X}OIN54x5y^?)MV| zpXrp{rbmwsXB;XStC||~@CzQ~SZ6qqB)tko%$SkWU@VovFm}k7QYi%kK}1ly1UQaY zjE9u#EwX4H<JVEg%6VMZZh-=VM7}1j=cZ zKpH%$3o4avWm^Mb3hj>35(@9SsxHR{mV(cLqh;t*-DVR)O3;`sCE_~3DIVy^a*a+& zlcp_BEP$2@`u^d?#o_zl=N-EsN)KRYt-|@?#mG?dL|%)`7{c|-z7#M-A%$LeE!1|#AD$c^4Iak)2o%cgFiTZ>y1u$@fr#_(xIAmB z9SH77!UmCZ0BTiZ(OOnE$ih~PFsNVr@(fb>ji{i4vUCrMaqk!HAHIt*RW%|9kLz+B zcyQvT4S`0mJrn?>HXuE#Ie;7PgfOJIgsi(qyA>qZra-A}`e+8w0k`iUfNywG5d?lU z3yDd+0FPIlm7Qf&-Qc#a7f>9ETX8S$En3{6xa-2*S-5*~Dem5dyA^jYZpB@TEZh#; z?mm0(d#)rS8Ofg~^P6L4j3n=yZ{m}yKxXN%PnPG!EM4jTKpSuSWs^It;_IdMHLIwl zmGJJJ%K&{vs|}$l$MCq+=8N6rU06z1XW91k)3eh`YvLEHjY0zli>w_WNb|Rxc!0V@fR%4Nqj3#TtyI(r9M-1I@HIH z=Y#J(sUajnHV!>s@~k!LLi>3Mo{|-RYd+Y)OVI|^!NV60!V{^7ctRh~dcR*#E?G`- zKFj(|{MO*4&68wMzxVw|JMK?EbPyr7<-&6~D2_ghD}EPK<_j26nA5w0z56J*40?+R z2db;{?f2WNI5E8G{znWnFAfzk>J3P*swoWI4cMh?~WM?gL^&}8urSS>Q1`W`D`9QK__ByVHXWEhN zb+A^i>mX~bkj4d+ssOo!YgA|XhNNA?rJNl9+LSdVer2)cCXWY2baK@^|W7E2Jbn_{b-Auyw%io32 z;7>8a3^ewJ1LnnySucP0epkgNu}DX2BPGO{&pbU!$ffa_Lr@Fxv(S5RxTZ5;WZiB# z7E_ci>hoNozd&`v9Efa!9XJ9lhepNL>G!flZ`W$TOL$t}H8G~@L@s)RiRj>YJ{p_; zF+OHxy0D9|`ef9junmNne+ 
z^=HQy9EU4TdtwYD9NlaqUCJRslO#+U5Hov!;+MVf%3tM|diC%J1N)d1`4KnNzLfgB zP74&9qpgn$RTylSY`AVsK5h}~h+&PEom<=_;3mTP)ruz#AIkDe5Cj5@yEr&qn%cA= zwCIDC2>I}!M-F>oEYWI{m8O@YlJsrhNBevPnf`ZOi+Q-i#^!tWHM7;z>=`%O6=oM( zQ5AZw#|28gEmvIE6iua1j2Fp~(;uP+W6dJYI@v2>@Tje?u-w3E zc2(Y9Hc4ucm@Dv#Ek_<%#a`GevH{e|3~<~QLGftYi**Px7?CbY+7BCy^7J4HWQhde zp!OBm*y?6>X{?N<^$*?Y0|(-xiO;HWme?&w+s}eakH!P}6_jz@RtFzzUbL|e?_`=j zw-(#FJ;CMeRZ~Gg`u@*Y-+5N8+&8bWia8)NoF58vsE!(V-Kx?$n|U1=B*x6)E$lhq z3}QL}CE2|{UrW%hmA%{wR}u6m(H4}6C z8Ys)hB-&;BgIb-ul!5@e-x5boo-oMOdyX3D;-f9~^Wt_X+LwPF(&}cozDJy~XWn0S z*#@B>f~y)t5%(=z&%RcEB-Rqq&OS)7zAiRR)P&J4%T6DhbcU}#HnM9mBT=@r))!SD z{bjJH^!xR53dg}&qXE}Uz7%D^+7G9g5J&@0s-opKZ@^wjVSa8!nidv;^H%!{=;k+2 zk^=)QIi0aio?PcbP*zH0?9XoN!GG81*3G1K#3s(NZy*L;U=ACNp_viyyZ*b}NMpUw z%?Ac_##Bx0WOz1)F`+Pz>ZqK(SYZNr-l;HLCgx#kvxEgg?2h7<~7w@y9(t0RuFjisrbi?gW{ z=%4-JMa9^R8eQ3pT-}iYQ15P_0DyPcZ_Ry91+1O7KP9X`GgM!O<$<`sW7IkgMf3*W zC+FpU9@muEDC{{cVavN}%qm)pSD9!n#hYO;53Kbzc?p>ux% z@hQ*JzdNtGCw~|5OL7J< zR+7S?>%5_G0vgVGx!i7-h!4EfA*ggr{tF?Rwi8kR^0f5^D+cx^4eGMPdi?--;v$|-)L?#eK48e=$30sqrPoC9-7UIhn0w~Gtj zFQ-T1v3AyMIpZA+iSEOYV{fC|$@bJ$ZE6xCey;mrc3W#oMtmrzqq!xeJLSAuFrEUZ zsbkLJkvM*K9R#1Yy5N*&*0B6^zH;}uOWZS~;jJZ`Mb~h)|77QUi_MId5*Mq&%hd0R z6_0Dhm{MUF1*Fv2a>*@JQEqyt4fZ{R>+qE=UPqdg+Lw>VZd#}&@R?l2CC@{)V*m(1 zO^cmoy}Zuz3+CX=Lf9XLs_EYC?a0Gj)7y>J`+giZE(PC?hl?jhA2-LB9KoyO%G;?a z$f}yB>m|tZg+x1>ghbwtUr-=h|Db3~ZaR%Xz;@ZHA%Qi(1D3l-tz@aSoe)vT%qMYm zL={8et^GjBc(rKD06V`ug9TM>Xs3=U+aIEDKUbd4Z!PsS{OzAS;c9i}ZjYqv-d#xT zCoDbTN^3Bskl`O9>7TQo(+!=sGS9?n1w4g0ya%qazIrEbjv)MYu(wCkQkvfqy4u0_ z%$cq;5lY-K<*Ikv(n|3QmUf>Hfk zzZC5Y@r3Jt&%Ay7;p)EEY|h^kg4`=KBD)gc9sy#<`bb}=(gcnc*kmhP1* z@wwyztlf>^%lFIsH$(`M&lwz(Lnt)qb1s1!9o3{D^<*w%CEQIBwsS>3hG~v|YV1nz z_)~xN3GP)k{}~^~DmdO9^+CDS8a{(uqk@}m{}QVP_%0-OZ9txUm;P`!HxEdvDw<^^ z4XsuX4oCkYniask5cn7m3k$TbBjWsIL0QA@Gb_vLX!u$E*N1YrA?BU{lE^1q#&`Io z*6JbUJ(YtCp1E<3SzPRR=2txUBYR|ucBLrqiBM^Uj#UpSQu?3#6?vUDrgWw^;fIRA zJaX7(bKv(P(1^$NrL`Z!i|MLG83Y^P&DA1KWjVDDm6^*t69Z!cUJk4lMSo8iRV#X` z6apLOyY>kD>4c#lH|~#HWLJ*#lV?{Gcxz`K27?cZF1X1vDt!SpiZ^bI(R$f&npI07j1K{+}7{B7c7#wk5AddE*aBWvN(cu*0h!i&7ZK(>Gs#AK!H!hr& zXWq((%ti7M6emRT&ndQ@-l(P$7@pn>jKOJDz577#QX%ziI9hNYWZdh$vxpFDj9ma^ zIWIjrqGW9=$5TTcF&hwTj<2&pEt^!U4wzL}P15c@j(w`9_s3^q6b=A7?p(jUtVj) zWwM~bRuJWIeNfB=P0V!{auW{I#AuK3IwB*vV3CHTO%*ewO;TUj8>yX|Odtldo-H~{ zLeI!DLyccD4zN&d6sZuKV%yYDG;R(%IRU$Q5A#!DB1E&wWJ_<&Dl-&Y1_cca9j1Tz(c4d?7 ze~qW~GYn13#fRZv!rVMOScfYbG&=1r*z`lwTHA6?Mc<-fAnmG+T8b8p$*g;7i2|E^ z3!xM}gciT~Q4yd?#&?Wa)V3Ja|KsPl8KDw}0o01Wc7OAV`> z0DfJ?Y6YvRjCS#e$x8{Ips|9Z?l9raLvaVq`DH9V^guo>Pe0uad{N~ojU^#y>JC^CIHN}iGy6GyMlPdEZcG5U z>y=}sDSkDtfkynQmT@2*h9dlX3F`~wg9@!<+;>99=#|e15w*JuM1^exRN-T_b_VL? z;Vz-L(1`gnxs}bJumYQi5=~ZW_{AGaqpvB4CL|q~=Ao3WXCU=Guk-5;ySp z5$p56aaGEjL7%n6l92;Tc$Q%smYEgEnN3$}k)V=QM3B}EpCes2;kh!bQ1OAQmY}KY zWRBAymRf>cOe+ae{~zCrKxqs0?LiTr8F)tPpgU5oL^zNI-ic3ijYo;MX$?0gtnn=8 z355}FR^guB=|3fI5gX@uJ*^QN6KkY-V#NF*@7q0+4v2f3Virnl%f{CYY7~}`L57T? 
zh7Lt!B%?W*GssBe`!7{7+91njmKmba%CS)z9VtFw8V)P}1;>68;EkpIkSrt!pd z(r1gFGwf^hHFew;4NkZHcp2{E<*;3ZZm7mzUX^sGfoJHj?l1}`JMW~jaUSA(m}+%; zB;sg~dj!`xWSc(*OXGu0{IU!z;Ou4@FtgJ0ayB`=e}9FbZ|RSGAHJ@Xqe#4_PUo5w zu{R4b(K{liz@MR<4|bax?;=`}OpwqY8X|htJQ7=owNat`f;G?>%6oQ2>VA0&1;cZj z-8>HS#s7UbM-wt~deLHOKD34b>YY0W5RYlX+k6i#)S6ettR z#AIG*&gCnk=e@eo@1>Z!>HYZ3&#kal?-3HVks$koyW?0I_=Y3zJ*WxB9E=@fRJ?{V z@DccS4yt9=)S@<>I4Y!bcfEO|*pA=P=rAw$4Zh892-Gy58k`g8JN5^;3?P;H`^{3d z!ZVwq5jw~_Z+Jq=JW>2Hyc_II$^%%QR>`m~cf2+>K@lYL05_bN&x%p>-1rXd^vZ<2 zei02rl7&{8ffA!K^qNlBu(S&bN;|D|gd_#z66*33=Tq@SN&}?0-7XP{%6z`oFwaurEDJ#0~+j8%6cB{h3Q9h4x@!{{AqDWM6y#JiwMD&aT zME5DqO8-bwG4eA^dqLBdQV-p18PL;O;J;T~Ax@yy>9W}9>RV@LS{RC0H?;>vs~iZS z@&M{~El9$e^VZLEUohfP2&L6o%->}?AZc{6Fn0;Rmdd~^w^Go~*C2v^2p7IeTQ`EO z?lCE#Hc-A=Ej~u2Qsvu{kov2!Ys|>yrwebs(v}0lH+&6roxT<%9oWpNx`<@9fSsx^ z0}o7Aknz;aRa1>=KM&HA=jz`$me6R7Sa#JXp)t3EHly9wZ5c7~W`6bGKIbXLu5ciE zJ;WqWo5bH+&;|QPQnG>EeEdGzJsG*IhcbLM2F_WxDHA0d>mu)u(>4Q z6n)2Gf-kRlUoGt8-6Di$2o*!9yMI6CuJ^hY@3)NAXu(B-;I@#wp2xQUclVPW*u)Qu zgSEip3CI=ouP@}h>Q7yh!zV_ zfXU1*>T_0#yllUewgSE2MbQs>ffKskDr(8U2sy;DkM(>D6@EAECxdymYS z_k90TXrCQw!?;sD4Xu0Esj5nRrJzSaNzP*X^hCg;7E`CBUx%}tdkgC2Br`E zy@8)Ia33_~Zg`a*`L4lccU0S50D<*+TqW*>H!tkfBBQfp76zW1GV0Z&@yQR*GD=UG zVAwp62m(jhNBr{>$UnfZ0`t zM^ujpm{ft2UJ6#G-TXs(^9<)D%*;mIf2w>OOPp6#D%hE?=3L6{cmv}u@Ijr)9CH=> z`#n&wOBmTU)Cr~Xwq+PuEll&ILPhTnhH*+&iBW|dRJM*}zCbQKysUP>(CUu(nw@S} ztM3IFFVpjK2Fg73jmrk*-llTS+kNl3Y=}g+<^Tou#4S$O5)Bog4AO3U^-*a^clY6u z-ZxFW$DzABDAPMBk*ouUbLQ{cjn)rks2iPT#EKtBizso|{G2lTK(*yxwr2E{`|iFA z_xdASQW8mX;0fx7QM{Idg3|-(OMzDMt+VK%1WEG#!NxM)Ce)%~y9QFh*(I44nhVM~_1E~0g04(1 z)Y7x)lM;Db$2Dubiyy z+l24*yA^z8^z(y#MHz5OW$!6*`QCqb{yZ0ZICa0$6lOknxU{))_>mdq8Ove0t#o)~ zPdHsqkn#O5Hv=kGB^H9dCey;wZ3{DhWzS^RB%yDn{KjJ8XWIqM_3!n~J-W|g<#BID z8YYXp(*hb;9yNn&1$ZOrW=Tc|4I0x^zg~it&fp|Z5@fj8XJ&c37rmll5<+zgtlm@c zDa9L*@aDk?5mes{YEsnm(MX<;VekmuMxTI%5Y6IQsMI70xQ|faP>azB6z{1Zw$7I7 zTiMpfO#GX5AQCg$Jao6Fb}AU+gaaVoH$#ki#(iAR6z5+4J4BRn*9rxr_|A21GH)UK z9FvTZz|UbCGiYIge9B)!;|kJ)Ff^MYQ~U_~%EPLT8_NcL9CZ)J4su#0qIrij$IYzy zvs0R-kRN7{`cSDv!vPlyYG7EC zS|9@0soSTipF?}NYm}o^BeJ@JuT&>S0@WevF<)Lh(V*J!EwpCQll78kBy^4r?(wWd zk==NkY$rZp>9D{5Ufk0+S^w)~OcNb)s6Q4WlcBk5MMjCucab&6_!P3$PC5OK2d;wZWRWzM&6%QIOJq`f%ijv26M^hE|U2RyvsSM%MM|Sb})R{ zgX*_?=UamQ4%PKDGlx@iiMSsI?d`Ab+Fc`t&7-ghazelP_R>YcT)Z40+;O;ixN4MN zv(fk_c>-^J&pUpy*uuNC`J4+rZUuP*_4)&fBX7;SHx4MHT_kX_uOE>gUBGci<=1=c zq^U=CPu6|%(NGGd`jGv!aI=a|hIGNfdLLx3y~XiT$F@`wc?I8e?KA29Ya<-|P>46O zUK69av?Q25oY?xfQIw&v8d7umo5PjADSI|>HFIyHjr>7*ybpR7^|JJ_(hLLYbZxVR z!M#dYJ(qdOA+aR`GSwfOljiIiSZDzkjwIPrZ>qQ(O8Rd1>$0T#SToTmg?uNEr9&8jFGlgZuzIXf<|aW>)o9XZ;+4-xY8CSrXcC{wZmnM> z57CCz7)ZIICC7A6Kl7E?}#4<=nF@otU1v28mCQr3Pu6xs0CGk8{<+!_$g^R++&C$xeeW~CQb8++6D5&Fqw#VWOy`Oqghwl ziSJ5jFY9$7U*4frq-OiM)MU1e$vYEMaBHG>$m*U3RE`^GJKyM9qUZDgPQL9?62sz1 zIlT7L6}$F3N;L(tihzMiU0#4dv*=RE6S1?P+bm?t10v-S*h*&b2U^vAZ|F&hXgXkF zkU7H^p<|cC#c8VJJ)iTBk1^o=@%BmQT1?nDJ1@p>`ehPwvM6YwrDAgUIviwDO#pzd zZqi%H9esjEkLDH6_{GxH&9S$ZwG90$mV|Q&kCu8}vA;=YiI(rzkv?hJHX_}P_s8aiuwvfc-ILRP8Dp%n`P#B??Yip@ zGgh0}#$Pt=m*r;l@E#O;UEB&%Gn$y*m5%GIvF6Ho&FG~`xD8;izM}OVJf>s?UA^Q~ zIimTtD>r#;*WR5VTwRJyu-B?K1za z_i&~1{Nn(`Ez{=-Zcb-Ogw3HJPMh}y2dY;*!ayyeI=7m-{M#Y**KDzDo8l1r@?M^*hBF1%N0loRD z(YTvovZicm5OXdNs7fQSJAp#2^7 zrZ)i!0|ftpa<(+L1KRw}z4!)|&7}T!Vd}rfhiu}Ydd@#h`=)aN{<8fmln?Ij@x7^C z&TpPbW(SYIMk=BGA8OaXM~bS1>bVd721Nt_-v3LaS^t0<+uJ$-UH-xR9|Qg8=v5vH zf@fd=fN6990OMbnV{cdaYa-1o-v)2#`FC;d|1|!8Vo~9WS5PnkfH@uj0R3OEM{gd{ zzhWJn>|N~NyrKUe!BQE*EXu!S^i4hdH>wcX->C)$mUfmd1_u9+)OR3r)yNhDiX2S< z;Di<*)|IEU_dSd@OxzFS8@&0cevj6uS t|EHMx-|2^8f2aS4wE9N=i?sT0vZ|si>{|f=07!4E{#!Ay=KtmFzW~{AuE+oY 
literal 0 HcmV?d00001 diff --git a/python/pyspark/context.py b/python/pyspark/context.py index e47f162ca936c..59b5fa7f3a434 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -1365,7 +1365,7 @@ def setJobGroup(self, groupId: str, description: str, interruptOnCancel: bool = to HDFS-1208, where HDFS may respond to Thread.interrupt() by marking nodes as dead. If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread - local inheritance, and preventing resource leak. + local inheritance. Examples -------- @@ -1405,7 +1405,7 @@ def setLocalProperty(self, key: str, value: str) -> None: Notes ----- If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread - local inheritance, and preventing resource leak. + local inheritance. """ self._jsc.setLocalProperty(key, value) @@ -1423,7 +1423,7 @@ def setJobDescription(self, value: str) -> None: Notes ----- If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread - local inheritance, and preventing resource leak. + local inheritance. """ self._jsc.setJobDescription(value) diff --git a/python/pyspark/util.py b/python/pyspark/util.py index 5abbbb919636f..b7b972a5d35b8 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -331,13 +331,10 @@ def inheritable_thread_target(f: Callable) -> Callable: @functools.wraps(f) def wrapped(*args: Any, **kwargs: Any) -> Any: - try: - # Set local properties in child thread. - assert SparkContext._active_spark_context is not None - SparkContext._active_spark_context._jsc.sc().setLocalProperties(properties) - return f(*args, **kwargs) - finally: - InheritableThread._clean_py4j_conn_for_current_thread() + # Set local properties in child thread. + assert SparkContext._active_spark_context is not None + SparkContext._active_spark_context._jsc.sc().setLocalProperties(properties) + return f(*args, **kwargs) return wrapped else: @@ -377,10 +374,7 @@ def copy_local_properties(*a: Any, **k: Any) -> Any: assert hasattr(self, "_props") assert SparkContext._active_spark_context is not None SparkContext._active_spark_context._jsc.sc().setLocalProperties(self._props) - try: - return target(*a, **k) - finally: - InheritableThread._clean_py4j_conn_for_current_thread() + return target(*a, **k) super(InheritableThread, self).__init__( target=copy_local_properties, *args, **kwargs # type: ignore[misc] @@ -401,25 +395,6 @@ def start(self) -> None: self._props = SparkContext._active_spark_context._jsc.sc().getLocalProperties().clone() return super(InheritableThread, self).start() - @staticmethod - def _clean_py4j_conn_for_current_thread() -> None: - from pyspark import SparkContext - - jvm = SparkContext._jvm - assert jvm is not None - thread_connection = jvm._gateway_client.get_thread_connection() - if thread_connection is not None: - try: - # Dequeue is shared across other threads but it's thread-safe. - # If this function has to be invoked one more time in the same thead - # Py4J will create a new connection automatically. 
- jvm._gateway_client.deque.remove(thread_connection) - except ValueError: - # Should never reach this point - return - finally: - thread_connection.close() - if __name__ == "__main__": if "pypy" not in platform.python_implementation().lower() and sys.version_info[:2] >= (3, 7): diff --git a/python/setup.py b/python/setup.py index 673b146cb6c5d..ab9b64f79bc37 100755 --- a/python/setup.py +++ b/python/setup.py @@ -258,7 +258,7 @@ def run(self): license='http://www.apache.org/licenses/LICENSE-2.0', # Don't forget to update python/docs/source/getting_started/install.rst # if you're updating the versions or dependencies. - install_requires=['py4j==0.10.9.3'], + install_requires=['py4j==0.10.9.4'], extras_require={ 'ml': ['numpy>=1.15'], 'mllib': ['numpy>=1.15'], diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh index f27b6fe8d9a04..341eb053ed7b2 100755 --- a/sbin/spark-config.sh +++ b/sbin/spark-config.sh @@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}" # Add the PySpark classes to the PYTHONPATH: if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}" - export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.3-src.zip:${PYTHONPATH}" + export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.4-src.zip:${PYTHONPATH}" export PYSPARK_PYTHONPATH_SET=1 fi From 1b4141624dcf178ef8b7583bccddb77e553a41a5 Mon Sep 17 00:00:00 2001 From: Tengfei Huang Date: Wed, 16 Mar 2022 13:41:05 +0300 Subject: [PATCH 508/513] [SPARK-38106][SQL] Use error classes in the parsing errors of functions ### What changes were proposed in this pull request? Migrate the following errors in QueryExecutionErrors onto use error classes as INVALID_SQL_SYNTAX: - functionNameUnsupportedError - showFunctionsUnsupportedError - showFunctionsInvalidPatternError - createFuncWithBothIfNotExistsAndReplaceError - defineTempFuncWithIfNotExistsError - unsupportedFunctionNameError - specifyingDBInCreateTempFuncError - invalidNameForDropTempFunc ### Why are the changes needed? Porting parsing errors of functions to new error framework. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT added. Closes #35865 from ivoson/SPARK-38106. 
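For reference, after this change a statement such as `SHOW sys FUNCTIONS` is still rejected with a `ParseException`, but the exception now carries the `INVALID_SQL_SYNTAX` error class and SQLSTATE `42000`; the expected message shown below is taken from the new `QueryParsingErrorsSuite`. A minimal sketch, assuming an active `SparkSession` named `spark` (e.g. in a spark-shell session):

```
// The parse error now exposes the error class and SQLSTATE in addition to the message:
//
//   Invalid SQL syntax: SHOW sys FUNCTIONS not supported(line 1, pos 5)
//
//   == SQL ==
//   SHOW sys FUNCTIONS
//   -----^^^
try {
  spark.sql("SHOW sys FUNCTIONS")
} catch {
  case e: org.apache.spark.sql.catalyst.parser.ParseException =>
    assert(e.getErrorClass == "INVALID_SQL_SYNTAX")
    assert(e.getSqlState == "42000")
}
```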
Lead-authored-by: Tengfei Huang Co-authored-by: Huang Tengfei Signed-off-by: Max Gekk --- .../spark/sql/errors/QueryParsingErrors.scala | 27 ++- .../sql/errors/QueryParsingErrorsSuite.scala | 172 ++++++++++++++++++ .../sql/execution/command/DDLSuite.scala | 51 ------ 3 files changed, 189 insertions(+), 61 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index c03b1b45f644d..d055299b39396 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -161,7 +161,8 @@ object QueryParsingErrors { } def functionNameUnsupportedError(functionName: String, ctx: ParserRuleContext): Throwable = { - new ParseException(s"Unsupported function name '$functionName'", ctx) + new ParseException("INVALID_SQL_SYNTAX", + Array(s"Unsupported function name '$functionName'"), ctx) } def cannotParseValueTypeError( @@ -293,12 +294,13 @@ object QueryParsingErrors { } def showFunctionsUnsupportedError(identifier: String, ctx: IdentifierContext): Throwable = { - new ParseException(s"SHOW $identifier FUNCTIONS not supported", ctx) + new ParseException("INVALID_SQL_SYNTAX", + Array(s"SHOW $identifier FUNCTIONS not supported"), ctx) } def showFunctionsInvalidPatternError(pattern: String, ctx: ParserRuleContext): Throwable = { - new ParseException(s"Invalid pattern in SHOW FUNCTIONS: $pattern. It must be " + - "a string literal.", ctx) + new ParseException("INVALID_SQL_SYNTAX", + Array(s"Invalid pattern in SHOW FUNCTIONS: $pattern. It must be a string literal."), ctx) } def duplicateCteDefinitionNamesError(duplicateNames: String, ctx: CtesContext): Throwable = { @@ -403,22 +405,27 @@ object QueryParsingErrors { } def createFuncWithBothIfNotExistsAndReplaceError(ctx: CreateFunctionContext): Throwable = { - new ParseException("CREATE FUNCTION with both IF NOT EXISTS and REPLACE is not allowed.", ctx) + new ParseException("INVALID_SQL_SYNTAX", + Array("CREATE FUNCTION with both IF NOT EXISTS and REPLACE is not allowed."), ctx) } def defineTempFuncWithIfNotExistsError(ctx: CreateFunctionContext): Throwable = { - new ParseException("It is not allowed to define a TEMPORARY function with IF NOT EXISTS.", ctx) + new ParseException("INVALID_SQL_SYNTAX", + Array("It is not allowed to define a TEMPORARY function with IF NOT EXISTS."), ctx) } def unsupportedFunctionNameError(quoted: String, ctx: CreateFunctionContext): Throwable = { - new ParseException(s"Unsupported function name '$quoted'", ctx) + new ParseException("INVALID_SQL_SYNTAX", + Array(s"Unsupported function name '$quoted'"), ctx) } def specifyingDBInCreateTempFuncError( databaseName: String, ctx: CreateFunctionContext): Throwable = { new ParseException( - s"Specifying a database in CREATE TEMPORARY FUNCTION is not allowed: '$databaseName'", ctx) + "INVALID_SQL_SYNTAX", + Array(s"Specifying a database in CREATE TEMPORARY FUNCTION is not allowed: '$databaseName'"), + ctx) } def unclosedBracketedCommentError(command: String, position: Origin): Throwable = { @@ -430,8 +437,8 @@ object QueryParsingErrors { } def invalidNameForDropTempFunc(name: Seq[String], ctx: ParserRuleContext): Throwable = { - new ParseException( - s"DROP TEMPORARY FUNCTION requires a single part name but got: ${name.quoted}", ctx) + new ParseException("INVALID_SQL_SYNTAX", + Array(s"DROP TEMPORARY FUNCTION requires a single part name but got: 
${name.quoted}"), ctx) } def defaultColumnNotImplementedYetError(ctx: ParserRuleContext): Throwable = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala index 5610c4d000bfa..e7f62d28e8efa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -218,4 +218,176 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { |--------------------------------------------^^^ |""".stripMargin) } + + test("INVALID_SQL_SYNTAX: Invalid table value function name") { + validateParsingError( + sqlText = "SELECT * FROM ns.db.func()", + errorClass = "INVALID_SQL_SYNTAX", + sqlState = "42000", + message = + """ + |Invalid SQL syntax: Unsupported function name 'ns.db.func'(line 1, pos 14) + | + |== SQL == + |SELECT * FROM ns.db.func() + |--------------^^^ + |""".stripMargin) + } + + test("INVALID_SQL_SYNTAX: Invalid scope in show functions") { + validateParsingError( + sqlText = "SHOW sys FUNCTIONS", + errorClass = "INVALID_SQL_SYNTAX", + sqlState = "42000", + message = + """ + |Invalid SQL syntax: SHOW sys FUNCTIONS not supported(line 1, pos 5) + | + |== SQL == + |SHOW sys FUNCTIONS + |-----^^^ + |""".stripMargin) + } + + test("INVALID_SQL_SYNTAX: Invalid pattern in show functions") { + val errorDesc = + "Invalid pattern in SHOW FUNCTIONS: f1. It must be a string literal.(line 1, pos 21)" + + validateParsingError( + sqlText = "SHOW FUNCTIONS IN db f1", + errorClass = "INVALID_SQL_SYNTAX", + sqlState = "42000", + message = + s""" + |Invalid SQL syntax: $errorDesc + | + |== SQL == + |SHOW FUNCTIONS IN db f1 + |---------------------^^^ + |""".stripMargin) + } + + test("INVALID_SQL_SYNTAX: Create function with both if not exists and replace") { + val sqlText = + """ + |CREATE OR REPLACE FUNCTION IF NOT EXISTS func1 as + |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', + |JAR '/path/to/jar2' + |""".stripMargin + val errorDesc = + "CREATE FUNCTION with both IF NOT EXISTS and REPLACE is not allowed.(line 2, pos 0)" + + validateParsingError( + sqlText = sqlText, + errorClass = "INVALID_SQL_SYNTAX", + sqlState = "42000", + message = + s""" + |Invalid SQL syntax: $errorDesc + | + |== SQL == + | + |CREATE OR REPLACE FUNCTION IF NOT EXISTS func1 as + |^^^ + |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', + |JAR '/path/to/jar2' + |""".stripMargin) + } + + test("INVALID_SQL_SYNTAX: Create temporary function with if not exists") { + val sqlText = + """ + |CREATE TEMPORARY FUNCTION IF NOT EXISTS func1 as + |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', + |JAR '/path/to/jar2' + |""".stripMargin + val errorDesc = + "It is not allowed to define a TEMPORARY function with IF NOT EXISTS.(line 2, pos 0)" + + validateParsingError( + sqlText = sqlText, + errorClass = "INVALID_SQL_SYNTAX", + sqlState = "42000", + message = + s""" + |Invalid SQL syntax: $errorDesc + | + |== SQL == + | + |CREATE TEMPORARY FUNCTION IF NOT EXISTS func1 as + |^^^ + |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', + |JAR '/path/to/jar2' + |""".stripMargin) + } + + test("INVALID_SQL_SYNTAX: Create temporary function with multi-part name") { + val sqlText = + """ + |CREATE TEMPORARY FUNCTION ns.db.func as + |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', 
+ |JAR '/path/to/jar2' + |""".stripMargin + + validateParsingError( + sqlText = sqlText, + errorClass = "INVALID_SQL_SYNTAX", + sqlState = "42000", + message = + """ + |Invalid SQL syntax: Unsupported function name 'ns.db.func'(line 2, pos 0) + | + |== SQL == + | + |CREATE TEMPORARY FUNCTION ns.db.func as + |^^^ + |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', + |JAR '/path/to/jar2' + |""".stripMargin) + } + + test("INVALID_SQL_SYNTAX: Specifying database while creating temporary function") { + val sqlText = + """ + |CREATE TEMPORARY FUNCTION db.func as + |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', + |JAR '/path/to/jar2' + |""".stripMargin + val errorDesc = + "Specifying a database in CREATE TEMPORARY FUNCTION is not allowed: 'db'(line 2, pos 0)" + + validateParsingError( + sqlText = sqlText, + errorClass = "INVALID_SQL_SYNTAX", + sqlState = "42000", + message = + s""" + |Invalid SQL syntax: $errorDesc + | + |== SQL == + | + |CREATE TEMPORARY FUNCTION db.func as + |^^^ + |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', + |JAR '/path/to/jar2' + |""".stripMargin) + } + + test("INVALID_SQL_SYNTAX: Drop temporary function requires a single part name") { + val errorDesc = + "DROP TEMPORARY FUNCTION requires a single part name but got: db.func(line 1, pos 0)" + + validateParsingError( + sqlText = "DROP TEMPORARY FUNCTION db.func", + errorClass = "INVALID_SQL_SYNTAX", + sqlState = "42000", + message = + s""" + |Invalid SQL syntax: $errorDesc + | + |== SQL == + |DROP TEMPORARY FUNCTION db.func + |^^^ + |""".stripMargin) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 9da40df7dbd2d..c3d1126dc07f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -2103,57 +2103,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } - test("create temporary function with if not exists") { - withUserDefinedFunction("func1" -> true) { - val sql1 = - """ - |CREATE TEMPORARY FUNCTION IF NOT EXISTS func1 as - |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', - |JAR '/path/to/jar2' - """.stripMargin - val e = intercept[AnalysisException] { - sql(sql1) - }.getMessage - assert(e.contains("It is not allowed to define a TEMPORARY function with IF NOT EXISTS")) - } - } - - test("create function with both if not exists and replace") { - withUserDefinedFunction("func1" -> false) { - val sql1 = - """ - |CREATE OR REPLACE FUNCTION IF NOT EXISTS func1 as - |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', - |JAR '/path/to/jar2' - """.stripMargin - val e = intercept[AnalysisException] { - sql(sql1) - }.getMessage - assert(e.contains("CREATE FUNCTION with both IF NOT EXISTS and REPLACE is not allowed")) - } - } - - test("create temporary function by specifying a database") { - val dbName = "mydb" - withDatabase(dbName) { - sql(s"CREATE DATABASE $dbName") - sql(s"USE $dbName") - withUserDefinedFunction("func1" -> true) { - val sql1 = - s""" - |CREATE TEMPORARY FUNCTION $dbName.func1 as - |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', - |JAR '/path/to/jar2' - """.stripMargin - val e = intercept[AnalysisException] { - sql(sql1) - }.getMessage - assert(e.contains(s"Specifying a database in CREATE 
TEMPORARY FUNCTION " + - s"is not allowed: '$dbName'")) - } - } - } - Seq(true, false).foreach { caseSensitive => test(s"alter table add columns with existing column name - caseSensitive $caseSensitive") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> s"$caseSensitive") { From 71e2110b799220adc107c9ac5ce737281f2b65cc Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Wed, 16 Mar 2022 10:54:18 -0500 Subject: [PATCH 509/513] [SPARK-38194][YARN][MESOS][K8S] Make memory overhead factor configurable ### What changes were proposed in this pull request? Add a new config to set the memory overhead factor for drivers and executors. Currently the memory overhead is hard coded to 10% (except in Kubernetes), and the only way to set it higher is to set it to a specific memory amount. ### Why are the changes needed? In dynamic environments where different people or use cases need different memory requirements, it would be helpful to set a higher memory overhead factor instead of having to set a higher specific memory overhead value. The kubernetes resource manager already makes this configurable. This makes it configurable across the board. ### Does this PR introduce _any_ user-facing change? No change to default behavior, just adds a new config users can change. ### How was this patch tested? New UT to check the memory calculation. Closes #35504 from Kimahriman/yarn-configurable-memory-overhead-factor. Authored-by: Adam Binford Signed-off-by: Thomas Graves --- .../scala/org/apache/spark/SparkConf.scala | 4 +- .../spark/internal/config/package.scala | 28 +++++++++ docs/configuration.md | 30 ++++++++- docs/running-on-kubernetes.md | 9 --- .../k8s/features/BasicDriverFeatureStep.scala | 13 ++-- .../features/BasicExecutorFeatureStep.scala | 7 ++- .../BasicDriverFeatureStepSuite.scala | 63 +++++++++++++++++-- .../BasicExecutorFeatureStepSuite.scala | 54 ++++++++++++++++ .../deploy/rest/mesos/MesosRestServer.scala | 5 +- .../cluster/mesos/MesosSchedulerUtils.scala | 9 +-- .../rest/mesos/MesosRestServerSuite.scala | 8 ++- .../org/apache/spark/deploy/yarn/Client.scala | 14 ++++- .../spark/deploy/yarn/YarnAllocator.scala | 5 +- .../deploy/yarn/YarnSparkHadoopUtil.scala | 5 +- .../deploy/yarn/YarnAllocatorSuite.scala | 29 +++++++++ 15 files changed, 248 insertions(+), 35 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 5f37a1abb1909..cf121749b7348 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -636,7 +636,9 @@ private[spark] object SparkConf extends Logging { DeprecatedConfig("spark.blacklist.killBlacklistedExecutors", "3.1.0", "Please use spark.excludeOnFailure.killExcludedExecutors"), DeprecatedConfig("spark.yarn.blacklist.executor.launch.blacklisting.enabled", "3.1.0", - "Please use spark.yarn.executor.launch.excludeOnFailure.enabled") + "Please use spark.yarn.executor.launch.excludeOnFailure.enabled"), + DeprecatedConfig("spark.kubernetes.memoryOverheadFactor", "3.3.0", + "Please use spark.driver.memoryOverheadFactor and spark.executor.memoryOverheadFactor") ) Map(configs.map { cfg => (cfg.key -> cfg) } : _*) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index dbec61a1fdb76..ffe4501248f43 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ 
-105,6 +105,22 @@ package object config { .bytesConf(ByteUnit.MiB) .createOptional + private[spark] val DRIVER_MEMORY_OVERHEAD_FACTOR = + ConfigBuilder("spark.driver.memoryOverheadFactor") + .doc("Fraction of driver memory to be allocated as additional non-heap memory per driver " + + "process in cluster mode. This is memory that accounts for things like VM overheads, " + + "interned strings, other native overheads, etc. This tends to grow with the container " + + "size. This value defaults to 0.10 except for Kubernetes non-JVM jobs, which defaults to " + + "0.40. This is done as non-JVM tasks need more non-JVM heap space and such tasks " + + "commonly fail with \"Memory Overhead Exceeded\" errors. This preempts this error " + + "with a higher default. This value is ignored if spark.driver.memoryOverhead is set " + + "directly.") + .version("3.3.0") + .doubleConf + .checkValue(factor => factor > 0, + "Ensure that memory overhead is a double greater than 0") + .createWithDefault(0.1) + private[spark] val DRIVER_LOG_DFS_DIR = ConfigBuilder("spark.driver.log.dfsDir").version("3.0.0").stringConf.createOptional @@ -315,6 +331,18 @@ package object config { .bytesConf(ByteUnit.MiB) .createOptional + private[spark] val EXECUTOR_MEMORY_OVERHEAD_FACTOR = + ConfigBuilder("spark.executor.memoryOverheadFactor") + .doc("Fraction of executor memory to be allocated as additional non-heap memory per " + + "executor process. This is memory that accounts for things like VM overheads, " + + "interned strings, other native overheads, etc. This tends to grow with the container " + + "size. This value is ignored if spark.executor.memoryOverhead is set directly.") + .version("3.3.0") + .doubleConf + .checkValue(factor => factor > 0, + "Ensure that memory overhead is a double greater than 0") + .createWithDefault(0.1) + private[spark] val CORES_MAX = ConfigBuilder("spark.cores.max") .doc("When running on a standalone deploy cluster or a Mesos cluster in coarse-grained " + "sharing mode, the maximum amount of CPU cores to request for the application from across " + diff --git a/docs/configuration.md b/docs/configuration.md index ae3f422f34b3a..a2e6797b55e2f 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -183,7 +183,7 @@ of the most common options to set are: spark.driver.memoryOverhead - driverMemory * 0.10, with minimum of 384 + driverMemory * spark.driver.memoryOverheadFactor, with minimum of 384 Amount of non-heap memory to be allocated per driver process in cluster mode, in MiB unless otherwise specified. This is memory that accounts for things like VM overheads, interned strings, @@ -198,6 +198,21 @@ of the most common options to set are: 2.3.0 + + spark.driver.memoryOverheadFactor + 0.10 + + Fraction of driver memory to be allocated as additional non-heap memory per driver process in cluster mode. + This is memory that accounts for things like VM overheads, interned strings, + other native overheads, etc. This tends to grow with the container size. + This value defaults to 0.10 except for Kubernetes non-JVM jobs, which defaults to + 0.40. This is done as non-JVM tasks need more non-JVM heap space and such tasks + commonly fail with "Memory Overhead Exceeded" errors. This preempts this error + with a higher default. + This value is ignored if spark.driver.memoryOverhead is set directly. 
+ + 3.3.0 + spark.driver.resource.{resourceName}.amount 0 @@ -272,7 +287,7 @@ of the most common options to set are: spark.executor.memoryOverhead - executorMemory * 0.10, with minimum of 384 + executorMemory * spark.executor.memoryOverheadFactor, with minimum of 384 Amount of additional memory to be allocated per executor process, in MiB unless otherwise specified. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. @@ -287,6 +302,17 @@ of the most common options to set are: 2.3.0 + + spark.executor.memoryOverheadFactor + 0.10 + + Fraction of executor memory to be allocated as additional non-heap memory per executor process. + This is memory that accounts for things like VM overheads, interned strings, + other native overheads, etc. This tends to grow with the container size. + This value is ignored if spark.executor.memoryOverhead is set directly. + + 3.3.0 + spark.executor.resource.{resourceName}.amount 0 diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index a5da80a68d32d..de37e22cc78d7 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1137,15 +1137,6 @@ See the [configuration page](configuration.html) for information on Spark config 3.0.0 - - spark.kubernetes.memoryOverheadFactor - 0.1 - - This sets the Memory Overhead Factor that will allocate memory to non-JVM memory, which includes off-heap memory allocations, non-JVM tasks, various systems processes, and tmpfs-based local directories when spark.kubernetes.local.dirs.tmpfs is true. For JVM-based jobs this value will default to 0.10 and 0.40 for non-JVM jobs. - This is done as non-JVM tasks need more non-JVM heap space and such tasks commonly fail with "Memory Overhead Exceeded" errors. This preempts this error with a higher default. - - 2.4.0 - spark.kubernetes.pyspark.pythonVersion "3" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala index 3b2b5612566a1..97151494fc60c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -53,18 +53,23 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) // Memory settings private val driverMemoryMiB = conf.get(DRIVER_MEMORY) + private val memoryOverheadFactor = if (conf.contains(DRIVER_MEMORY_OVERHEAD_FACTOR)) { + conf.get(DRIVER_MEMORY_OVERHEAD_FACTOR) + } else { + conf.get(MEMORY_OVERHEAD_FACTOR) + } // The memory overhead factor to use. If the user has not set it, then use a different // value for non-JVM apps. This value is propagated to executors. 
private val overheadFactor = if (conf.mainAppResource.isInstanceOf[NonJVMResource]) { - if (conf.contains(MEMORY_OVERHEAD_FACTOR)) { - conf.get(MEMORY_OVERHEAD_FACTOR) + if (conf.contains(MEMORY_OVERHEAD_FACTOR) || conf.contains(DRIVER_MEMORY_OVERHEAD_FACTOR)) { + memoryOverheadFactor } else { NON_JVM_MEMORY_OVERHEAD_FACTOR } } else { - conf.get(MEMORY_OVERHEAD_FACTOR) + memoryOverheadFactor } private val memoryOverheadMiB = conf @@ -164,7 +169,7 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) KUBERNETES_DRIVER_POD_NAME.key -> driverPodName, "spark.app.id" -> conf.appId, KUBERNETES_DRIVER_SUBMIT_CHECK.key -> "true", - MEMORY_OVERHEAD_FACTOR.key -> overheadFactor.toString) + DRIVER_MEMORY_OVERHEAD_FACTOR.key -> overheadFactor.toString) // try upload local, resolvable files to a hadoop compatible file system Seq(JARS, FILES, ARCHIVES, SUBMIT_PYTHON_FILES).foreach { key => val uris = conf.get(key).filter(uri => KubernetesUtils.isLocalAndResolvable(uri)) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index a7625194bd6e6..15c69ad487f5f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -59,11 +59,16 @@ private[spark] class BasicExecutorFeatureStep( private val isDefaultProfile = resourceProfile.id == ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID private val isPythonApp = kubernetesConf.get(APP_RESOURCE_TYPE) == Some(APP_RESOURCE_TYPE_PYTHON) private val disableConfigMap = kubernetesConf.get(KUBERNETES_EXECUTOR_DISABLE_CONFIGMAP) + private val memoryOverheadFactor = if (kubernetesConf.contains(EXECUTOR_MEMORY_OVERHEAD_FACTOR)) { + kubernetesConf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) + } else { + kubernetesConf.get(MEMORY_OVERHEAD_FACTOR) + } val execResources = ResourceProfile.getResourcesForClusterManager( resourceProfile.id, resourceProfile.executorResources, - kubernetesConf.get(MEMORY_OVERHEAD_FACTOR), + memoryOverheadFactor, kubernetesConf.sparkConf, isPythonApp, Map.empty) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala index bf7fbcc912f54..d45f5f97da213 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala @@ -134,7 +134,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { KUBERNETES_DRIVER_POD_NAME.key -> "spark-driver-pod", "spark.app.id" -> KubernetesTestConf.APP_ID, "spark.kubernetes.submitInDriver" -> "true", - MEMORY_OVERHEAD_FACTOR.key -> MEMORY_OVERHEAD_FACTOR.defaultValue.get.toString) + DRIVER_MEMORY_OVERHEAD_FACTOR.key -> DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get.toString) assert(featureStep.getAdditionalPodSystemProperties() === expectedSparkConf) } @@ -193,7 +193,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { // Memory overhead tests. 
Tuples are: // test name, main resource, overhead factor, expected factor Seq( - ("java", JavaMainAppResource(None), None, MEMORY_OVERHEAD_FACTOR.defaultValue.get), + ("java", JavaMainAppResource(None), None, DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get), ("python default", PythonMainAppResource(null), None, NON_JVM_MEMORY_OVERHEAD_FACTOR), ("python w/ override", PythonMainAppResource(null), Some(0.9d), 0.9d), ("r default", RMainAppResource(null), None, NON_JVM_MEMORY_OVERHEAD_FACTOR) @@ -201,13 +201,13 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { test(s"memory overhead factor: $name") { // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB val driverMem = - ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 + ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 // main app resource, overhead factor val sparkConf = new SparkConf(false) .set(CONTAINER_IMAGE, "spark-driver:latest") .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m") - factor.foreach { value => sparkConf.set(MEMORY_OVERHEAD_FACTOR, value) } + factor.foreach { value => sparkConf.set(DRIVER_MEMORY_OVERHEAD_FACTOR, value) } val conf = KubernetesTestConf.createDriverConf( sparkConf = sparkConf, mainAppResource = resource) @@ -218,10 +218,63 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { assert(mem === s"${expected}Mi") val systemProperties = step.getAdditionalPodSystemProperties() - assert(systemProperties(MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString) + assert(systemProperties(DRIVER_MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString) } } + test(s"SPARK-38194: memory overhead factor precendence") { + // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB + val driverMem = + ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 + + // main app resource, overhead factor + val sparkConf = new SparkConf(false) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m") + + // New config should take precedence + val expectedFactor = 0.2 + sparkConf.set(DRIVER_MEMORY_OVERHEAD_FACTOR, expectedFactor) + sparkConf.set(MEMORY_OVERHEAD_FACTOR, 0.3) + + val conf = KubernetesTestConf.createDriverConf( + sparkConf = sparkConf) + val step = new BasicDriverFeatureStep(conf) + val pod = step.configurePod(SparkPod.initialPod()) + val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) + val expected = (driverMem + driverMem * expectedFactor).toInt + assert(mem === s"${expected}Mi") + + val systemProperties = step.getAdditionalPodSystemProperties() + assert(systemProperties(DRIVER_MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString) + } + + test(s"SPARK-38194: old memory factor settings is applied if new one isn't given") { + // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB + val driverMem = + ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 + + // main app resource, overhead factor + val sparkConf = new SparkConf(false) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m") + + // Old config still works if new config isn't given + val expectedFactor = 0.3 + sparkConf.set(MEMORY_OVERHEAD_FACTOR, expectedFactor) + + val conf = KubernetesTestConf.createDriverConf( + sparkConf = sparkConf) + val step = new BasicDriverFeatureStep(conf) + val 
pod = step.configurePod(SparkPod.initialPod()) + val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) + val expected = (driverMem + driverMem * expectedFactor).toInt + assert(mem === s"${expected}Mi") + + val systemProperties = step.getAdditionalPodSystemProperties() + assert(systemProperties(DRIVER_MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString) + } + test("SPARK-35493: make spark.blockManager.port be able to be fallen back to in driver pod") { val initPod = SparkPod.initialPod() val sparkConf = new SparkConf() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala index f5f2712481604..731a9b77d2059 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -441,6 +441,60 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { )) } + test(s"SPARK-38194: memory overhead factor precendence") { + // Choose an executor memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB + val defaultFactor = EXECUTOR_MEMORY_OVERHEAD_FACTOR.defaultValue.get + val executorMem = ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / defaultFactor * 2 + + // main app resource, overhead factor + val sparkConf = new SparkConf(false) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(EXECUTOR_MEMORY.key, s"${executorMem.toInt}m") + + // New config should take precedence + val expectedFactor = 0.2 + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, expectedFactor) + sparkConf.set(MEMORY_OVERHEAD_FACTOR, 0.3) + + val conf = KubernetesTestConf.createExecutorConf( + sparkConf = sparkConf) + ResourceProfile.clearDefaultProfile() + val resourceProfile = ResourceProfile.getOrCreateDefaultProfile(sparkConf) + val step = new BasicExecutorFeatureStep(conf, new SecurityManager(baseConf), + resourceProfile) + val pod = step.configurePod(SparkPod.initialPod()) + val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) + val expected = (executorMem + executorMem * expectedFactor).toInt + assert(mem === s"${expected}Mi") + } + + test(s"SPARK-38194: old memory factor settings is applied if new one isn't given") { + // Choose an executor memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB + val defaultFactor = EXECUTOR_MEMORY_OVERHEAD_FACTOR.defaultValue.get + val executorMem = ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / defaultFactor * 2 + + // main app resource, overhead factor + val sparkConf = new SparkConf(false) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(EXECUTOR_MEMORY.key, s"${executorMem.toInt}m") + + // New config should take precedence + val expectedFactor = 0.3 + sparkConf.set(MEMORY_OVERHEAD_FACTOR, expectedFactor) + + val conf = KubernetesTestConf.createExecutorConf( + sparkConf = sparkConf) + ResourceProfile.clearDefaultProfile() + val resourceProfile = ResourceProfile.getOrCreateDefaultProfile(sparkConf) + val step = new BasicExecutorFeatureStep(conf, new SecurityManager(baseConf), + resourceProfile) + val pod = step.configurePod(SparkPod.initialPod()) + val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) + val expected = (executorMem + executorMem * 
expectedFactor).toInt + assert(mem === s"${expected}Mi") + } + + // There is always exactly one controller reference, and it points to the driver pod. private def checkOwnerReferences(executor: Pod, driverPodUid: String): Unit = { assert(executor.getMetadata.getOwnerReferences.size() === 1) diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala index 2fd13a5903243..9e4187837b680 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala @@ -105,6 +105,7 @@ private[mesos] class MesosSubmitRequestServlet( val superviseDriver = sparkProperties.get(config.DRIVER_SUPERVISE.key) val driverMemory = sparkProperties.get(config.DRIVER_MEMORY.key) val driverMemoryOverhead = sparkProperties.get(config.DRIVER_MEMORY_OVERHEAD.key) + val driverMemoryOverheadFactor = sparkProperties.get(config.DRIVER_MEMORY_OVERHEAD_FACTOR.key) val driverCores = sparkProperties.get(config.DRIVER_CORES.key) val name = request.sparkProperties.getOrElse("spark.app.name", mainClass) @@ -121,8 +122,10 @@ private[mesos] class MesosSubmitRequestServlet( mainClass, appArgs, environmentVariables, extraClassPath, extraLibraryPath, javaOpts) val actualSuperviseDriver = superviseDriver.map(_.toBoolean).getOrElse(DEFAULT_SUPERVISE) val actualDriverMemory = driverMemory.map(Utils.memoryStringToMb).getOrElse(DEFAULT_MEMORY) + val actualDriverMemoryFactor = driverMemoryOverheadFactor.map(_.toDouble).getOrElse( + MEMORY_OVERHEAD_FACTOR) val actualDriverMemoryOverhead = driverMemoryOverhead.map(_.toInt).getOrElse( - math.max((MEMORY_OVERHEAD_FACTOR * actualDriverMemory).toInt, MEMORY_OVERHEAD_MIN)) + math.max((actualDriverMemoryFactor * actualDriverMemory).toInt, MEMORY_OVERHEAD_MIN)) val actualDriverCores = driverCores.map(_.toDouble).getOrElse(DEFAULT_CORES) val submitDate = new Date() val submissionId = newDriverId(submitDate) diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index 38f83df00e428..524b1d514fafe 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -387,8 +387,7 @@ trait MesosSchedulerUtils extends Logging { } } - // These defaults copied from YARN - private val MEMORY_OVERHEAD_FRACTION = 0.10 + // This default copied from YARN private val MEMORY_OVERHEAD_MINIMUM = 384 /** @@ -400,8 +399,9 @@ trait MesosSchedulerUtils extends Logging { * (whichever is larger) */ def executorMemory(sc: SparkContext): Int = { + val memoryOverheadFactor = sc.conf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) sc.conf.get(mesosConfig.EXECUTOR_MEMORY_OVERHEAD).getOrElse( - math.max(MEMORY_OVERHEAD_FRACTION * sc.executorMemory, MEMORY_OVERHEAD_MINIMUM).toInt) + + math.max(memoryOverheadFactor * sc.executorMemory, MEMORY_OVERHEAD_MINIMUM).toInt) + sc.executorMemory } @@ -415,7 +415,8 @@ trait MesosSchedulerUtils extends Logging { * `MEMORY_OVERHEAD_FRACTION (=0.1) * driverMemory` */ def driverContainerMemory(driverDesc: MesosDriverDescription): Int = { - val defaultMem = math.max(MEMORY_OVERHEAD_FRACTION * 
driverDesc.mem, MEMORY_OVERHEAD_MINIMUM) + val memoryOverheadFactor = driverDesc.conf.get(DRIVER_MEMORY_OVERHEAD_FACTOR) + val defaultMem = math.max(memoryOverheadFactor * driverDesc.mem, MEMORY_OVERHEAD_MINIMUM) driverDesc.conf.get(mesosConfig.DRIVER_MEMORY_OVERHEAD).getOrElse(defaultMem.toInt) + driverDesc.mem } diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala index 344fc38c84fb1..8bed43a54d5d0 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala @@ -35,10 +35,16 @@ class MesosRestServerSuite extends SparkFunSuite testOverheadMemory(new SparkConf(), "2000M", 2384) } - test("test driver overhead memory with overhead factor") { + test("test driver overhead memory with default overhead factor") { testOverheadMemory(new SparkConf(), "5000M", 5500) } + test("test driver overhead memory with overhead factor") { + val conf = new SparkConf() + conf.set(config.DRIVER_MEMORY_OVERHEAD_FACTOR.key, "0.2") + testOverheadMemory(conf, "5000M", 6000) + } + test("test configured driver overhead memory") { val conf = new SparkConf() conf.set(config.DRIVER_MEMORY_OVERHEAD.key, "1000") diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index ae85ea8d6110a..f364b79216098 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -54,6 +54,7 @@ import org.apache.spark.api.python.PythonUtils import org.apache.spark.deploy.{SparkApplication, SparkHadoopUtil} import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.deploy.yarn.ResourceRequestHelper._ +import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._ import org.apache.spark.deploy.yarn.config._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ @@ -70,7 +71,6 @@ private[spark] class Client( extends Logging { import Client._ - import YarnSparkHadoopUtil._ private val yarnClient = YarnClient.createYarnClient private val hadoopConf = new YarnConfiguration(SparkHadoopUtil.newConfiguration(sparkConf)) @@ -85,6 +85,12 @@ private[spark] class Client( private var appMaster: ApplicationMaster = _ private var stagingDirPath: Path = _ + private val amMemoryOverheadFactor = if (isClusterMode) { + sparkConf.get(DRIVER_MEMORY_OVERHEAD_FACTOR) + } else { + AM_MEMORY_OVERHEAD_FACTOR + } + // AM related configurations private val amMemory = if (isClusterMode) { sparkConf.get(DRIVER_MEMORY).toInt @@ -94,7 +100,7 @@ private[spark] class Client( private val amMemoryOverhead = { val amMemoryOverheadEntry = if (isClusterMode) DRIVER_MEMORY_OVERHEAD else AM_MEMORY_OVERHEAD sparkConf.get(amMemoryOverheadEntry).getOrElse( - math.max((MEMORY_OVERHEAD_FACTOR * amMemory).toLong, + math.max((amMemoryOverheadFactor * amMemory).toLong, ResourceProfile.MEMORY_OVERHEAD_MIN_MIB)).toInt } private val amCores = if (isClusterMode) { @@ -107,8 +113,10 @@ private[spark] class Client( private val executorMemory = sparkConf.get(EXECUTOR_MEMORY) // Executor offHeap memory in MiB. 
protected val executorOffHeapMemory = Utils.executorOffHeapMemorySizeAsMb(sparkConf) + + private val executorMemoryOvereadFactor = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) private val executorMemoryOverhead = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD).getOrElse( - math.max((MEMORY_OVERHEAD_FACTOR * executorMemory).toLong, + math.max((executorMemoryOvereadFactor * executorMemory).toLong, ResourceProfile.MEMORY_OVERHEAD_MIN_MIB)).toInt private val isPython = sparkConf.get(IS_PYTHON_APP) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 54ab643f2755b..a85b7174673af 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -163,6 +163,8 @@ private[yarn] class YarnAllocator( private val isPythonApp = sparkConf.get(IS_PYTHON_APP) + private val memoryOverheadFactor = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) + private val launcherPool = ThreadUtils.newDaemonCachedThreadPool( "ContainerLauncher", sparkConf.get(CONTAINER_LAUNCH_MAX_THREADS)) @@ -280,9 +282,10 @@ private[yarn] class YarnAllocator( // track the resource profile if not already there getOrUpdateRunningExecutorForRPId(rp.id) logInfo(s"Resource profile ${rp.id} doesn't exist, adding it") + val resourcesWithDefaults = ResourceProfile.getResourcesForClusterManager(rp.id, rp.executorResources, - MEMORY_OVERHEAD_FACTOR, sparkConf, isPythonApp, resourceNameMapping) + memoryOverheadFactor, sparkConf, isPythonApp, resourceNameMapping) val customSparkResources = resourcesWithDefaults.customResources.map { case (name, execReq) => (name, execReq.amount.toString) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index f347e37ba24ab..1869c739e4844 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -34,11 +34,10 @@ import org.apache.spark.util.Utils object YarnSparkHadoopUtil { - // Additional memory overhead + // Additional memory overhead for application masters in client mode. // 10% was arrived at experimentally. In the interest of minimizing memory waste while covering // the common cases. Memory overhead tends to grow with container size. 
- - val MEMORY_OVERHEAD_FACTOR = 0.10 + val AM_MEMORY_OVERHEAD_FACTOR = 0.10 val ANY_HOST = "*" diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala index db65d128b07f0..ae010f11503dd 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala @@ -706,4 +706,33 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter sparkConf.set(MEMORY_OFFHEAP_SIZE, originalOffHeapSize) } } + + test("SPARK-38194: Configurable memory overhead factor") { + val executorMemory = sparkConf.get(EXECUTOR_MEMORY).toLong + try { + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.5) + val (handler, _) = createAllocator(maxExecutors = 1, + additionalConfigs = Map(EXECUTOR_MEMORY.key -> executorMemory.toString)) + val defaultResource = handler.rpIdToYarnResource.get(defaultRPId) + val memory = defaultResource.getMemory + assert(memory == (executorMemory * 1.5).toLong) + } finally { + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.1) + } + } + + test("SPARK-38194: Memory overhead takes precedence over factor") { + val executorMemory = sparkConf.get(EXECUTOR_MEMORY) + try { + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.5) + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD, (executorMemory * 0.4).toLong) + val (handler, _) = createAllocator(maxExecutors = 1, + additionalConfigs = Map(EXECUTOR_MEMORY.key -> executorMemory.toString)) + val defaultResource = handler.rpIdToYarnResource.get(defaultRPId) + val memory = defaultResource.getMemory + assert(memory == (executorMemory * 1.4).toLong) + } finally { + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.1) + } + } } From 32b0705b2b0d3efba646eb8d6fe9c18507c4494b Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 1 Nov 2021 16:04:47 -0700 Subject: [PATCH 510/513] Try using a queue to keep track of exec request times. Try and get the time from the queue if it's defined for when the exec was requested. Add requestTs to the ExecutorData Core tests compile, not really tested yet though Add more tests Add some comments explaining the logic and also the amortized cost of the queue ops. There is no exec request time with the local sched backend since we don't really have an exec request time. Test the request times are set for both default profile and custom resource profile. Handle nulls in ExecutorInfo hashing. Add old constructor for ExecutorInfo for src/bin compat. Back out unrelated test changes to verify the legacy constructor is called, update the JSON protocol suite to validate we store/deserialize the request time info, synchronize some access to requestedTotalExecutorsPerResourceProfile CR feedback, check if times is empty rather than handling an exception.
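The queue bookkeeping described in this commit message can be pictured with a small standalone sketch: each request is recorded per resource profile as a (count, timestamp) pair at the tail of a queue, and a registering executor consumes one unit from the head to recover the time it was most likely requested at. The object and method names below are illustrative only, not Spark's API; this is a sketch of the idea, not the patch itself.

```
import scala.collection.mutable.{HashMap, Queue}

// Standalone sketch of the request-time bookkeeping (illustrative names, not Spark's API).
object RequestTimeSketch {
  // resource profile id -> queue of (outstanding request count, request timestamp)
  private val requestTimes = new HashMap[Int, Queue[(Int, Long)]]

  // Record that `count` more executors were requested for `profileId` at time `now`.
  def recordRequest(profileId: Int, count: Int, now: Long): Unit = {
    val times = requestTimes.getOrElseUpdate(profileId, Queue.empty)
    times += ((count, now)) // append at the tail, amortized constant time
  }

  // Called when one executor registers: recover its most likely request time, if any.
  def takeRequestTime(profileId: Int): Option[Long] = {
    requestTimes.get(profileId).flatMap { times =>
      times.headOption.map { case (count, ts) =>
        times.dequeue() // drop the head entry
        if (count > 1) {
          ((count - 1, ts)) +=: times // prepend the reduced count, constant time
        }
        ts
      }
    }
  }

  def main(args: Array[String]): Unit = {
    recordRequest(profileId = 0, count = 2, now = 100L)
    recordRequest(profileId = 0, count = 1, now = 250L)
    println(takeRequestTime(0)) // Some(100)
    println(takeRequestTime(0)) // Some(100)
    println(takeRequestTime(0)) // Some(250)
    println(takeRequestTime(0)) // None
  }
}
```

Appending at the tail and prepending a reduced head entry are both amortized constant-time operations on the ArrayDeque that backs scala.collection.mutable.Queue in Scala 2.13, which is what keeps this bookkeeping cheap.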
--- .../CoarseGrainedSchedulerBackend.scala | 66 ++++++++++++++++++- .../scheduler/cluster/ExecutorData.scala | 6 +- .../scheduler/cluster/ExecutorInfo.scala | 20 ++++-- .../org/apache/spark/util/JsonProtocol.scala | 13 +++- .../CoarseGrainedSchedulerBackendSuite.scala | 19 ++++++ .../SparkListenerWithClusterSuite.scala | 1 + .../dynalloc/ExecutorMonitorSuite.scala | 2 +- .../apache/spark/util/JsonProtocolSuite.scala | 38 +++++++++++ 8 files changed, 153 insertions(+), 12 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 13a7183a29dd6..b525761557d3b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -21,7 +21,7 @@ import java.util.concurrent.{ScheduledExecutorService, TimeUnit} import java.util.concurrent.atomic.{AtomicInteger, AtomicReference} import javax.annotation.concurrent.GuardedBy -import scala.collection.mutable.{HashMap, HashSet} +import scala.collection.mutable.{HashMap, HashSet, Queue} import scala.concurrent.Future import org.apache.hadoop.security.UserGroupInformation @@ -82,6 +82,12 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp @GuardedBy("CoarseGrainedSchedulerBackend.this") private val requestedTotalExecutorsPerResourceProfile = new HashMap[ResourceProfile, Int] + // Profile IDs to the times that executors were requested for. + // The operations we do on queue are all amortized constant cost + // see https://www.scala-lang.org/api/2.13.x/scala/collection/mutable/ArrayDeque.html + @GuardedBy("CoarseGrainedSchedulerBackend.this") + private val execRequestTimes = new HashMap[Int, Queue[(Int, Long)]] + private val listenerBus = scheduler.sc.listenerBus // Executors we have requested the cluster manager to kill that have not died yet; maps @@ -260,9 +266,27 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp .resourceProfileFromId(resourceProfileId).getNumSlotsPerAddress(rName, conf) (info.name, new ExecutorResourceInfo(info.name, info.addresses, numParts)) } + // If we've requested the executor figure out when we did. 
+ val reqTs: Option[Long] = CoarseGrainedSchedulerBackend.this.synchronized { + execRequestTimes.get(resourceProfileId).flatMap { + times => + times.headOption.map { + h => + // Take off the top element + times.dequeue() + // If we requested more than one exec reduce the req count by 1 and prepend it back + if (h._1 > 1) { + ((h._1 - 1, h._2)) +=: times + } + h._2 + } + } + } + val data = new ExecutorData(executorRef, executorAddress, hostname, 0, cores, logUrlHandler.applyPattern(logUrls, attributes), attributes, - resourcesInfo, resourceProfileId, registrationTs = System.currentTimeMillis()) + resourcesInfo, resourceProfileId, registrationTs = System.currentTimeMillis(), + requestTs = reqTs) // This must be synchronized because variables mutated // in this block are read when requesting executors CoarseGrainedSchedulerBackend.this.synchronized { @@ -742,6 +766,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp val numExisting = requestedTotalExecutorsPerResourceProfile.getOrElse(defaultProf, 0) requestedTotalExecutorsPerResourceProfile(defaultProf) = numExisting + numAdditionalExecutors // Account for executors pending to be added or removed + updateExecRequestTime(defaultProf.id, numAdditionalExecutors) doRequestTotalExecutors(requestedTotalExecutorsPerResourceProfile.toMap) } @@ -780,15 +805,52 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp (scheduler.sc.resourceProfileManager.resourceProfileFromId(rpid), num) } val response = synchronized { + val oldResourceProfileToNumExecutors = requestedTotalExecutorsPerResourceProfile.map { + case (rp, num) => + (rp.id, num) + }.toMap this.requestedTotalExecutorsPerResourceProfile.clear() this.requestedTotalExecutorsPerResourceProfile ++= resourceProfileToNumExecutors this.numLocalityAwareTasksPerResourceProfileId = numLocalityAwareTasksPerResourceProfileId this.rpHostToLocalTaskCount = hostToLocalTaskCount + updateExecRequestTimes(oldResourceProfileToNumExecutors, resourceProfileIdToNumExecutors) doRequestTotalExecutors(requestedTotalExecutorsPerResourceProfile.toMap) } defaultAskTimeout.awaitResult(response) } + private def updateExecRequestTimes(oldProfile: Map[Int, Int], newProfile: Map[Int, Int]): Unit = { + newProfile.map { + case (k, v) => + val delta = v - oldProfile.getOrElse(k, 0) + if (delta != 0) { + updateExecRequestTime(k, delta) + } + } + } + + private def updateExecRequestTime(profileId: Int, delta: Int) = { + val times = execRequestTimes.getOrElseUpdate(profileId, Queue[(Int, Long)]()) + if (delta > 0) { + // Add the request to the end, constant time op + times += ((delta, System.currentTimeMillis())) + } else if (delta < 0) { + // Consume as if |delta| had been allocated + var c = -delta + // Note: it's possible that something else allocated an executor and we have + // a negative delta, we can just avoid mutating the queue. + while (c > 0 && !times.isEmpty) { + val h = times.dequeue + if (h._1 > c) { + // Prepend updated first req to times, constant time op + ((h._1 - c, h._2)) +=: times + } else { + c = c - h._1 + } + } + } + } + /** * Request executors from the cluster manager by specifying the total number desired, * including existing pending and running executors. 
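A short worked trace of the `delta < 0` branch above may help; the helper below is standalone and illustrative (its name and signature are not Spark's), mirroring the intent of the queue consumption rather than the exact code. With outstanding requests of 3 and then 2 executors, cancelling 4 leaves a single entry of 1 that keeps the later request's timestamp.

```
import scala.collection.mutable.Queue

// Illustrative sketch: consume `decrease` executors' worth of outstanding requests
// from the head of the queue, keeping any unconsumed remainder.
object DecreaseTraceSketch {
  def consume(times: Queue[(Int, Long)], decrease: Int): Unit = {
    var remaining = decrease
    while (remaining > 0 && times.nonEmpty) {
      val (count, ts) = times.dequeue()
      if (count > remaining) {
        ((count - remaining, ts)) +=: times // put the unconsumed remainder back at the head
        remaining = 0
      } else {
        remaining -= count
      }
    }
  }

  def main(args: Array[String]): Unit = {
    // Two outstanding requests: 3 executors at t=100, then 2 executors at t=250.
    val times = Queue((3, 100L), (2, 250L))
    consume(times, 4) // cancel 4 of the 5 outstanding executors
    println(times.toList) // List((1,250))
  }
}
```

If the decrease exceeds everything outstanding (for example because executors were allocated by another path), the loop simply drains the queue and stops.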
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala index 86b44e835368c..07236d4007faa 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala @@ -31,6 +31,7 @@ import org.apache.spark.scheduler.ExecutorResourceInfo * @param resourcesInfo The information of the currently available resources on the executor * @param resourceProfileId The id of the ResourceProfile being used by this executor * @param registrationTs The registration timestamp of this executor + * @param requestTs What time this executor was most likely requested at */ private[cluster] class ExecutorData( val executorEndpoint: RpcEndpointRef, @@ -42,6 +43,7 @@ private[cluster] class ExecutorData( override val attributes: Map[String, String], override val resourcesInfo: Map[String, ExecutorResourceInfo], override val resourceProfileId: Int, - val registrationTs: Long + val registrationTs: Long, + val requestTs: Option[Long] ) extends ExecutorInfo(executorHost, totalCores, logUrlMap, attributes, - resourcesInfo, resourceProfileId) + resourcesInfo, resourceProfileId, Some(registrationTs), requestTs) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorInfo.scala index a97b08941ba78..5be8950192c8c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorInfo.scala @@ -31,10 +31,19 @@ class ExecutorInfo( val logUrlMap: Map[String, String], val attributes: Map[String, String], val resourcesInfo: Map[String, ResourceInformation], - val resourceProfileId: Int) { + val resourceProfileId: Int, + val registrationTime: Option[Long], + val requestTime: Option[Long]) { + def this(executorHost: String, totalCores: Int, logUrlMap: Map[String, String], + attributes: Map[String, String], resourcesInfo: Map[String, ResourceInformation], + resourceProfileId: Int) = { + this(executorHost, totalCores, logUrlMap, attributes, resourcesInfo, resourceProfileId, + None, None) + } def this(executorHost: String, totalCores: Int, logUrlMap: Map[String, String]) = { - this(executorHost, totalCores, logUrlMap, Map.empty, Map.empty, DEFAULT_RESOURCE_PROFILE_ID) + this(executorHost, totalCores, logUrlMap, Map.empty, Map.empty, DEFAULT_RESOURCE_PROFILE_ID, + None, None) } def this( @@ -42,7 +51,8 @@ class ExecutorInfo( totalCores: Int, logUrlMap: Map[String, String], attributes: Map[String, String]) = { - this(executorHost, totalCores, logUrlMap, attributes, Map.empty, DEFAULT_RESOURCE_PROFILE_ID) + this(executorHost, totalCores, logUrlMap, attributes, Map.empty, DEFAULT_RESOURCE_PROFILE_ID, + None, None) } def this( @@ -52,7 +62,7 @@ class ExecutorInfo( attributes: Map[String, String], resourcesInfo: Map[String, ResourceInformation]) = { this(executorHost, totalCores, logUrlMap, attributes, resourcesInfo, - DEFAULT_RESOURCE_PROFILE_ID) + DEFAULT_RESOURCE_PROFILE_ID, None, None) } def canEqual(other: Any): Boolean = other.isInstanceOf[ExecutorInfo] @@ -72,6 +82,6 @@ class ExecutorInfo( override def hashCode(): Int = { val state = Seq(executorHost, totalCores, logUrlMap, attributes, resourcesInfo, resourceProfileId) - state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b) + state.filter(_ != null).map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + 
b) } } diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala index f9b6ed37977cb..3287a786597c3 100644 --- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala @@ -526,7 +526,9 @@ private[spark] object JsonProtocol { ("Log Urls" -> mapToJson(executorInfo.logUrlMap)) ~ ("Attributes" -> mapToJson(executorInfo.attributes)) ~ ("Resources" -> resourcesMapToJson(executorInfo.resourcesInfo)) ~ - ("Resource Profile Id" -> executorInfo.resourceProfileId) + ("Resource Profile Id" -> executorInfo.resourceProfileId) ~ + ("Registration Time" -> executorInfo.registrationTime) ~ + ("Request Time" -> executorInfo.requestTime) } def resourcesMapToJson(m: Map[String, ResourceInformation]): JValue = { @@ -1220,8 +1222,15 @@ private[spark] object JsonProtocol { case Some(id) => id.extract[Int] case None => ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID } + val registrationTs = jsonOption(json \ "Registration Time") map { ts => + ts.extract[Long] + } + val requestTs = jsonOption(json \ "Request Time") map { ts => + ts.extract[Long] + } + new ExecutorInfo(executorHost, totalCores, logUrls, attributes.toMap, resources.toMap, - resourceProfileId) + resourceProfileId, registrationTs, requestTs) } def blockUpdatedInfoFromJson(json: JValue): BlockUpdatedInfo = { diff --git a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala index 4663717dc86be..a5f677678c615 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala @@ -40,6 +40,7 @@ import org.apache.spark.resource.TestResourceIDs._ import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef, RpcEnv} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend +import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.util.{RpcUtils, SerializableBuffer, Utils} class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkContext @@ -189,6 +190,8 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo test("extra resources from executor") { + val testStartTime = System.currentTimeMillis() + val execCores = 3 val conf = new SparkConf() .set(EXECUTOR_CORES, execCores) @@ -207,6 +210,10 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo sc.resourceProfileManager.addResourceProfile(rp) assert(rp.id > ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) val backend = sc.schedulerBackend.asInstanceOf[TestCoarseGrainedSchedulerBackend] + // Note we get two in default profile and one in the new rp + // we need to put a req time in for all of them. 
+ backend.requestTotalExecutors(Map((rp.id, 1)), Map(), Map()) + backend.requestExecutors(3) val mockEndpointRef = mock[RpcEndpointRef] val mockAddress = mock[RpcAddress] when(mockEndpointRef.send(LaunchTask)).thenAnswer((_: InvocationOnMock) => {}) @@ -214,8 +221,12 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo val resources = Map(GPU -> new ResourceInformation(GPU, Array("0", "1", "3"))) var executorAddedCount: Int = 0 + val infos = scala.collection.mutable.ArrayBuffer[ExecutorInfo]() val listener = new SparkListener() { override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = { + // Lets check that the exec allocation times "make sense" + val info = executorAdded.executorInfo + infos += info executorAddedCount += 1 } } @@ -271,6 +282,14 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo } sc.listenerBus.waitUntilEmpty(executorUpTimeout.toMillis) assert(executorAddedCount === 3) + infos.foreach { info => + assert(info.requestTime.get > 0, + "Exec allocation and request times don't make sense") + assert(info.requestTime.get > testStartTime, + "Exec allocation and request times don't make sense") + assert(info.registrationTime.get > info.requestTime.get, + "Exec allocation and request times don't make sense") + } } private def testSubmitJob(sc: SparkContext, rdd: RDD[Int]): Unit = { diff --git a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerWithClusterSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerWithClusterSuite.scala index c84735c9665a7..8b81468406bbb 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerWithClusterSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerWithClusterSuite.scala @@ -52,6 +52,7 @@ class SparkListenerWithClusterSuite extends SparkFunSuite with LocalSparkContext assert(listener.addedExecutorInfo.size == 2) assert(listener.addedExecutorInfo("0").totalCores == 1) assert(listener.addedExecutorInfo("1").totalCores == 1) + assert(listener.addedExecutorInfo("0").registrationTime.get > 0 ) } private class SaveExecutorInfo extends SparkListener { diff --git a/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala index 336198b182c87..c8916dcd6eb4f 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala @@ -285,7 +285,7 @@ class ExecutorMonitorSuite extends SparkFunSuite { knownExecs ++= Set("1", "2", "3") val execInfoRp1 = new ExecutorInfo("host1", 1, Map.empty, - Map.empty, Map.empty, 1) + Map.empty, Map.empty, 1, None, None) monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "2", execInfo)) diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala index 4eea2256553f5..a3dc2d8fa735e 100644 --- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala @@ -96,6 +96,9 @@ class JsonProtocolSuite extends SparkFunSuite { val applicationEnd = SparkListenerApplicationEnd(42L) val executorAdded = SparkListenerExecutorAdded(executorAddedTime, "exec1", new 
ExecutorInfo("Hostee.awesome.com", 11, logUrlMap, attributes, resources.toMap, 4)) + val executorAddedWithTime = SparkListenerExecutorAdded(executorAddedTime, "exec1", + new ExecutorInfo("Hostee.awesome.com", 11, logUrlMap, attributes, resources.toMap, 4, + Some(0), Some(1))) val executorRemoved = SparkListenerExecutorRemoved(executorRemovedTime, "exec2", "test reason") val executorBlacklisted = SparkListenerExecutorBlacklisted(executorExcludedTime, "exec1", 22) val executorUnblacklisted = @@ -155,6 +158,7 @@ class JsonProtocolSuite extends SparkFunSuite { testEvent(applicationStartWithLogs, applicationStartJsonWithLogUrlsString) testEvent(applicationEnd, applicationEndJsonString) testEvent(executorAdded, executorAddedJsonString) + testEvent(executorAddedWithTime, executorAddedWithTimeJsonString) testEvent(executorRemoved, executorRemovedJsonString) testEvent(executorBlacklisted, executorBlacklistedJsonString) testEvent(executorUnblacklisted, executorUnblacklistedJsonString) @@ -173,6 +177,7 @@ class JsonProtocolSuite extends SparkFunSuite { test("Dependent Classes") { val logUrlMap = Map("stderr" -> "mystderr", "stdout" -> "mystdout").toMap val attributes = Map("ContainerId" -> "ct1", "User" -> "spark").toMap + val rinfo = Map[String, ResourceInformation]().toMap testRDDInfo(makeRddInfo(2, 3, 4, 5L, 6L, DeterministicLevel.DETERMINATE)) testStageInfo(makeStageInfo(10, 20, 30, 40L, 50L)) testTaskInfo(makeTaskInfo(999L, 888, 55, 777L, false)) @@ -180,6 +185,8 @@ class JsonProtocolSuite extends SparkFunSuite { 33333L, 44444L, 55555L, 66666L, 7, 8, hasHadoopInput = false, hasOutput = false)) testBlockManagerId(BlockManagerId("Hong", "Kong", 500)) testExecutorInfo(new ExecutorInfo("host", 43, logUrlMap, attributes)) + testExecutorInfo(new ExecutorInfo("host", 43, logUrlMap, attributes, + rinfo, 1, Some(0), Some(1))) // StorageLevel testStorageLevel(StorageLevel.NONE) @@ -2141,6 +2148,37 @@ private[spark] object JsonProtocolSuite extends Assertions { |} """.stripMargin + private val executorAddedWithTimeJsonString = + s""" + |{ + | "Event": "SparkListenerExecutorAdded", + | "Timestamp": ${executorAddedTime}, + | "Executor ID": "exec1", + | "Executor Info": { + | "Host": "Hostee.awesome.com", + | "Total Cores": 11, + | "Log Urls" : { + | "stderr" : "mystderr", + | "stdout" : "mystdout" + | }, + | "Attributes" : { + | "ContainerId" : "ct1", + | "User" : "spark" + | }, + | "Resources" : { + | "gpu" : { + | "name" : "gpu", + | "addresses" : [ "0", "1" ] + | } + | }, + | "Resource Profile Id": 4, + | "Registration Time" : 0, + | "Request Time" : 1 + | } + | + |} + """.stripMargin + private val executorRemovedJsonString = s""" |{ From b1394403dd646c81e6b37c29e7180297ca76caa9 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 11 Jan 2022 14:32:04 -0800 Subject: [PATCH 511/513] ugh workflows --- .github/workflows/build_and_test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ebe17b5963f20..6b07e65048a22 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -95,6 +95,7 @@ jobs: echo '::set-output name=envs::{"SKIP_MIMA": "true", "SKIP_UNIDOC": "true"}' echo '::set-output name=hadoop::hadoop3' else + echo ' sup' echo '::set-output name=java::8' echo '::set-output name=branch::master' # Default branch to run on. CHANGE here when a branch is cut out. 
echo '::set-output name=type::regular' From b01149cf64f2e4b2820d1ff540c1c05ccdb79d0d Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 11 Jan 2022 14:33:40 -0800 Subject: [PATCH 512/513] Revert "ugh workflows" This reverts commit 85a3f9d5dab1cdfd55cc3816861e0b524d1590b9. --- .github/workflows/build_and_test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 6b07e65048a22..ebe17b5963f20 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -95,7 +95,6 @@ jobs: echo '::set-output name=envs::{"SKIP_MIMA": "true", "SKIP_UNIDOC": "true"}' echo '::set-output name=hadoop::hadoop3' else - echo ' sup' echo '::set-output name=java::8' echo '::set-output name=branch::master' # Default branch to run on. CHANGE here when a branch is cut out. echo '::set-output name=type::regular' From 603ba9796d23b28088dfcfa39c3f35e1094ded43 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 16 Mar 2022 11:58:38 -0700 Subject: [PATCH 513/513] Fix the resource request decrease scenario and add a test for it. --- .../CoarseGrainedSchedulerBackend.scala | 1 + .../CoarseGrainedSchedulerBackendSuite.scala | 112 ++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index b525761557d3b..08f0e5f9c5f1d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -844,6 +844,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp if (h._1 > c) { // Prepend updated first req to times, constant time op ((h._1 - c, h._2)) +=: times + c = 0 } else { c = c - h._1 } diff --git a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala index a5f677678c615..e77ade60a61be 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala @@ -292,6 +292,118 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo } } + test("exec alloc decrease.") { + + val testStartTime = System.currentTimeMillis() + + val execCores = 3 + val conf = new SparkConf() + .set(EXECUTOR_CORES, execCores) + .set(SCHEDULER_REVIVE_INTERVAL.key, "1m") // don't let it auto revive during test + .set(EXECUTOR_INSTANCES, 0) // avoid errors about duplicate executor registrations + .setMaster( + "coarseclustermanager[org.apache.spark.scheduler.TestCoarseGrainedSchedulerBackend]") + .setAppName("test") + conf.set(TASK_GPU_ID.amountConf, "1") + conf.set(EXECUTOR_GPU_ID.amountConf, "1") + + sc = new SparkContext(conf) + val execGpu = new ExecutorResourceRequests().cores(1).resource(GPU, 3) + val taskGpu = new TaskResourceRequests().cpus(1).resource(GPU, 1) + val rp = new ResourceProfile(execGpu.requests, taskGpu.requests) + sc.resourceProfileManager.addResourceProfile(rp) + assert(rp.id > ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + val backend = sc.schedulerBackend.asInstanceOf[TestCoarseGrainedSchedulerBackend] + // Note we get two in default profile and one in the new rp + // we need to put a req time 
in for all of them. + backend.requestTotalExecutors(Map((rp.id, 1)), Map(), Map()) + // Decrease the number of execs requested in the new rp. + backend.requestTotalExecutors(Map((rp.id, 0)), Map(), Map()) + // Request execs in the default profile. + backend.requestExecutors(3) + val mockEndpointRef = mock[RpcEndpointRef] + val mockAddress = mock[RpcAddress] + when(mockEndpointRef.send(LaunchTask)).thenAnswer((_: InvocationOnMock) => {}) + + val resources = Map(GPU -> new ResourceInformation(GPU, Array("0", "1", "3"))) + + var executorAddedCount: Int = 0 + val infos = scala.collection.mutable.ArrayBuffer[ExecutorInfo]() + val listener = new SparkListener() { + override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = { + // Lets check that the exec allocation times "make sense" + val info = executorAdded.executorInfo + infos += info + executorAddedCount += 1 + } + } + + sc.addSparkListener(listener) + + backend.driverEndpoint.askSync[Boolean]( + RegisterExecutor("1", mockEndpointRef, mockAddress.host, 1, Map.empty, Map.empty, resources, + ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) + backend.driverEndpoint.askSync[Boolean]( + RegisterExecutor("2", mockEndpointRef, mockAddress.host, 1, Map.empty, Map.empty, resources, + ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) + backend.driverEndpoint.askSync[Boolean]( + RegisterExecutor("3", mockEndpointRef, mockAddress.host, 1, Map.empty, Map.empty, resources, + rp.id)) + + val frameSize = RpcUtils.maxMessageSizeBytes(sc.conf) + val bytebuffer = java.nio.ByteBuffer.allocate(frameSize - 100) + val buffer = new SerializableBuffer(bytebuffer) + + var execResources = backend.getExecutorAvailableResources("1") + assert(execResources(GPU).availableAddrs.sorted === Array("0", "1", "3")) + + val exec3ResourceProfileId = backend.getExecutorResourceProfileId("3") + assert(exec3ResourceProfileId === rp.id) + + val taskResources = Map(GPU -> new ResourceInformation(GPU, Array("0"))) + val taskDescs: Seq[Seq[TaskDescription]] = Seq(Seq(new TaskDescription(1, 0, "1", + "t1", 0, 1, mutable.Map.empty[String, Long], + mutable.Map.empty[String, Long], mutable.Map.empty[String, Long], + new Properties(), 1, taskResources, bytebuffer))) + val ts = backend.getTaskSchedulerImpl() + when(ts.resourceOffers(any[IndexedSeq[WorkerOffer]], any[Boolean])).thenReturn(taskDescs) + + backend.driverEndpoint.send(ReviveOffers) + + eventually(timeout(5 seconds)) { + execResources = backend.getExecutorAvailableResources("1") + assert(execResources(GPU).availableAddrs.sorted === Array("1", "3")) + assert(execResources(GPU).assignedAddrs === Array("0")) + } + + // To avoid allocating any resources immediately after releasing the resource from the task to + // make sure that `availableAddrs` below won't change + when(ts.resourceOffers(any[IndexedSeq[WorkerOffer]], any[Boolean])).thenReturn(Seq.empty) + backend.driverEndpoint.send( + StatusUpdate("1", 1, TaskState.FINISHED, buffer, taskResources)) + + eventually(timeout(5 seconds)) { + execResources = backend.getExecutorAvailableResources("1") + assert(execResources(GPU).availableAddrs.sorted === Array("0", "1", "3")) + assert(execResources(GPU).assignedAddrs.isEmpty) + } + sc.listenerBus.waitUntilEmpty(executorUpTimeout.toMillis) + assert(executorAddedCount === 3) + infos.foreach { info => + info.requestTime.map { t => + assert(t > 0, + "Exec request times don't make sense") + assert(t >= testStartTime, + "Exec allocation and request times don't make sense") + assert(t >= info.requestTime.get, + "Exec allocation and 
request times don't make sense") + } + } + assert(infos.filter(_.requestTime.isEmpty).length === 1, + "Our unexpected executor does not have a request time.") + } + + private def testSubmitJob(sc: SparkContext, rdd: RDD[Int]): Unit = { sc.submitJob( rdd,